python/invirt/config.py

   1 from __future__ import with_statement
   2
   3 import json
   4 from invirt.common import *
   5 import os
   6 from os import rename
   7 from os.path import getmtime
   8 from contextlib import closing
   9 import yaml
  10 import re
  11
  12 try:    loader = yaml.CSafeLoader
  13 except: loader = yaml.SafeLoader
  14
  15 src_path    = '/etc/invirt/master.yaml'
  16 src_dirpath = '/etc/invirt/conf.d'
  17 cache_path  = '/var/lib/invirt/cache.json'
  18 lock_path   = '/var/lib/invirt/cache.lock'
  19
  20 def augment(d1, d2):
  21     """Splice dict-tree d2 into d1.  Return d1.
  22
  23     Example:
  24     >>> d = {'a': {'b': 1}, 'c': 2}
  25     >>> augment(d, {'a': {'d': 3}})
  26     {'a': {'b', 1, 'd': 3}, 'c': 2}
  27     >>> d
  28     {'a': {'b', 1, 'd': 3}, 'c': 2}
  29     """
  30     for k in d2:
  31         if k in d1 and isinstance(d1[k], dict):
  32             augment(d1[k], d2[k])
  33         else:
  34             d1[k] = d2[k]
  35     return d1
  36
  37 def run_parts_list(dirname):
  38     """Reimplements Debian's run-parts --list.
  39
  40     One difference from run-parts's behavior: run-parts --list /foo/
  41     will give output like /foo//bar, but run_parts_list('/foo/') gives
  42     /foo/bar in deference to Python conventions.
  43
  44     Matches documented behavior of run-parts in debianutils v2.28.2, dated 2007.
  45     """
  46     # From run-parts(8).
  47     lanana_re   = re.compile('^[a-z0-9]+$')
  48     lsb_re      = re.compile('^_?([a-z0-9_.]+-)+[a-z0-9]+$')
  49     deb_cron_re = re.compile('^[a-z0-9][a-z0-9-]*$')
  50     for name in os.listdir(dirname):
  51         if lanana_re.match(name) or lsb_re.match(name) or deb_cron_re.match(name):
  52             yield os.path.join(dirname, name)
  53
  54 def list_files():
  55     yield src_path
  56     for name in run_parts_list(src_dirpath):
  57         yield name
  58
  59 def load_master():
  60     config = dict()
  61     for filename in list_files():
  62         with closing(file(filename)) as f:
  63             augment(config, yaml.load(f, loader))
  64     return config
  65
  66 def get_src_mtime():
  67     return max(max(getmtime(filename) for filename in list_files()),
  68                getmtime(src_dirpath))
  69
  70 def load(force_refresh = False):
  71     """
  72     Try loading the configuration from the faster-to-load JSON cache at
  73     cache_path.  If it doesn't exist or is outdated, load the configuration
  74     instead from the original YAML file at src_path and regenerate the cache.
  75     I assume I have the permissions to write to the cache directory.
  76     """
  77
  78     # Namespace container for state variables, so that they can be updated by
  79     # closures.
  80     ns = struct()
  81
  82     if force_refresh:
  83         do_refresh = True
  84     else:
  85         src_mtime = get_src_mtime()
  86         try:            cache_mtime = getmtime(cache_path)
  87         except OSError: do_refresh  = True
  88         else:           do_refresh  = src_mtime + 1 >= cache_mtime
  89
  90         # We chose not to simply say
  91         #
  92         #   do_refresh = src_mtime >= cache_time
  93         #
  94         # because between the getmtime(src_path) and the time the cache is
  95         # rewritten, the master configuration may have been updated, so future
  96         # checks here would find a cache with a newer mtime than the master
  97         # (and thus treat the cache as containing the latest version of the
  98         # master).  The +1 means that for at least a full second following the
  99         # update to the master, this function will refresh the cache, giving us
 100         # 1 second to write the cache.  Note that if it takes longer than 1
 101         # second to write the cache, then this situation could still arise.
 102         #
 103         # The getmtime calls should logically be part of the same transaction
 104         # as the rest of this function (cache read + conditional cache
 105         # refresh), but to wrap everything in an flock would cause the
 106         # following cache read to be less streamlined.
 107
 108     if not do_refresh:
 109         # Try reading from the cache first.  This must be transactionally
 110         # isolated from concurrent writes to prevent reading an incomplete
 111         # (changing) version of the data (but the transaction can share the
 112         # lock with other concurrent reads).  This isolation is accomplished
 113         # using an atomic filesystem rename in the refreshing stage.
 114         try:
 115             with closing(file(cache_path)) as f:
 116                 ns.cfg = json.read(f.read())
 117         except: do_refresh = True
 118
 119     if do_refresh:
 120         # Atomically reload the source and regenerate the cache.  The read and
 121         # write must be a single transaction, or a stale version may be
 122         # written (if another read/write of a more recent configuration
 123         # is interleaved).  The final atomic rename is to keep this
 124         # transactionally isolated from the above cache read.  If we fail to
 125         # acquire the lock, just try to load the master configuration.
 126         try:
 127             with lock_file(lock_path):
 128                 ns.cfg = load_master()
 129                 try:
 130                     with closing(file(cache_path + '.tmp', 'w')) as f:
 131                         f.write(json.write(ns.cfg))
 132                 except: pass # silent failure
 133                 else: rename(cache_path + '.tmp', cache_path)
 134         except IOError:
 135             ns.cfg = load_master()
 136     return ns.cfg
 137
# Load the configuration once, at import time.  `dicts` is the value returned
# by load(); `structs` is that same data passed through dicts2struct(), which
# is not defined in this file (presumably it comes from the invirt.common
# star-import -- TODO confirm).
dicts = load()
structs = dicts2struct(dicts)
 140
 141 # vim:et:sw=4:ts=4