python/invirt/config.py

   1 import json
   2 import os
   3 import re
   4
   5 import yaml
   6 import invirt.common
   7
   8 try:    loader = yaml.CSafeLoader
   9 except: loader = yaml.SafeLoader
  10
  11 src_path    = '/etc/invirt/master.yaml'
  12 src_dirpath = '/etc/invirt/conf.d'
  13 cache_path  = '/var/lib/invirt/cache.json'
  14 lock_path   = '/var/lib/invirt/cache.lock'
  15
  16 def augment(d1, d2):
  17     """
  18     Splice dict-tree d2 into d1.  Return d1.
  19
  20     d2 may be None for an empty dict-tree, because yaml.load produces that.
  21
  22     Example:
  23     >>> d = {'a': {'b': 1}, 'c': 2}
  24     >>> augment(d, {'a': {'d': 3}})
  25     {'a': {'b', 1, 'd': 3}, 'c': 2}
  26     >>> d
  27     {'a': {'b', 1, 'd': 3}, 'c': 2}
  28     """
  29
  30     if d2 is None:
  31         return d1
  32     for k in d2:
  33         if k in d1 and isinstance(d1[k], dict):
  34             augment(d1[k], d2[k])
  35         else:
  36             d1[k] = d2[k]
  37     return d1
  38
  39 def run_parts_list(dirname):
  40     """
  41     Reimplements Debian's run-parts --list.
  42
  43     One difference from run-parts's behavior: run-parts --list /foo/
  44     will give output like /foo//bar, but run_parts_list('/foo/') gives
  45     /foo/bar in deference to Python conventions.
  46
  47     Matches documented behavior of run-parts in debianutils v2.28.2, dated 2007.
  48     """
  49
  50     # From run-parts(8).
  51     lanana_re   = re.compile('^[a-z0-9]+$')
  52     lsb_re      = re.compile('^_?([a-z0-9_.]+-)+[a-z0-9]+$')
  53     deb_cron_re = re.compile('^[a-z0-9][a-z0-9-]*$')
  54     for name in os.listdir(dirname):
  55         if lanana_re.match(name) or lsb_re.match(name) or deb_cron_re.match(name):
  56             yield os.path.join(dirname, name)
  57
  58 def list_files():
  59     yield src_path
  60     for name in run_parts_list(src_dirpath):
  61         yield name
  62
  63 def load_master():
  64     config = dict()
  65     for filename in list_files():
  66         with open(filename) as f:
  67             augment(config, yaml.load(f, loader))
  68     return config
  69
  70 def get_src_mtime():
  71     return max(max(os.path.getmtime(filename) for filename in list_files()),
  72                os.path.getmtime(src_dirpath))
  73
  74 def load(force_refresh = False):
  75     """
  76     Try loading the configuration from the faster-to-load JSON cache at
  77     cache_path.  If it doesn't exist or is outdated, load the configuration
  78     instead from the original YAML file at src_path and regenerate the cache.
  79     I assume I have the permissions to write to the cache directory.
  80     """
  81
  82     # Namespace container for state variables, so that they can be updated by
  83     # closures.
  84     ns = struct()
  85
  86     if force_refresh:
  87         do_refresh = True
  88     else:
  89         src_mtime = get_src_mtime()
  90         try:            cache_mtime = os.path.getmtime(cache_path)
  91         except OSError: do_refresh  = True
  92         else:           do_refresh  = src_mtime + 1 >= cache_mtime
  93
  94         # We chose not to simply say
  95         #
  96         #   do_refresh = src_mtime >= cache_time
  97         #
  98         # because between the getmtime(src_path) and the time the cache is
  99         # rewritten, the master configuration may have been updated, so future
 100         # checks here would find a cache with a newer mtime than the master
 101         # (and thus treat the cache as containing the latest version of the
 102         # master).  The +1 means that for at least a full second following the
 103         # update to the master, this function will refresh the cache, giving us
 104         # 1 second to write the cache.  Note that if it takes longer than 1
 105         # second to write the cache, then this situation could still arise.
 106         #
 107         # The getmtime calls should logically be part of the same transaction
 108         # as the rest of this function (cache read + conditional cache
 109         # refresh), but to wrap everything in an flock would cause the
 110         # following cache read to be less streamlined.
 111
 112     if not do_refresh:
 113         # Try reading from the cache first.  This must be transactionally
 114         # isolated from concurrent writes to prevent reading an incomplete
 115         # (changing) version of the data (but the transaction can share the
 116         # lock with other concurrent reads).  This isolation is accomplished
 117         # using an atomic filesystem rename in the refreshing stage.
 118         try:
 119             with open(cache_path) as f:
 120                 ns.cfg = json.read(f.read())
 121         except: do_refresh = True
 122
 123     if do_refresh:
 124         # Atomically reload the source and regenerate the cache.  The read and
 125         # write must be a single transaction, or a stale version may be
 126         # written (if another read/write of a more recent configuration
 127         # is interleaved).  The final atomic rename is to keep this
 128         # transactionally isolated from the above cache read.  If we fail to
 129         # acquire the lock, just try to load the master configuration.
 130         try:
 131             with invirt.common.open_locked(lock_path):
 132                 ns.cfg = load_master()
 133                 try:
 134                     with open(cache_path + '.tmp', 'w') as f:
 135                         f.write(json.write(ns.cfg))
 136                 except: pass # silent failure
 137                 else: os.rename(cache_path + '.tmp', cache_path)
 138         except IOError:
 139             ns.cfg = load_master()
 140     return ns.cfg
 141
 142 dicts = load()
 143 structs = dicts2struct(dicts, '')