python/invirt/config.py

   1 import json
   2 import os
   3 import re
   4
   5 import yaml
   6 import invirt.common
   7
   8
   9 SRC_PATH    = '/etc/invirt/master.yaml'
  10 SRC_DIRPATH = '/etc/invirt/conf.d'
  11 CACHE_PATH  = '/var/lib/invirt/cache.json'
  12 LOCK_PATH   = '/var/lib/invirt/cache.lock'
  13
  14 def augment(d1, d2):
  15     """
  16     Splice dict-tree d2 into d1.  Return d1.
  17
  18     d2 may be None for an empty dict-tree, because yaml.load produces that.
  19
  20     Example:
  21     >>> d = {'a': {'b': 1}, 'c': 2}
  22     >>> augment(d, {'a': {'d': 3}})
  23     {'a': {'b', 1, 'd': 3}, 'c': 2}
  24     >>> d
  25     {'a': {'b', 1, 'd': 3}, 'c': 2}
  26     """
  27
  28     if d2 is None:
  29         return d1
  30     for k in d2:
  31         if k in d1 and isinstance(d1[k], dict):
  32             augment(d1[k], d2[k])
  33         else:
  34             d1[k] = d2[k]
  35     return d1
  36
  37 def run_parts_list(dirname):
  38     """
  39     Reimplements Debian's run-parts --list.
  40
  41     One difference from run-parts's behavior: run-parts --list /foo/
  42     will give output like /foo//bar, but run_parts_list('/foo/') gives
  43     /foo/bar in deference to Python conventions.
  44
  45     Matches documented behavior of run-parts in debianutils v2.28.2, dated 2007.
  46     """
  47
  48     # From run-parts(8).
  49     lanana_re   = re.compile('^[a-z0-9]+$')
  50     lsb_re      = re.compile('^_?([a-z0-9_.]+-)+[a-z0-9]+$')
  51     deb_cron_re = re.compile('^[a-z0-9][a-z0-9-]*$')
  52     for name in os.listdir(dirname):
  53         if lanana_re.match(name) or lsb_re.match(name) or deb_cron_re.match(name):
  54             yield os.path.join(dirname, name)
  55
  56 def list_files():
  57     yield SRC_PATH
  58     for name in run_parts_list(SRC_DIRPATH):
  59         yield name
  60
  61 def load_master():
  62     config = dict()
  63     for filename in list_files():
  64         with open(filename) as f:
  65             augment(config, yaml.load(f, yaml.CSafeLoader))
  66     return config
  67
  68 def get_src_mtime():
  69     return max(max(os.path.getmtime(filename) for filename in list_files()),
  70                os.path.getmtime(SRC_DIRPATH))
  71
  72 def load(force_refresh=False):
  73     """
  74     Try loading the configuration from the faster-to-load JSON cache at
  75     CACHE_PATH.  If it doesn't exist or is outdated, load the configuration
  76     instead from the original YAML file at SRC_PATH and regenerate the cache.
  77     I assume I have the permissions to write to the cache directory.
  78     """
  79
  80     # Namespace container for state variables, so that they can be updated by
  81     # closures.
  82     ns = struct()
  83
  84     if force_refresh:
  85         do_refresh = True
  86     else:
  87         src_mtime = get_src_mtime()
  88         try:
  89             cache_mtime = os.path.getmtime(CACHE_PATH)
  90         except OSError:
  91             do_refresh  = True
  92         else:
  93             do_refresh  = src_mtime + 1 >= cache_mtime
  94
  95         # We chose not to simply say
  96         #
  97         #   do_refresh = src_mtime >= cache_time
  98         #
  99         # because between the getmtime(SRC_PATH) and the time the cache is
 100         # rewritten, the master configuration may have been updated, so future
 101         # checks here would find a cache with a newer mtime than the master
 102         # (and thus treat the cache as containing the latest version of the
 103         # master).  The +1 means that for at least a full second following the
 104         # update to the master, this function will refresh the cache, giving us
 105         # 1 second to write the cache.  Note that if it takes longer than 1
 106         # second to write the cache, then this situation could still arise.
 107         #
 108         # The getmtime calls should logically be part of the same transaction
 109         # as the rest of this function (cache read + conditional cache
 110         # refresh), but to wrap everything in an flock would cause the
 111         # following cache read to be less streamlined.
 112
 113     if not do_refresh:
 114         # Try reading from the cache first.  This must be transactionally
 115         # isolated from concurrent writes to prevent reading an incomplete
 116         # (changing) version of the data (but the transaction can share the
 117         # lock with other concurrent reads).  This isolation is accomplished
 118         # using an atomic filesystem rename in the refreshing stage.
 119         try:
 120             with open(CACHE_PATH) as f:
 121                 ns.cfg = json.read(f.read())
 122         except:
 123             do_refresh = True
 124
 125     if do_refresh:
 126         # Atomically reload the source and regenerate the cache.  The read and
 127         # write must be a single transaction, or a stale version may be
 128         # written (if another read/write of a more recent configuration
 129         # is interleaved).  The final atomic rename is to keep this
 130         # transactionally isolated from the above cache read.  If we fail to
 131         # acquire the lock, just try to load the master configuration.
 132         try:
 133             with invirt.common.open_locked(LOCK_PATH):
 134                 ns.cfg = load_master()
 135                 try:
 136                     with open(CACHE_PATH + '.tmp', 'w') as f:
 137                         f.write(json.write(ns.cfg))
 138                 except:
 139                     pass # silent failure
 140                 else:
 141                     os.rename(CACHE_PATH + '.tmp', CACHE_PATH)
 142         except IOError:
 143             ns.cfg = load_master()
 144     return ns.cfg
 145
 146 dicts = load()
 147 structs = dicts2struct(dicts, '')