+from __future__ import with_statement
+
import json
from invirt.common import *
+from os import rename
from os.path import getmtime
+from contextlib import closing
default_src_path = '/etc/invirt/master.yaml'
default_cache_path = '/var/lib/invirt/cache.json'
-lock_file = '/var/lib/invirt/cache.lock'
+lock_path = '/var/lib/invirt/cache.lock'
def load(src_path = default_src_path,
cache_path = default_cache_path,
instead from the original YAML file at src_path and regenerate the cache.
I assume I have the permissions to write to the cache directory.
"""
- # Namespace container for various state variables, so that they can be
- # updated by closures.
+
+ # Namespace container for state variables, so that they can be updated by
+ # closures.
ns = struct()
if force_refresh:
- ns.do_refresh = True
+ do_refresh = True
else:
src_mtime = getmtime(src_path)
- try: cache_mtime = getmtime(cache_path)
- except OSError: ns.do_refresh = True
- else: ns.do_refresh = src_mtime > cache_mtime
+ try: cache_mtime = getmtime(cache_path)
+ except OSError: do_refresh = True
+ else: do_refresh = src_mtime + 1 >= cache_mtime
- if not ns.do_refresh:
+ # We chose not to simply say
+ #
+    #     do_refresh = src_mtime >= cache_mtime
+ #
+ # because between the getmtime(src_path) and the time the cache is
+ # rewritten, the master configuration may have been updated, so future
+ # checks here would find a cache with a newer mtime than the master
+ # (and thus treat the cache as containing the latest version of the
+ # master). The +1 means that for at least a full second following the
+ # update to the master, this function will refresh the cache, giving us
+ # 1 second to write the cache. Note that if it takes longer than 1
+ # second to write the cache, then this situation could still arise.
+ #
+ # The getmtime calls should logically be part of the same transaction
+ # as the rest of this function (cache read + conditional cache
+ # refresh), but to wrap everything in an flock would cause the
+ # following cache read to be less streamlined.
+
+ if not do_refresh:
# Try reading from the cache first. This must be transactionally
# isolated from concurrent writes to prevent reading an incomplete
# (changing) version of the data (but the transaction can share the
- # lock with other concurrent reads).
- @with_lock_file(lock_file, False)
- def read_cache():
- try: ns.cfg = with_closing(file(cache_path)) (
- lambda f: json.read(f.read()))
- except: ns.do_refresh = True
-
- if ns.do_refresh:
+ # lock with other concurrent reads). This isolation is accomplished
+ # using an atomic filesystem rename in the refreshing stage.
+ try:
+ with closing(file(cache_path)) as f:
+ ns.cfg = json.read(f.read())
+ except: do_refresh = True
+
+ if do_refresh:
# Atomically reload the source and regenerate the cache. The read and
# write must be a single transaction, or a stale version may be
- # written.
- @with_lock_file(lock_file)
- def refresh_cache():
- import yaml
- ns.cfg = with_closing(file(src_path)) (
- lambda f: yaml.load(f, yaml.CSafeLoader))
- try: with_closing(file(cache_path, 'w')) (
- lambda f: f.write(json.write(ns.cfg)))
- except: pass # silent failure
+ # written (if another read/write of a more recent configuration
+ # is interleaved). The final atomic rename is to keep this
+ # transactionally isolated from the above cache read. If we fail to
+ # acquire the lock, just try to load the master configuration.
+ import yaml
+ try: loader = yaml.CSafeLoader
+ except: loader = yaml.SafeLoader
+ try:
+ with lock_file(lock_path):
+ with closing(file(src_path)) as f:
+ ns.cfg = yaml.load(f, loader)
+ try:
+ with closing(file(cache_path + '.tmp', 'w')) as f:
+ f.write(json.write(ns.cfg))
+ except: pass # silent failure
+ else: rename(cache_path + '.tmp', cache_path)
+ except IOError:
+ with closing(file(src_path)) as f:
+ ns.cfg = yaml.load(f, loader)
return ns.cfg
dicts = load()