First crack at the invirtibuilder.
author Evan Broder <broder@mit.edu>
Fri, 13 Nov 2009 03:29:16 +0000 (22:29 -0500)
committer Evan Broder <broder@mit.edu>
Fri, 13 Nov 2009 03:29:16 +0000 (22:29 -0500)
No promises of functionality yet.

svn path=/trunk/scripts/git-hooks/; revision=2538

builder/invirtibuilder [new file with mode: 0755]

diff --git a/builder/invirtibuilder b/builder/invirtibuilder
new file mode 100755 (executable)
index 0000000..716d7d7
--- /dev/null
@@ -0,0 +1,512 @@
+#!/usr/bin/python
+
+"""Process the Invirt build queue.
+
+The Invirtibuilder handles package builds and uploads. On demand, it
+attempts to build a particular package.
+
+If the build succeeds, the new version of the package is uploaded to
+the apt repository, tagged in its git repository, and the Invirt
+superrepo is updated to point at the new version.
+
+If the build fails, the Invirtibuilder sends mail with the build log.
+
+The build queue is tracked via files in /var/lib/invirt-dev/queue. In
+order to maintain ordering, all filenames in that directory are the
+timestamp of their creation time.
+
+Each queue file contains a line of the form
+
+    pocket package hash principal
+
+where pocket is one of the pockets globally configured in
+git.pockets. For instance, the pockets in XVM are "prod" and "dev".
+
+principal is the Kerberos principal that requested the build.
+"""
+
+
+import contextlib
+import glob
+import os
+import re
+import shutil
+import subprocess
+import tempfile
+import traceback
+
+import pyinotify
+
+from invirt.config import structs as config
+from invirt import database
+
+
+# Directory holding one file per pending build request.
+_QUEUE_DIR = '/var/lib/invirt-dev/queue'
+# Root under which the per-package repos and the superrepo live.
+_REPO_DIR = '/srv/git'
+# A subdirectory per build is created here to hold its build logs.
+_LOG_DIR = '/var/log/invirt/builds'
+# run-parts directory of hooks invoked to report each build attempt.
+_HOOKS_DIR = '/usr/share/invirt-dev/build.d'
+
+
+# Distribution name handed to sbuild with -d.
+DISTRIBUTION = 'hardy'
+
+
+class InvalidBuild(ValueError):
+    """Raised by validateBuild when a build request fails validation."""
+    pass
+
+
+def captureOutput(popen_args, stdin_str=None, *args, **kwargs):
+    """Run a command and hand back what it wrote to stdout.
+
+    Every argument other than stdin_str is forwarded to
+    subprocess.Popen. Unless the caller overrides them, stdin and
+    stdout are pipes and stderr is folded into stdout.
+
+    stdin_str, if given, is fed to the command's standard input.
+
+    Returns the captured output (None if the caller pointed stdout
+    somewhere other than a pipe). Raises
+    subprocess.CalledProcessError if the command exits with a non-0
+    status.
+    """
+    kwargs.setdefault('stdin', subprocess.PIPE)
+    kwargs.setdefault('stdout', subprocess.PIPE)
+    kwargs.setdefault('stderr', subprocess.STDOUT)
+
+    proc = subprocess.Popen(popen_args, *args, **kwargs)
+    output = proc.communicate(stdin_str)[0]
+    if proc.returncode == 0:
+        return output
+    raise subprocess.CalledProcessError(proc.returncode, popen_args, output)
+
+
+def getRepo(package):
+    """Return the filesystem path of a package's git repository."""
+    repo_name = '%s.git' % package
+    return os.path.join(_REPO_DIR, 'packages', repo_name)
+
+
+def pocketToGit(pocket):
+    """Return the git branch that corresponds to a configured pocket.
+
+    Falls back to the pocket's own name if it has no 'git' setting.
+    """
+    pocket_conf = config.git.pockets[pocket]
+    return pocket_conf.get('git', pocket)
+
+
+def pocketToApt(pocket):
+    """Return the apt repo pocket that corresponds to a configured pocket.
+
+    Falls back to the pocket's own name if it has no 'apt' setting.
+    """
+    pocket_conf = config.git.pockets[pocket]
+    return pocket_conf.get('apt', pocket)
+
+
+def getGitFile(package, ref, path):
+    """Return the contents of path as of ref in a package's repo."""
+    blob_spec = '%s:%s' % (ref, path)
+    cat_file = ['git', 'cat-file', 'blob', blob_spec]
+    return captureOutput(cat_file, cwd=getRepo(package))
+
+
+def getChangelog(package, ref):
+    """Parse debian/changelog from a given ref of a given package.
+
+    The result is a debian_bundle.changelog.Changelog object.
+    """
+    # NOTE(review): no import of `changelog` is visible in this file;
+    # presumably it should come from debian_bundle -- confirm.
+    return changelog.Changelog(
+        getGitFile(package=package, ref=ref, path='debian/changelog'))
+
+
+def getVersion(package, ref):
+    """Return the version reported by a package's changelog at a ref."""
+    parsed_changelog = getChangelog(package, ref)
+    return parsed_changelog.get_version()
+
+
+def getControl(package, ref):
+    """Parse debian/control from a given ref of a given package.
+
+    The result yields debian_bundle.deb822.Deb822 objects, one per
+    section of the debian/control file. Each Deb822 object acts
+    roughly like a dict.
+    """
+    # NOTE(review): no import of `deb822` is visible in this file;
+    # presumably it should come from debian_bundle -- confirm.
+    return deb822.Deb822.iter_paragraphs(
+        getGitFile(package=package, ref=ref,
+                   path='debian/control').split('\n'))
+
+
+def getBinaries(package, ref):
+    """List the binary package names declared by a package at a ref."""
+    names = []
+    for section in getControl(package, ref):
+        if 'Package' in section:
+            names.append(section['Package'])
+    return names
+
+
+def getArches(package, ref):
+    """Collect every architecture named by any debian/control section."""
+    found = set()
+    for paragraph in getControl(package, ref):
+        if 'Architecture' not in paragraph:
+            continue
+        for arch in paragraph['Architecture'].split():
+            found.add(arch)
+    return found
+
+
+def getDscName(package, ref):
+    """Return the .dsc file that will be generated for this package.
+
+    The name is <package>_<upstream>-<revision>.dsc, or
+    <package>_<upstream>.dsc when the version has no Debian revision
+    (a native package). The epoch is not part of the file name.
+    """
+    v = getVersion(package, ref)
+    if v.debian_version:
+        v_str = '%s-%s' % (v.upstream_version, v.debian_version)
+    else:
+        # Without this branch a native version would be rendered with
+        # a bogus "-None" revision.
+        v_str = v.upstream_version
+    return '%s_%s.dsc' % (package, v_str)
+
+
+def validateBuild(pocket, package, commit):
+    """Given the parameters of a new build, validate that build.
+
+    The checks this function performs vary based on whether or not the
+    pocket is configured with allow_backtracking.
+
+    A build of a pocket without allow_backtracking set must be a
+    fast-forward of the previous revision, and the most recent version
+    in the changelog must be strictly greater than the version
+    currently in the repository.
+
+    In all cases, this revision of the package can only have the same
+    version number as any other revision currently in the apt
+    repository if they have the same commit ID.
+
+    If it's unspecified, it is assumed that a pocket does not
+    allow_backtracking.
+
+    If this build request fails validation, this function will raise
+    an InvalidBuild exception, with information about why the
+    validation failed.
+
+    If this build request can be satisfied by copying the package from
+    another pocket, then this function returns that pocket. Otherwise,
+    it returns True.
+    """
+    package_repo = getRepo(package)
+    new_version = getVersion(package, commit)
+
+    for p in config.git.pockets:
+        if p == pocket:
+            continue
+
+        b = pocketToGit(p)
+        try:
+            # rev-parse output ends in a newline; strip it so that it
+            # can be compared against the requested commit ID.
+            current_commit = captureOutput(['git', 'rev-parse', b],
+                                           cwd=package_repo).strip()
+        except subprocess.CalledProcessError:
+            # Presumably this package has no branch for that pocket
+            # yet, so there is nothing there to conflict with or to
+            # copy from.
+            continue
+        current_version = getVersion(package, b)
+
+        if current_version == new_version:
+            if current_commit == commit:
+                return p
+            else:
+                raise InvalidBuild('Version %s of %s already available in '
+                                   'pocket %s from commit %s' %
+                                   (new_version, package, p, current_commit))
+
+    # The ordering checks only apply to pockets that do NOT allow
+    # backtracking.
+    if not config.git.pockets[pocket].get('allow_backtracking', False):
+        branch = pocketToGit(pocket)
+        current_version = getVersion(package, branch)
+        if new_version <= current_version:
+            raise InvalidBuild('New version %s of %s is not newer than '
+                               'version %s currently in pocket %s' %
+                               (new_version, package, current_version, pocket))
+
+        # commit is a fast-forward of branch exactly when nothing is
+        # reachable from branch that is not also reachable from
+        # commit, i.e. when commit..branch is empty.
+        if captureOutput(['git', 'rev-list', '%s..%s' % (commit, branch)],
+                         cwd=package_repo).strip():
+            raise InvalidBuild('New commit %s of %s is not a fast-forward of '
+                               'commit currently in pocket %s' %
+                               (commit, package, pocket))
+
+    return True
+
+
+def sanitizeVersion(version):
+    """Sanitize a Debian package version for use as a git tag.
+
+    This function strips the epoch from the version number and
+    replaces any tildes with periods, since neither ':' nor '~' may
+    appear in a git ref name.
+
+    A version without a Debian revision (a native package) is
+    rendered as just its upstream version.
+    """
+    if version.debian_version:
+        v = '%s-%s' % (version.upstream_version,
+                       version.debian_version)
+    else:
+        # Avoid a bogus "-None" suffix when there is no revision.
+        v = version.upstream_version
+    return v.replace('~', '.')
+
+
+def aptCopy(packages, dst_pocket, src_pocket):
+    """Copy a package from one pocket to another.
+
+    packages is the name of the source package to copy. The names of
+    its binary packages are read from debian/control on the git
+    branch of src_pocket, and the source package together with those
+    binaries is copied from src_pocket to dst_pocket with
+    "reprepro-env copy".
+    """
+    # validateBuild only nominates a source pocket whose branch is at
+    # the requested commit, so the tip of that branch is the right
+    # place to read debian/control from.
+    src_ref = pocketToGit(src_pocket)
+    control = getGitFile(packages, src_ref, 'debian/control')
+
+    binaries = []
+    for line in control.split('\n'):
+        m = re.match(r'Package: (.*)$', line)
+        if m:
+            binaries.append(m.group(1).strip())
+
+    captureOutput(['reprepro-env', 'copy',
+                   pocketToApt(dst_pocket),
+                   pocketToApt(src_pocket),
+                   packages] + binaries)
+
+
+def sbuild(package, ref, arch, workdir, arch_all=False):
+    """Run sbuild on the package's .dsc for a single architecture.
+
+    sbuild is run from workdir. When arch_all is true, -A is added to
+    the sbuild command line.
+    """
+    cmd = ['sbuild', '-d', DISTRIBUTION, '--arch', arch]
+    cmd += ['-A'] if arch_all else []
+    cmd.append(getDscName(package, ref))
+    # stdout=None: sbuild's output is not captured.
+    captureOutput(cmd, cwd=workdir, stdout=None)
+
+
+def sbuildAll(package, ref, workdir):
+    """Build a package on each architecture its control file calls for.
+
+    The amd64 build is the one that is also asked to build with
+    arch_all set.
+    """
+    arches = getArches(package, ref)
+    if arches & set(['all', 'any', 'amd64']):
+        sbuild(package, ref, 'amd64', workdir, arch_all=True)
+    if arches & set(['any', 'i386']):
+        sbuild(package, ref, 'i386', workdir)
+
+
+def tagSubmodule(pocket, package, ref, principal):
+    """Tag a new version of a submodule.
+
+    If this pocket does not allow_backtracking, then this will create
+    a new tag of the version at ref, named after the sanitized
+    package version, in the package's repository. For pockets that do
+    allow_backtracking, nothing is tagged.
+
+    This function doesn't need to care about lock
+    contention. git-receive-pack updates one ref at a time, and only
+    takes out a lock for that ref after it's passed the update
+    hook. Because we reject pushes to tags in the update hook, no push
+    can ever take out a lock on any tags.
+
+    I'm sure that long description gives you great confidence in the
+    legitimacy of my reasoning.
+    """
+    if config.git.pockets[pocket].get('allow_backtracking', False):
+        return
+
+    version = getVersion(package, ref)
+
+    env = dict(os.environ)
+    env['GIT_COMMITTER_NAME'] = config.git.tagger.name
+    env['GIT_COMMITTER_EMAIL'] = config.git.tagger.email
+    tag_msg = ('Tag %s of %s\n\n'
+               'Requested by %s' % (version.full_version,
+                                    package,
+                                    principal))
+
+    # git tag takes the tag name first and the object to tag second;
+    # run it inside the package's repository.
+    captureOutput(
+        ['git', 'tag', '-m', tag_msg, sanitizeVersion(version), ref],
+        cwd=getRepo(package),
+        stdout=None,
+        env=env)
+
+
+def updateSubmoduleBranch(pocket, package, ref):
+    """Update the appropriately named branch in the submodule.
+
+    The branch for the given pocket in the package's repository is
+    pointed at ref.
+    """
+    branch = pocketToGit(pocket)
+    # Run inside the package's repository; otherwise update-ref acts
+    # on whatever repository the builder's working directory is in.
+    captureOutput(
+        ['git', 'update-ref', 'refs/heads/%s' % branch, ref],
+        cwd=getRepo(package))
+
+
+def uploadBuild(pocket, workdir):
+    """Feed every .changes file in the work directory to reprepro.
+
+    Each file is included into the apt pocket that corresponds to the
+    given pocket.
+    """
+    apt_pocket = pocketToApt(pocket)
+    pattern = os.path.join(workdir, '*.changes')
+    for changes_file in glob.glob(pattern):
+        include_cmd = ['reprepro-env', 'include',
+                       '--ignore=wrongdistribution',
+                       apt_pocket, changes_file]
+        captureOutput(include_cmd)
+
+
+def updateSuperrepo(pocket, package, commit, principal):
+    """Update the superrepo.
+
+    This will create a new commit on the branch for the given pocket
+    that sets the commit for the package submodule to commit.
+
+    Note that there's no locking issue here, because we disallow all
+    pushes to the superrepo.
+    """
+    superrepo = os.path.join(_REPO_DIR, 'packages.git')
+    branch = pocketToGit(pocket)
+    version = getVersion(package, commit)
+
+    # Commit as the same identity that tagSubmodule tags with.
+    env = dict(os.environ)
+    env['GIT_COMMITTER_NAME'] = config.git.tagger.name
+    env['GIT_COMMITTER_EMAIL'] = config.git.tagger.email
+
+    tree = captureOutput(['git', 'ls-tree', branch],
+                         cwd=superrepo)
+
+    # Rewrite the gitlink entry (mode 160000) for this package. The
+    # replacement is a function rather than a template because a
+    # template of r'\1' followed by a hash that starts with a digit
+    # would be read as a reference to a different group.
+    submodule_re = re.compile(
+        r'^(160000 commit )[0-9a-f]*(\t%s)$' % re.escape(package), re.M)
+    new_tree = submodule_re.sub(
+        lambda m: m.group(1) + commit + m.group(2),
+        tree)
+
+    # The object IDs git prints end in a newline, which must not be
+    # passed on as part of a command-line argument.
+    new_tree_id = captureOutput(['git', 'mktree'],
+                                cwd=superrepo,
+                                stdin_str=new_tree).strip()
+
+    commit_msg = ('Update %s to version %s\n\n'
+                  'Requested by %s' % (package,
+                                       version.full_version,
+                                       principal))
+    new_commit = captureOutput(
+        ['git', 'commit-tree', new_tree_id, '-p', branch],
+        cwd=superrepo,
+        env=env,
+        stdin_str=commit_msg).strip()
+
+    captureOutput(
+        ['git', 'update-ref', 'refs/heads/%s' % branch, new_commit],
+        cwd=superrepo)
+
+
+@contextlib.contextmanager
+def packageWorkdir(package, commit='HEAD'):
+    """Checkout the package in a temporary working directory.
+
+    This context manager returns that working directory. The requested
+    commit of the package (the repository's HEAD if none is given) is
+    checked out into a subdirectory of the working directory with the
+    same name as the package.
+
+    When the context wrapped with this context manager is exited, the
+    working directory is automatically deleted.
+
+    Raises subprocess.CalledProcessError if either git archive or tar
+    exits with a non-0 status.
+    """
+    workdir = tempfile.mkdtemp()
+    try:
+        archive_args = ['git', 'archive',
+                        '--remote=file://%s' % getRepo(package),
+                        # The trailing slash makes the prefix a
+                        # directory instead of a filename prefix.
+                        '--prefix=%s/' % package,
+                        commit,
+                        ]
+        tar_args = ['tar', '-x']
+
+        p_archive = subprocess.Popen(
+            archive_args,
+            stdout=subprocess.PIPE,
+            )
+        p_tar = subprocess.Popen(
+            tar_args,
+            stdin=p_archive.stdout,
+            cwd=workdir,
+            )
+        # tar holds its own copy of the pipe; drop ours so that git
+        # archive is not left writing to a pipe nobody reads if tar
+        # exits early.
+        p_archive.stdout.close()
+        p_tar.wait()
+        p_archive.wait()
+
+        if p_archive.returncode:
+            raise subprocess.CalledProcessError(p_archive.returncode,
+                                                archive_args)
+        if p_tar.returncode:
+            raise subprocess.CalledProcessError(p_tar.returncode, tar_args)
+
+        yield workdir
+    finally:
+        shutil.rmtree(workdir)
+
+
+def reportBuild(build):
+    """Invoke the reporting hooks for a build attempt.
+
+    The hooks directory is run through run-parts, with the build's ID
+    passed to each hook as its argument.
+    """
+    build_id_arg = '--arg=%s' % build.build_id
+    captureOutput(['run-parts', build_id_arg, '--', _HOOKS_DIR])
+
+
+def build():
+    """Deal with items in the build queue.
+
+    When triggered, iterate over build queue items one at a time,
+    until there are no more pending build jobs.
+
+    Each job is recorded as a database.Build row. Whether the attempt
+    succeeds or fails, the row is committed, the queue item is
+    removed, and the reporting hooks are run.
+    """
+    while True:
+        queue = os.listdir(_QUEUE_DIR)
+        if not queue:
+            break
+
+        # Queue files are named after their creation timestamp, so the
+        # smallest name is the oldest request.
+        job_path = os.path.join(_QUEUE_DIR, min(queue))
+        with open(job_path) as job_file:
+            job = job_file.read().strip()
+        pocket, package, commit, principal = job.split()
+
+        database.session.begin()
+        db = database.Build()
+        db.package = package
+        db.pocket = pocket
+        db.commit = commit
+        db.principal = principal
+        database.session.save_or_update(db)
+        database.session.commit()
+
+        database.session.begin()
+
+        try:
+            db.failed_stage = 'validating job'
+            src = validateBuild(pocket, package, commit)
+
+            db.version = str(getVersion(package, commit))
+
+            # If validateBuild returns something other than True, then
+            # it means we should copy from that pocket to our pocket.
+            #
+            # (If the validation failed, validateBuild would have
+            # raised an exception)
+            if src is not True:
+                db.failed_stage = 'copying package from another pocket'
+                aptCopy(package, pocket, src)
+            # If we can't copy the package from somewhere, but
+            # validateBuild didn't raise an exception, then we need to
+            # do the build ourselves
+            else:
+                db.failed_stage = 'checking out package source'
+                with packageWorkdir(package, commit) as workdir:
+                    db.failed_stage = 'preparing source package'
+                    packagedir = os.path.join(workdir, package)
+
+                    # We should be more clever about dealing with
+                    # things like non-Debian-native packages than we
+                    # are.
+                    #
+                    # If we were, we could use debuild and get nice
+                    # environment scrubbing. Since we're not, debuild
+                    # complains about not having an orig.tar.gz
+                    captureOutput(['dpkg-buildpackage', '-us', '-uc', '-S'],
+                                  cwd=packagedir,
+                                  stdout=None)
+
+                    try:
+                        db.failed_stage = 'building binary packages'
+                        sbuildAll(package, commit, workdir)
+                    finally:
+                        # Keep the build logs whether or not the
+                        # build worked. str() in case build_id is not
+                        # already a string.
+                        logdir = os.path.join(_LOG_DIR, str(db.build_id))
+                        if not os.path.exists(logdir):
+                            os.makedirs(logdir)
+
+                        for log in glob.glob(os.path.join(workdir, '*.build')):
+                            shutil.copy2(log, logdir)
+                    db.failed_stage = 'tagging submodule'
+                    tagSubmodule(pocket, package, commit, principal)
+                    db.failed_stage = 'updating submodule branches'
+                    updateSubmoduleBranch(pocket, package, commit)
+                    db.failed_stage = 'updating superrepo'
+                    updateSuperrepo(pocket, package, commit, principal)
+                    db.failed_stage = 'uploading packages to apt repo'
+                    uploadBuild(pocket, workdir)
+
+                    db.failed_stage = 'cleaning up'
+        except Exception:
+            # db.failed_stage still names the stage that was running.
+            db.traceback = traceback.format_exc()
+        else:
+            db.succeeded = True
+            db.failed_stage = None
+        finally:
+            database.session.save_or_update(db)
+            database.session.commit()
+
+            # Remove the build queue item whether or not the build
+            # worked; a job left in the queue would be picked up again
+            # on the next pass of this loop, forever.
+            os.unlink(job_path)
+
+            reportBuild(db)
+
+
+class Invirtibuilder(pyinotify.ProcessEvent):
+    """Process inotify triggers to build new packages."""
+    def process_IN_CREATE(self, event):
+        """Handle a created file or directory.
+
+        When an IN_CREATE event comes in, trigger the builder. The
+        event itself is not inspected; build() rescans the whole queue
+        directory.
+        """
+        # NOTE(review): IN_CREATE can presumably arrive before the
+        # writer has finished filling in the queue file -- confirm how
+        # producers create queue files.
+        build()
+
+
+def main():
+    """Connect to the database, watch the queue, and build forever."""
+    database.connect()
+
+    wm = pyinotify.WatchManager()
+    notifier = pyinotify.Notifier(wm, Invirtibuilder())
+    create_mask = pyinotify.EventsCodes.ALL_FLAGS['IN_CREATE']
+    wm.add_watch(_QUEUE_DIR, create_mask)
+
+    # Jobs that were queued before the watch existed never generate an
+    # event, so drain the queue once by hand before waiting on inotify.
+    build()
+
+    while True:
+        notifier.process_events()
+        if notifier.check_events():
+            notifier.read_events()
+
+
+# Only start the builder when run as a script, not when imported.
+if __name__ == '__main__':
+    main()