#!/usr/bin/python
"""Convert the packages of a Subversion repository into bare git repositories.

The script works in the current directory and expects to find there:

* ``package-list`` -- one repository path per line (relative to ``trunk/``);
* ``authors``      -- the authors file handed to ``git svn clone -A``;
* ``merges``       -- lines of ``old_pkg new_pkg n`` describing histories to
  splice together with grafts;
* ``filter-subdirs`` -- the commit filter run by ``git filter-branch``.

Usage: the optional first command line argument is the Subversion base URL.
"""

import glob
import os
import sys
import subprocess
import shutil


def _gitOutput(args, cwd):
    """Run ``git <args>`` inside *cwd* and return what it wrote to stdout.

    ``communicate()`` is used instead of ``wait()`` followed by ``read()``:
    waiting first can deadlock once the child fills the pipe buffer.  The
    exit status is intentionally not checked; callers only look at the text.
    ``universal_newlines=True`` makes the result a text string on both
    Python 2 and Python 3.
    """
    proc = subprocess.Popen(['git'] + list(args),
                            cwd=cwd,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    out, _ = proc.communicate()
    return out


def tagBase(pkg):
    """Create a ``base`` tag in ``<pkg>.git`` unless one already exists.

    The tag is placed on the first commit printed by
    ``git rev-list --reverse master``.
    """
    repo = '%s.git' % pkg

    if _gitOutput(['tag', '-l', 'base'], repo).strip() != '':
        # Already tagged; nothing to do.
        return

    base = _gitOutput(['rev-list', '--reverse', 'master'], repo).split()[0]
    subprocess.check_call(['git', 'tag', 'base', base], cwd=repo)


def clonePackage(base, repo_path):
    """Clone ``trunk/<repo_path>`` from *base* into a bare ``<pkg>.git``.

    *base* is the Subversion URL passed to ``git svn clone``; *repo_path* is
    the path below ``trunk/`` and its last component becomes the package
    name.  If ``<pkg>.git`` already exists the clone step is skipped, but
    the history rewrite and the ``base`` tagging below still run.

    Raises ``subprocess.CalledProcessError`` when one of the checked git
    commands fails.
    """
    pkg = os.path.basename(repo_path)
    repo = '%s.git' % pkg

    if not os.path.isdir(repo):
        # Throw away any half-finished working tree from an earlier run.
        if os.path.isdir(pkg):
            shutil.rmtree(pkg)

        args = ['-Ttrunk/%s' % repo_path]
        if repo_path.startswith('packages/'):
            args.append('-tpackage_tags/%s' % pkg)
        args.append(base)
        args.append(pkg)

        # Use --no-follow-parent because we're going to handle that with
        # grafts.
        #
        # The output is discarded.  It goes to os.devnull rather than to an
        # unread pipe, because a child writing into a pipe nobody reads
        # blocks as soon as the pipe buffer is full.
        with open(os.devnull, 'w') as devnull:
            subprocess.check_call(['git', 'svn', 'clone',
                                   '--no-follow-parent',
                                   '-Aauthors',
                                   '-q',
                                   '--no-metadata'] + args,
                                  stdout=devnull)

        # Then make the repository bare, because git-svn can't do this
        shutil.move('%s/.git' % pkg, repo)
        shutil.rmtree(pkg)
        subprocess.check_call(['git', 'config', 'core.bare', 'true'],
                              cwd=repo)

    # Some of these repos have a rev where everything was deleted
    # as a result of the move. We don't want that rev to exist.
    if len(_gitOutput(['ls-tree', 'HEAD'], repo)) == 0:
        subprocess.check_call(['git', 'reset', '--soft', 'HEAD^'], cwd=repo)

    # Early in the project's history, there were a bunch of double
    # directory trees - i.e. the source was actually in
    # trunk/packages/$package/$package. Correct for that.
    cwd = os.getcwd()
    # The package name is exported in the environment for the filter script.
    os.environ['PACKAGE'] = pkg
    subprocess.check_call(['git', 'filter-branch',
                           '--commit-filter',
                           '%s "$@"' % os.path.join(cwd, 'filter-subdirs'),
                           '--tag-name-filter', 'cat',
                           '--', '--all'],
                          cwd=repo)

    # Drop the backup refs left behind by filter-branch (errors ignored).
    shutil.rmtree('%s/refs/original' % repo, True)

    tagBase(pkg)


def cloneAllPackages(base):
    """Clone every package named in the ``package-list`` file.

    Blank lines and lines starting with ``#`` are skipped, the same way the
    ``merges`` file is read; previously a blank line was handed on as an
    empty repository path.
    """
    with open('package-list') as listing:
        repo_paths = [line.strip() for line in listing]

    for repo_path in repo_paths:
        if repo_path == '' or repo_path[0] == '#':
            continue
        clonePackage(base, repo_path)


def mergeHistory(old_pkg, new_pkg, n):
    """Graft the history of ``<old_pkg>.git`` into ``<new_pkg>.git``.

    *n* (an int or a numeric string) selects the commit in the new
    repository that receives the old ``master`` as an additional parent:
    ``0`` means the ``base`` tag itself, otherwise the commit is taken from
    ``git rev-list --reverse --boundary --skip=<n-1> base..master``.
    """
    n = int(n)
    old_repo = '%s.git' % old_pkg
    new_repo = '%s.git' % new_pkg

    # Make the old history available in the new repository as a branch
    # named after the old package.
    subprocess.check_call(['git', 'push',
                           '../%s.git' % new_pkg,
                           'master:refs/heads/%s' % old_pkg],
                          cwd=old_repo)

    # Find the merge commit
    if n == 0:
        listing = _gitOutput(['rev-parse', 'base'], new_repo)
    else:
        listing = _gitOutput(['rev-list',
                              '--reverse',
                              '--boundary',
                              '--skip=%s' % (n - 1),
                              'base..master'],
                             new_repo)
    # Boundary commits are printed with a leading '-'; strip it.
    new_rev = listing.split()[0].strip('-')

    # Find any other parents of the merge commit
    parents = _gitOutput(['log', '-1', '--pretty=format:%P', new_rev],
                         new_repo).split()

    # Find the additional parent we're adding
    parents.append(_gitOutput(['rev-parse', old_pkg], new_repo).strip())

    # Write out the grafts file: "<commit> <parent> <parent> ..."
    with open('%s/info/grafts' % new_repo, 'a') as grafts:
        grafts.write('%s %s\n' % (new_rev, ' '.join(parents)))

    # Run filter-branch so the graft is baked into real history.  As in the
    # original script, the exit status of these two commands is ignored.
    subprocess.call(['git', 'filter-branch',
                     '--tag-name-filter', 'cat',
                     '--', '--all'],
                    cwd=new_repo)

    subprocess.call(['git', 'branch', '-D', old_pkg], cwd=new_repo)
    shutil.rmtree('%s/refs/original' % new_repo, True)


def mergeHistories():
    """Apply every merge listed in the ``merges`` file, then delete the
    repositories that were merged away.

    Each non-blank, non-``#`` line is split on whitespace and passed to
    ``mergeHistory`` as its positional arguments.
    """
    merges = []
    with open('merges') as spec:
        for line in spec:
            line = line.strip()
            if line == '' or line[0] == '#':
                continue
            merges.append(line.split())

    for merge in merges:
        mergeHistory(*merge)

    # Remove the old repositories only after all merges have run, and only
    # once each: an old package listed in several merges would otherwise
    # make the second rmtree fail on a missing directory.
    removed = set()
    for merge in merges:
        old_pkg = merge[0]
        if old_pkg in removed:
            continue
        removed.add(old_pkg)
        shutil.rmtree('%s.git' % old_pkg)


def cleanupRepos():
    """Delete the helper ``base`` tag from every ``*.git`` directory in the
    current directory and run ``git gc`` in it."""
    for repo in glob.glob('*.git'):
        subprocess.check_call(['git', 'tag', '-d', 'base'], cwd=repo)
        subprocess.check_call(['git', 'gc'], cwd=repo)


if __name__ == '__main__':
    # Only a missing argument selects the default URL; other errors are no
    # longer swallowed by a bare except.
    try:
        base = sys.argv[1]
    except IndexError:
        base = 'svn://invirt.mit.edu'

    cloneAllPackages(base)
    mergeHistories()
    cleanupRepos()