Use templates to compactify the list of quashed revisions.
[invirt/scripts/git-migration.git] / git-migrate
index edd916c..52204b2 100755 (executable)
@@ -1,22 +1,55 @@
 #!/usr/bin/python
 
+import glob
 import os
 import sys
 import subprocess
 import shutil
 
-def clonePackage(base, pkg):
+def tagBase(pkg):
+    p = subprocess.Popen(['git', 'tag',
+                          '-l',
+                          'base'],
+                         cwd='%s.git' % pkg,
+                         stdout=subprocess.PIPE)
+    p.wait()
+    if p.stdout.read().strip() != '':
+        return
+    
+    p = subprocess.Popen(['git', 'rev-list',
+                          '--reverse',
+                          'master'],
+                         cwd='%s.git' % pkg,
+                         stdout=subprocess.PIPE)
+    p.wait()
+    base = p.stdout.read().split()[0]
+    
+    subprocess.check_call(['git', 'tag',
+                           'base',
+                           base],
+                          cwd='%s.git' % pkg)
+
+def clonePackage(base, repo_path):
+    pkg = os.path.basename(repo_path)
+    
     if not os.path.isdir('%s.git' % pkg):
         if os.path.isdir(pkg):
             shutil.rmtree(pkg)
+        
+        args = []
+        args.append('-Ttrunk/%s' % repo_path)
+        if repo_path.startswith('packages/'):
+            args.append('-tpackage_tags/%s' % pkg)
+        args.append(base)
+        args.append(pkg)
+        
         # Use --no-follow-parent because we're going to handle that with
         # grafts.
         subprocess.check_call(['git', 'svn', 'clone',
                                '--no-follow-parent',
                                '-Aauthors',
                                '-q',
-                               '--no-metadata',
-                               '%s/packages/%s' % (base, pkg)],
+                               '--no-metadata'] + args,
                               stdout=subprocess.PIPE)
         
         # Then make the repository bare, because git-svn can't do this
@@ -27,35 +60,124 @@ def clonePackage(base, pkg):
         
     # Some of these repos have a rev where everything was deleted
     # as a result of the move. We don't want that rev to exist.
-    p = subprocess.Popen(['git', 'ls-files'],
+    p = subprocess.Popen(['git', 'ls-tree', 'HEAD'],
                          cwd='%s.git' % pkg,
                          stdout=subprocess.PIPE)
     p.wait()
     if len(p.stdout.read()) == 0:
-        subprocess.check_call(['git', 'reset', 'HEAD^'],
+        subprocess.check_call(['git', 'reset', '--soft', 'HEAD^'],
                               cwd='%s.git' % pkg)
+    
+    # Early in the project's history, there were a bunch of doubled
+    # directory trees - i.e. the source was actually in
+    # trunk/packages/$package/$package. Rewrite history to correct that.
+    cwd = os.getcwd()
+    os.environ['PACKAGE'] = pkg
+    p = subprocess.check_call(['git', 'filter-branch',
+                               '--commit-filter', '%s "$@"' % os.path.join(cwd, 'filter-subdirs'),
+                               '--tag-name-filter', 'cat',
+                               '--',
+                               '--all'],
+                              cwd='%s.git' % pkg)
+    
+    shutil.rmtree('%s.git/refs/original' % pkg, True)
+    
+    tagBase(pkg)
 
 def cloneAllPackages(base):
     for pkg in open('package-list'):
         clonePackage(base, pkg.strip())
 
 def mergeHistory(old_pkg, new_pkg, n):
-    pass
+    n = int(n)
+    
+    subprocess.check_call(['git', 'push',
+                           '../%s.git' % new_pkg,
+                           'master:refs/heads/%s' % old_pkg],
+                          cwd='%s.git' % old_pkg)
+    
+    # Find the merge commit
+    if n == 0:
+        p = subprocess.Popen(['git', 'rev-parse',
+                              'base'],
+                             cwd='%s.git' % new_pkg,
+                             stdout=subprocess.PIPE)
+    else:
+        p = subprocess.Popen(['git', 'rev-list',
+                              '--reverse',
+                              '--boundary',
+                              '--skip=%s' % (n - 1),
+                              'base..master'],
+                             cwd='%s.git' % new_pkg,
+                             stdout=subprocess.PIPE)
+    p.wait()
+    new_rev = p.stdout.read().split()[0].strip('-')
+    
+    # Find any other parents of the merge commit
+    p = subprocess.Popen(['git', 'log',
+                          '-1',
+                          '--pretty=format:%P',
+                          new_rev],
+                         cwd='%s.git' % new_pkg,
+                         stdout=subprocess.PIPE)
+    p.wait()
+    parents = p.stdout.read().split()
+    
+    # Find the additional parent we're adding
+    p = subprocess.Popen(['git', 'rev-parse',
+                          old_pkg],
+                         cwd='%s.git' % new_pkg,
+                         stdout=subprocess.PIPE)
+    p.wait()
+    parents.append(p.stdout.read().strip())
+    
+    # Write out the grafts file
+    f = open('%s.git/info/grafts' % new_pkg, 'a')
+    print >>f, '%s %s' % (new_rev, ' '.join(parents))
+    f.close()
+    
+    # Run filter-branch
+    subprocess.call(['git', 'filter-branch',
+                     '--tag-name-filter', 'cat',
+                     '--',
+                     '--all'],
+                    cwd='%s.git' % new_pkg)
+    
+    subprocess.call(['git', 'branch',
+                     '-D',
+                     old_pkg],
+                    cwd='%s.git' % new_pkg)
+    shutil.rmtree('%s.git/refs/original' % new_pkg, True)
 
 def mergeHistories():
-    for line in open('grafts'):
+    merges = []
+    for line in open('merges'):
         line = line.strip()
-        if line[0] == '#' or line == '':
+        if line == '' or line[0] == '#':
             continue
         
-        old_pkg, new_pkg, n = line.split()
-        mergeHistory(old_pkg, new_pkg, int(n))
+        merges.append(line.split())
+    
+    for merge in merges:
+        mergeHistory(*merge)
+    
+    for merge in merges:
+        shutil.rmtree('%s.git' % merge[0])
+
+def cleanupRepos():
+    for pkg in glob.glob('*.git'):
+        subprocess.check_call(['git', 'tag', '-d', 'base'],
+                              cwd='%s' % pkg)
+        
+        subprocess.check_call(['git', 'gc'],
+                              cwd='%s' % pkg)
 
 if __name__ == '__main__':
     try:
         base = sys.argv[1]
     except:
-        base = 'svn://invirt.mit.edu/trunk'
+        base = 'svn://invirt.mit.edu'
     
     cloneAllPackages(base)
     mergeHistories()
+    cleanupRepos()