Partial work on an svn-all-fast-export-based Git migrator.
[invirt/scripts/git-migration.git] / git-migrate
1 #!/usr/bin/python
2
3 import glob
4 import os
5 import sys
6 import subprocess
7 import shutil
8
def tagBase(pkg):
    """Tag the oldest commit on master of '<pkg>.git' as 'base'.

    Does nothing when a 'base' tag already exists, so calling this again
    on the same repository is harmless.

    Raises subprocess.CalledProcessError if a git command fails and
    ValueError if master has no commits to tag.
    """
    repo = '%s.git' % pkg

    def git_output(args):
        # communicate() drains stdout while git is still running. Calling
        # wait() before read() deadlocks as soon as git writes more than
        # the OS pipe buffer holds, which a long rev-list easily does.
        cmd = ['git'] + args
        p = subprocess.Popen(cmd,
                             cwd=repo,
                             stdout=subprocess.PIPE,
                             universal_newlines=True)
        out = p.communicate()[0]
        if p.returncode != 0:
            raise subprocess.CalledProcessError(p.returncode, cmd)
        return out

    if git_output(['tag', '-l', 'base']).strip() != '':
        return

    # --reverse lists the oldest commit first.
    revs = git_output(['rev-list', '--reverse', 'master']).split()
    if not revs:
        raise ValueError('%s has no commits on master to tag' % repo)

    subprocess.check_call(['git', 'tag', 'base', revs[0]], cwd=repo)
31
def clonePackage(base, repo_path):
    """Convert one Subversion package into a bare Git repository.

    base is the root URL of the Subversion repository and repo_path is the
    package's path below trunk/ (e.g. 'packages/foo'). The repository is
    created as '<basename of repo_path>.git' in the current directory.
    If that directory already exists the clone is skipped and only the
    history clean-up steps are run again.

    Raises subprocess.CalledProcessError if a git command fails.
    """
    pkg = os.path.basename(repo_path)
    repo = '%s.git' % pkg

    if not os.path.isdir(repo):
        # Remove any working tree left under the package name so that
        # git-svn clones into a fresh directory.
        if os.path.isdir(pkg):
            shutil.rmtree(pkg)

        args = ['-Ttrunk/%s' % repo_path]
        if repo_path.startswith('packages/'):
            args.append('-tpackage_tags/%s' % pkg)
        args.extend([base, pkg])

        # Use --no-follow-parent because we're going to handle that with
        # grafts.
        #
        # stdout goes to os.devnull rather than to a PIPE: nobody reads
        # the pipe, so git-svn would block once the pipe buffer filled up.
        with open(os.devnull, 'w') as devnull:
            subprocess.check_call(['git', 'svn', 'clone',
                                   '--no-follow-parent',
                                   '-Aauthors',
                                   '-q',
                                   '--no-metadata'] + args,
                                  stdout=devnull)

        # Then make the repository bare, because git-svn can't do this
        shutil.move('%s/.git' % pkg, repo)
        shutil.rmtree(pkg)
        subprocess.check_call(['git', 'config', 'core.bare', 'true'],
                              cwd=repo)

    # Some of these repos have a rev where everything was deleted
    # as a result of the move. We don't want that rev to exist.
    ls_tree = ['git', 'ls-tree', 'HEAD']
    p = subprocess.Popen(ls_tree,
                         cwd=repo,
                         stdout=subprocess.PIPE)
    # communicate() reads while git runs; wait() followed by read() can
    # deadlock on a large tree listing.
    listing = p.communicate()[0]
    if p.returncode != 0:
        raise subprocess.CalledProcessError(p.returncode, ls_tree)
    if not listing:
        subprocess.check_call(['git', 'reset', '--soft', 'HEAD^'],
                              cwd=repo)

    # Early in the project's history, there were a bunch of double
    # directory trees - i.e. the source was actually in
    # trunk/packages/$package/$package. Correct for that
    #
    # PACKAGE is handed to the child through its own environment copy
    # (presumably it is read by the filter-subdirs script - verify there)
    # instead of being left behind in this process's os.environ.
    env = dict(os.environ, PACKAGE=pkg)
    commit_filter = '%s "$@"' % os.path.join(os.getcwd(), 'filter-subdirs')
    subprocess.check_call(['git', 'filter-branch',
                           '--commit-filter', commit_filter,
                           '--tag-name-filter', 'cat',
                           '--',
                           '--all'],
                          cwd=repo,
                          env=env)

    # filter-branch keeps the pre-rewrite refs under refs/original; they
    # are not wanted, and the directory may legitimately be missing.
    shutil.rmtree('%s/refs/original' % repo, ignore_errors=True)

    tagBase(pkg)
86
def cloneAllPackages(base):
    """Clone every package named in the 'package-list' file.

    The file is read from the current directory and holds one repository
    path per line. Blank lines and lines starting with '#' are skipped,
    matching how the 'merges' file is parsed. base is passed unchanged to
    clonePackage for each remaining entry.
    """
    # Read the whole list first so the file is closed before the
    # long-running clones start.
    with open('package-list') as package_list:
        repo_paths = [line.strip() for line in package_list]

    for repo_path in repo_paths:
        if not repo_path or repo_path.startswith('#'):
            continue
        clonePackage(base, repo_path)
90
def mergeHistory(old_pkg, new_pkg, n):
    """Graft the history of '<old_pkg>.git' into '<new_pkg>.git'.

    old_pkg's master is pushed into new_pkg's repository as a branch named
    old_pkg. The tip of that branch is then recorded in info/grafts as an
    additional parent of one commit of new_pkg, and 'git filter-branch' is
    run over all refs so the graft becomes part of the real history.

    n (an int or a numeric string) selects the commit that receives the
    extra parent. 0 means the commit tagged 'base'. Otherwise the commit
    is the first one printed by
    'git rev-list --reverse --boundary --skip=<n-1> base..master'
    (presumably the n-th commit after base - TODO confirm).

    Raises subprocess.CalledProcessError if the push or one of the git
    queries fails, and ValueError if no commit matches n.
    """
    n = int(n)
    new_repo = '%s.git' % new_pkg

    def git_output(args):
        # communicate() drains stdout while git runs; wait() followed by
        # read() can deadlock once the output exceeds the pipe buffer.
        cmd = ['git'] + args
        p = subprocess.Popen(cmd,
                             cwd=new_repo,
                             stdout=subprocess.PIPE,
                             universal_newlines=True)
        out = p.communicate()[0]
        if p.returncode != 0:
            raise subprocess.CalledProcessError(p.returncode, cmd)
        return out

    subprocess.check_call(['git', 'push',
                           '../%s' % new_repo,
                           'master:refs/heads/%s' % old_pkg],
                          cwd='%s.git' % old_pkg)

    # Find the merge commit
    if n == 0:
        candidates = git_output(['rev-parse', 'base']).split()
    else:
        candidates = git_output(['rev-list',
                                 '--reverse',
                                 '--boundary',
                                 '--skip=%s' % (n - 1),
                                 'base..master']).split()
    if not candidates:
        raise ValueError('no commit found in %s for n=%d' % (new_repo, n))
    # --boundary marks boundary commits with a leading '-'.
    new_rev = candidates[0].strip('-')

    # Find any other parents of the merge commit
    parents = git_output(['log',
                          '-1',
                          '--pretty=format:%P',
                          new_rev]).split()

    # Find the additional parent we're adding
    parents.append(git_output(['rev-parse', old_pkg]).strip())

    # Write out the grafts file
    with open('%s/info/grafts' % new_repo, 'a') as grafts:
        grafts.write('%s %s\n' % (new_rev, ' '.join(parents)))

    # Run filter-branch
    # NOTE(review): the exit status of the next two commands is not
    # checked, as in the original; confirm whether that is intentional.
    subprocess.call(['git', 'filter-branch',
                     '--tag-name-filter', 'cat',
                     '--',
                     '--all'],
                    cwd=new_repo)

    subprocess.call(['git', 'branch',
                     '-D',
                     old_pkg],
                    cwd=new_repo)
    # Drop the backup refs that filter-branch leaves behind.
    shutil.rmtree('%s/refs/original' % new_repo, ignore_errors=True)
151
def mergeHistories():
    """Apply every history merge described in the 'merges' file.

    The file is read from the current directory. Each line that is neither
    blank nor a '#' comment must hold three whitespace-separated fields,
    'old_pkg new_pkg n', which are passed to mergeHistory in that order.
    Once all merges have run, each '<old_pkg>.git' repository is deleted.

    Raises ValueError, before any merge is started, if a line does not
    have exactly three fields.
    """
    merges = []
    with open('merges') as merge_file:
        for lineno, line in enumerate(merge_file):
            line = line.strip()
            if not line or line.startswith('#'):
                continue

            fields = line.split()
            # Reject bad input up front instead of failing halfway through
            # with some repositories already rewritten.
            if len(fields) != 3:
                raise ValueError(
                    'merges line %d: expected "old_pkg new_pkg n", got %r'
                    % (lineno + 1, line))
            merges.append(fields)

    for merge in merges:
        mergeHistory(*merge)

    # Only delete the old repositories after every merge has run, and
    # delete each one once even if it was merged more than once.
    removed = set()
    for merge in merges:
        old_pkg = merge[0]
        if old_pkg in removed:
            continue
        removed.add(old_pkg)
        shutil.rmtree('%s.git' % old_pkg)
166
def cleanupRepos():
    """Finish off every '*.git' repository in the current directory.

    For each repository the 'base' tag is deleted and then 'git gc' is
    run. A failing git command raises subprocess.CalledProcessError.
    """
    commands = (
        ['git', 'tag', '-d', 'base'],
        ['git', 'gc'],
    )
    for repo in glob.glob('*.git'):
        for command in commands:
            subprocess.check_call(command, cwd=repo)
174
if __name__ == '__main__':
    # The Subversion root URL may be given as the first command-line
    # argument; without one the default URL below is used. An explicit
    # length check replaces a bare "except:", which would also have hidden
    # unrelated errors such as KeyboardInterrupt.
    if len(sys.argv) > 1:
        base = sys.argv[1]
    else:
        base = 'svn://invirt.mit.edu'

    cloneAllPackages(base)
    mergeHistories()
    cleanupRepos()