Use filter-branch to correct the trunk/packages/$package/$package double directory trees
[invirt/scripts/git-migration.git] / git-migrate
1 #!/usr/bin/python
2
3 import glob
4 import os
5 import sys
6 import subprocess
7 import shutil
8
def tagBase(pkg):
    """Create a 'base' tag in <pkg>.git if one does not exist yet.

    The tag is placed on the first commit printed by
    'git rev-list --reverse master'. Nothing is done when
    'git tag -l base' already prints a tag name.

    Raises IndexError if rev-list prints no commits, and
    subprocess.CalledProcessError if creating the tag fails.
    """
    repo = '%s.git' % pkg

    # communicate() drains the pipe while the child is running. Calling
    # wait() before reading can hang forever once git has written more
    # than the pipe buffer holds, which rev-list does on a long history.
    p = subprocess.Popen(['git', 'tag', '-l', 'base'],
                         cwd=repo,
                         stdout=subprocess.PIPE)
    if p.communicate()[0].strip():
        return

    p = subprocess.Popen(['git', 'rev-list', '--reverse', 'master'],
                         cwd=repo,
                         stdout=subprocess.PIPE)
    revs = p.communicate()[0].split()
    if not revs:
        raise IndexError('no commits found on master in %s' % repo)

    subprocess.check_call(['git', 'tag', 'base', revs[0]], cwd=repo)
31
def clonePackage(base, pkg):
    """Clone <base>/<pkg> with git-svn into a bare repo and tidy its history.

    base -- URL prefix that <pkg> is appended to.
    pkg  -- package path below <base>; only its basename is used for the
            local names <name> and <name>.git.

    The clone itself is skipped when <name>.git already exists; the
    history rewriting steps and tagBase() run either way.

    Raises subprocess.CalledProcessError if a checked git command fails.
    """
    path = '%s/%s' % (base, pkg)
    pkg = os.path.basename(pkg)
    repo = '%s.git' % pkg

    if not os.path.isdir(repo):
        # Remove any leftover working tree so the clone starts clean.
        if os.path.isdir(pkg):
            shutil.rmtree(pkg)
        # Use --no-follow-parent because we're going to handle that with
        # grafts.
        #
        # The clone's stdout is discarded through os.devnull. Nothing
        # would ever read a pipe here, so a pipe could fill up and block
        # the child indefinitely.
        with open(os.devnull, 'w') as devnull:
            subprocess.check_call(['git', 'svn', 'clone',
                                   '--no-follow-parent',
                                   '-Aauthors',
                                   '-q',
                                   '--no-metadata',
                                   path],
                                  stdout=devnull)

        # Then make the repository bare, because git-svn can't do this
        shutil.move('%s/.git' % pkg, repo)
        shutil.rmtree(pkg)
        subprocess.check_call(['git', 'config', 'core.bare', 'true'],
                              cwd=repo)

    # Some of these repos have a rev where everything was deleted
    # as a result of the move. We don't want that rev to exist.
    # communicate() is used so the pipe is drained while git runs.
    p = subprocess.Popen(['git', 'ls-tree', 'HEAD'],
                         cwd=repo,
                         stdout=subprocess.PIPE)
    if not p.communicate()[0]:
        subprocess.check_call(['git', 'reset', '--soft', 'HEAD^'],
                              cwd=repo)

    # Early in the project's history, there were a bunch of double
    # directory trees - i.e. the source was actually in
    # trunk/packages/$package/$package. Correct for that
    #
    # The commit filter is the 'filter-subdirs' script in the current
    # directory; PACKAGE is exported for it.
    # NOTE(review): presumably filter-subdirs reads $PACKAGE -- confirm.
    commit_filter = '%s "$@"' % os.path.join(os.getcwd(), 'filter-subdirs')
    os.environ['PACKAGE'] = pkg
    subprocess.check_call(['git', 'filter-branch',
                           '--commit-filter', commit_filter,
                           '--tag-name-filter', 'cat',
                           '--',
                           '--all'],
                          cwd=repo)

    tagBase(pkg)
78
def cloneAllPackages(base):
    """Run clonePackage(base, name) for every name in 'package-list'.

    'package-list' is read from the current directory, one package per
    line. Surrounding whitespace is stripped and blank lines are skipped,
    so a stray empty line does not trigger a clone of an empty name.
    """
    # Read the whole list first so the file is closed before the
    # long-running clones start.
    with open('package-list') as package_list:
        packages = [line.strip() for line in package_list]

    for pkg in packages:
        if pkg:
            clonePackage(base, pkg)
82
def mergeHistory(old_pkg, new_pkg, n):
    """Graft the master history of <old_pkg>.git into <new_pkg>.git.

    old_pkg -- name of the repo whose master is pushed into the new repo
               as a temporary branch called <old_pkg>.
    new_pkg -- name of the repo that receives the extra parent.
    n       -- int or numeric string selecting the commit that gets the
               extra parent: 0 means the commit tagged 'base'; otherwise
               the first line printed by
               'git rev-list --reverse --boundary --skip=<n-1> base..master'.

    A line '<commit> <existing parents> <tip of old_pkg>' is appended to
    <new_pkg>.git/info/grafts, then filter-branch is run over all refs,
    the temporary branch is deleted and refs/original is removed.

    Raises IndexError when no commit is found for <n>, and
    subprocess.CalledProcessError if the push fails.
    """
    n = int(n)
    new_repo = '%s.git' % new_pkg

    def git_output(args):
        # communicate() reads while git runs; waiting for exit before
        # reading can hang once the output exceeds the pipe buffer.
        # universal_newlines gives text output for the string handling
        # below.
        p = subprocess.Popen(['git'] + args,
                             cwd=new_repo,
                             stdout=subprocess.PIPE,
                             universal_newlines=True)
        return p.communicate()[0]

    subprocess.check_call(['git', 'push',
                           '../%s.git' % new_pkg,
                           'master:refs/heads/%s' % old_pkg],
                          cwd='%s.git' % old_pkg)

    # Find the merge commit
    if n == 0:
        revs = git_output(['rev-parse', 'base'])
    else:
        revs = git_output(['rev-list',
                           '--reverse',
                           '--boundary',
                           '--skip=%s' % (n - 1),
                           'base..master'])
    revs = revs.split()
    if not revs:
        raise IndexError('no commit found for n=%d in %s' % (n, new_repo))
    # --boundary prefixes boundary commits with '-'; drop that marker.
    new_rev = revs[0].strip('-')

    # Find any other parents of the merge commit
    parents = git_output(['log',
                          '-1',
                          '--pretty=format:%P',
                          new_rev]).split()

    # Find the additional parent we're adding
    parents.append(git_output(['rev-parse', old_pkg]).strip())

    # Write out the grafts file
    with open('%s/info/grafts' % new_repo, 'a') as grafts:
        grafts.write('%s %s\n' % (new_rev, ' '.join(parents)))

    # Run filter-branch
    # NOTE(review): the exit status of filter-branch and of the branch
    # deletion below is not checked -- confirm that is intended.
    subprocess.call(['git', 'filter-branch',
                     '--tag-name-filter', 'cat',
                     '--',
                     '--all'],
                    cwd=new_repo)

    subprocess.call(['git', 'branch',
                     '-D',
                     old_pkg],
                    cwd=new_repo)
    # Errors are ignored here, so a missing refs/original is fine.
    shutil.rmtree('%s/refs/original' % new_repo, True)
143
def mergeHistories():
    """Apply every merge listed in the 'merges' file, then drop old repos.

    'merges' is read from the current directory. Blank lines and lines
    whose first non-blank character is '#' are ignored. Every other line
    is split on whitespace and its fields are passed positionally to
    mergeHistory(), i.e. 'old_pkg new_pkg n'.

    After all merges have run, <old_pkg>.git is removed for each old
    package. A package listed on several lines is removed only once, so
    the second removal cannot fail on a directory that is already gone.
    """
    merges = []
    with open('merges') as merge_file:
        for line in merge_file:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            merges.append(line.split())

    for merge in merges:
        mergeHistory(*merge)

    # Old repos are deleted only after every merge is done, since a
    # later merge may still need to push from one of them.
    removed = set()
    for merge in merges:
        old_pkg = merge[0]
        if old_pkg in removed:
            continue
        removed.add(old_pkg)
        shutil.rmtree('%s.git' % old_pkg)
158
def cleanupRepos():
    """Delete the 'base' tag from, and run 'git gc' in, every *.git dir.

    The directories are found by globbing '*.git' in the current
    directory. Raises subprocess.CalledProcessError as soon as one of
    the git commands exits with a non-zero status.
    """
    steps = (
        ['git', 'tag', '-d', 'base'],
        ['git', 'gc'],
    )
    for repo_dir in glob.glob('*.git'):
        for command in steps:
            subprocess.check_call(command, cwd=repo_dir)
166
if __name__ == '__main__':
    # The Subversion base URL may be given as the first command-line
    # argument; otherwise the default below is used. The argument count
    # is checked explicitly so that no unrelated error is swallowed.
    if len(sys.argv) > 1:
        base = sys.argv[1]
    else:
        base = 'svn://invirt.mit.edu/trunk'

    cloneAllPackages(base)
    mergeHistories()
    cleanupRepos()