Remove the refs/original from the first round of filter-branch.
[invirt/scripts/git-migration.git] / git-migrate
1 #!/usr/bin/python
2
3 import glob
4 import os
5 import sys
6 import subprocess
7 import shutil
8
def tagBase(pkg):
    """Create a 'base' tag in <pkg>.git unless one already exists.

    The tag is placed on the first commit printed by
    ``git rev-list --reverse master``.

    Args:
        pkg: package name; the repository is expected at '<pkg>.git'
            relative to the current directory.

    Raises:
        subprocess.CalledProcessError: if any git command exits non-zero.
        ValueError: if rev-list reports no commits on master.
    """
    repo = '%s.git' % pkg

    def git_output(*args):
        # communicate() drains stdout while waiting for the child.  Calling
        # wait() first and read() afterwards can deadlock as soon as the
        # output is larger than the OS pipe buffer.
        cmd = ['git'] + list(args)
        proc = subprocess.Popen(cmd, cwd=repo, stdout=subprocess.PIPE)
        out = proc.communicate()[0]
        if proc.returncode != 0:
            raise subprocess.CalledProcessError(proc.returncode, cmd)
        return out

    # Already tagged: nothing to do.
    if git_output('tag', '-l', 'base').strip():
        return

    revs = git_output('rev-list', '--reverse', 'master').split()
    if not revs:
        raise ValueError('%s has no commits on master' % repo)

    subprocess.check_call(['git', 'tag', 'base', revs[0]], cwd=repo)
31
def clonePackage(base, pkg):
    """Clone one package with git-svn into a bare repository and clean it up.

    Steps, in order: clone '<base>/<pkg>' (skipped when '<name>.git' already
    exists), turn the clone into a bare repository, drop an empty HEAD
    revision, run filter-branch with the 'filter-subdirs' commit filter,
    remove refs/original and finally call tagBase().

    Args:
        base: URL prefix of the Subversion repository.
        pkg: package path below base.  Only its basename is used for the
            local directory names.

    Raises:
        subprocess.CalledProcessError: if a checked git command fails.
    """
    path = '%s/%s' % (base, pkg)
    pkg = os.path.basename(pkg)
    repo = '%s.git' % pkg

    if not os.path.isdir(repo):
        # Remove any leftover working copy before cloning again.
        if os.path.isdir(pkg):
            shutil.rmtree(pkg)
        # Use --no-follow-parent because we're going to handle that with
        # grafts.
        # The output is discarded through os.devnull rather than a PIPE:
        # nothing would ever read the pipe, so the child could block forever
        # once the pipe buffer filled up.
        devnull = open(os.devnull, 'w')
        try:
            subprocess.check_call(['git', 'svn', 'clone',
                                   '--no-follow-parent',
                                   '-Aauthors',
                                   '-q',
                                   '--no-metadata',
                                   path],
                                  stdout=devnull)
        finally:
            devnull.close()

        # Then make the repository bare, because git-svn can't do this
        shutil.move('%s/.git' % pkg, repo)
        shutil.rmtree(pkg)
        subprocess.check_call(['git', 'config', 'core.bare', 'true'],
                              cwd=repo)

    # Some of these repos have a rev where everything was deleted
    # as a result of the move. We don't want that rev to exist.
    # communicate() is used instead of wait()+read() so a large listing
    # cannot deadlock on a full pipe.  The exit status is not checked: an
    # empty listing is the only signal acted upon.
    proc = subprocess.Popen(['git', 'ls-tree', 'HEAD'],
                            cwd=repo,
                            stdout=subprocess.PIPE)
    listing = proc.communicate()[0]
    if not listing:
        subprocess.check_call(['git', 'reset', '--soft', 'HEAD^'],
                              cwd=repo)

    # Early in the project's history, there were a bunch of double
    # directory trees - i.e. the source was actually in
    # trunk/packages/$package/$package. Correct for that
    # PACKAGE is exported for the child processes; presumably it is read by
    # the filter-subdirs script, which is not part of this file.
    os.environ['PACKAGE'] = pkg
    commit_filter = '%s "$@"' % os.path.join(os.getcwd(), 'filter-subdirs')
    subprocess.check_call(['git', 'filter-branch',
                           '--commit-filter', commit_filter,
                           '--tag-name-filter', 'cat',
                           '--',
                           '--all'],
                          cwd=repo)

    # Drop the backup refs filter-branch leaves behind (errors ignored).
    shutil.rmtree('%s/refs/original' % repo, True)

    tagBase(pkg)
80
def cloneAllPackages(base):
    """Run clonePackage() for every package named in the 'package-list' file.

    The file is read from the current directory, one package path per line.
    Blank lines and lines starting with '#' are skipped.

    Args:
        base: URL prefix passed through to clonePackage().
    """
    listing = open('package-list')
    try:
        names = [line.strip() for line in listing]
    finally:
        listing.close()

    for name in names:
        # An empty name must never reach clonePackage(): it would build the
        # paths '.git' and '/.git' from it.  '#' lines are treated as
        # comments, the same way the 'merges' file is parsed.
        if not name or name.startswith('#'):
            continue
        clonePackage(base, name)
84
def mergeHistory(old_pkg, new_pkg, n):
    """Graft the master history of <old_pkg>.git into <new_pkg>.git.

    The old master is pushed into the new repository as branch <old_pkg>,
    a line is appended to info/grafts giving the chosen commit that branch
    tip as an extra parent, and filter-branch is run over all refs.
    Afterwards the temporary branch and refs/original are removed.

    Args:
        old_pkg: name of the package whose history is merged in.
        new_pkg: name of the package receiving the history.
        n: integer (or numeric string).  0 selects the commit tagged 'base';
            otherwise the first commit printed by ``git rev-list --reverse
            --boundary --skip=<n-1> base..master`` is used.

    Raises:
        ValueError: if n is not an integer or no commit could be selected.
        subprocess.CalledProcessError: if the push or one of the git queries
            exits non-zero.
    """
    n = int(n)
    old_repo = '%s.git' % old_pkg
    new_repo = '%s.git' % new_pkg

    def git_output(*args):
        # communicate() reads stdout while waiting, so large output (e.g. a
        # long rev-list) cannot deadlock the way wait()+read() can.  A failed
        # query is reported instead of silently yielding empty output.
        cmd = ['git'] + list(args)
        proc = subprocess.Popen(cmd, cwd=new_repo, stdout=subprocess.PIPE)
        out = proc.communicate()[0]
        if proc.returncode != 0:
            raise subprocess.CalledProcessError(proc.returncode, cmd)
        # Only object names are expected here, so ASCII is sufficient.
        return out.decode('ascii')

    subprocess.check_call(['git', 'push',
                           '../%s' % new_repo,
                           'master:refs/heads/%s' % old_pkg],
                          cwd=old_repo)

    # Find the merge commit
    if n == 0:
        listed = git_output('rev-parse', 'base')
    else:
        listed = git_output('rev-list',
                            '--reverse',
                            '--boundary',
                            '--skip=%s' % (n - 1),
                            'base..master')
    revs = listed.split()
    if not revs:
        raise ValueError('no merge commit found in %s for n=%d'
                         % (new_repo, n))
    # --boundary prefixes boundary commits with '-'; strip that marker.
    new_rev = revs[0].strip('-')

    # Find any other parents of the merge commit
    parents = git_output('log', '-1', '--pretty=format:%P', new_rev).split()

    # Find the additional parent we're adding
    parents.append(git_output('rev-parse', old_pkg).strip())

    # Write out the grafts file
    grafts = open('%s/info/grafts' % new_repo, 'a')
    try:
        grafts.write('%s %s\n' % (new_rev, ' '.join(parents)))
    finally:
        grafts.close()

    # Run filter-branch
    # NOTE(review): the exit status of the next two commands is not checked,
    # as before; presumably deliberate best-effort - confirm.
    subprocess.call(['git', 'filter-branch',
                     '--tag-name-filter', 'cat',
                     '--',
                     '--all'],
                    cwd=new_repo)

    subprocess.call(['git', 'branch',
                     '-D',
                     old_pkg],
                    cwd=new_repo)
    shutil.rmtree('%s/refs/original' % new_repo, True)
145
def mergeHistories():
    """Apply every merge listed in the 'merges' file, then delete old repos.

    Each non-blank, non-comment ('#') line of 'merges' must hold three
    whitespace-separated fields, which are passed to mergeHistory() as
    (old_pkg, new_pkg, n).  After all merges have run, '<old_pkg>.git' is
    removed once for every distinct old package.

    Raises:
        ValueError: if a line does not have exactly three fields.  The whole
            file is validated before the first merge is started.
    """
    merges = []
    spec = open('merges')
    try:
        for index, line in enumerate(spec):
            fields = line.split()
            if not fields or fields[0].startswith('#'):
                continue
            if len(fields) != 3:
                raise ValueError('merges line %d: expected 3 fields, got %d'
                                 % (index + 1, len(fields)))
            merges.append(fields)
    finally:
        spec.close()

    for merge in merges:
        mergeHistory(*merge)

    # An old package may be merged more than once; remove its repository
    # only once so the second rmtree does not fail on a missing directory.
    removed = set()
    for merge in merges:
        old_pkg = merge[0]
        if old_pkg in removed:
            continue
        removed.add(old_pkg)
        shutil.rmtree('%s.git' % old_pkg)
160
def cleanupRepos():
    """Delete the 'base' tag and run 'git gc' in every path matching *.git.

    The glob is evaluated relative to the current directory.  Each command
    is checked, so the first failure raises subprocess.CalledProcessError.
    """
    commands = (
        ['git', 'tag', '-d', 'base'],
        ['git', 'gc'],
    )
    for repo in glob.glob('*.git'):
        for argv in commands:
            subprocess.check_call(argv, cwd=repo)
168
if __name__ == '__main__':
    # The Subversion base URL may be given as the first command-line
    # argument; otherwise the default below is used.  An explicit length
    # check replaces the former bare 'except:', which would have swallowed
    # any exception at all.
    if len(sys.argv) > 1:
        base = sys.argv[1]
    else:
        base = 'svn://invirt.mit.edu/trunk'

    cloneAllPackages(base)
    mergeHistories()
    cleanupRepos()