1#!/usr/bin/perl
2
3# This tool is copyright (c) 2005, Matthias Urlichs.
4# It is released under the Gnu Public License, version 2.
5#
6# The basic idea is to aggregate CVS check-ins into related changes.
7# Fortunately, "cvsps" does that for us; all we have to do is to parse
8# its output.
9#
10# Checking out the files is done by a single long-running CVS connection
11# / server process.
12#
13# The head revision is on branch "origin" by default.
14# You can change that with the '-o' option.
15
16use 5.008;
17use strict;
18use warnings;
19use Getopt::Long;
20use File::Spec;
21use File::Temp qw(tempfile tmpnam);
22use File::Path qw(mkpath);
23use File::Basename qw(basename dirname);
24use Time::Local;
25use IO::Socket;
26use IO::Pipe;
27use POSIX qw(strftime dup2 ENOENT);
28use IPC::Open2;
29
30$SIG{'PIPE'}="IGNORE";
31$ENV{'TZ'}="UTC";
32
33our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
34my (%conv_author_name, %conv_author_email);
35
36sub usage(;$) {
37 my $msg = shift;
38 print(STDERR "Error: $msg\n") if $msg;
39 print STDERR <<END;
40Usage: git cvsimport # fetch/update GIT from CVS
41 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
42 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
43 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
44 [-r remote] [-R] [CVS_module]
45END
46 exit(1);
47}
48
49sub read_author_info($) {
50 my ($file) = @_;
51 my $user;
52 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
53
54 while (<$f>) {
55 # Expected format is this:
56 # exon=Andreas Ericsson <ae@op5.se>
57 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
58 $user = $1;
59 $conv_author_name{$user} = $2;
60 $conv_author_email{$user} = $3;
61 }
62 # However, we also read from CVSROOT/users format
63 # to ease migration.
64 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
65 my $mapped;
66 ($user, $mapped) = ($1, $3);
67 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
68 $conv_author_name{$user} = $1;
69 $conv_author_email{$user} = $2;
70 }
71 elsif ($mapped =~ /^<?(.*)>?$/) {
72 $conv_author_name{$user} = $user;
73 $conv_author_email{$user} = $1;
74 }
75 }
76 # NEEDSWORK: Maybe warn on unrecognized lines?
77 }
78 close ($f);
79}
80
81sub write_author_info($) {
82 my ($file) = @_;
83 open my $f, '>', $file or
84 die("Failed to open $file for writing: $!");
85
86 foreach (keys %conv_author_name) {
87 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
88 }
89 close ($f);
90}
91
92# convert getopts specs for use by git config
93my %longmap = (
94 'A:' => 'authors-file',
95 'M:' => 'merge-regex',
96 'P:' => undef,
97 'R' => 'track-revisions',
98 'S:' => 'ignore-paths',
99);
100
101sub read_repo_config {
102 # Split the string between characters, unless there is a ':'
103 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
104 my @opts = split(/ *(?!:)/, shift);
105 foreach my $o (@opts) {
106 my $key = $o;
107 $key =~ s/://g;
108 my $arg = 'git config';
109 $arg .= ' --bool' if ($o !~ /:$/);
110 my $ckey = $key;
111
112 if (exists $longmap{$o}) {
113 # An uppercase option like -R cannot be
114 # expressed in the configuration, as the
115 # variable names are downcased.
116 $ckey = $longmap{$o};
117 next if (! defined $ckey);
118 $ckey =~ s/-//g;
119 }
120 chomp(my $tmp = `$arg --get cvsimport.$ckey`);
121 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
122 no strict 'refs';
123 my $opt_name = "opt_" . $key;
124 if (!$$opt_name) {
125 $$opt_name = $tmp;
126 }
127 }
128 }
129}
130
131my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
132read_repo_config($opts);
133Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
134
135# turn the Getopt::Std specification in a Getopt::Long one,
136# with support for multiple -M options
137GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
138 or usage();
139usage if $opt_h;
140
141if (@ARGV == 0) {
142 chomp(my $module = `git config --get cvsimport.module`);
143 push(@ARGV, $module) if $? == 0;
144}
145@ARGV <= 1 or usage("You can't specify more than one CVS module");
146
147if ($opt_d) {
148 $ENV{"CVSROOT"} = $opt_d;
149} elsif (-f 'CVS/Root') {
150 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
151 $opt_d = <$f>;
152 chomp $opt_d;
153 close $f;
154 $ENV{"CVSROOT"} = $opt_d;
155} elsif ($ENV{"CVSROOT"}) {
156 $opt_d = $ENV{"CVSROOT"};
157} else {
158 usage("CVSROOT needs to be set");
159}
160$opt_s ||= "-";
161$opt_a ||= 0;
162
163my $git_tree = $opt_C;
164$git_tree ||= ".";
165
166my $remote;
167if (defined $opt_r) {
168 $remote = 'refs/remotes/' . $opt_r;
169 $opt_o ||= "master";
170} else {
171 $opt_o ||= "origin";
172 $remote = 'refs/heads';
173}
174
175my $cvs_tree;
176if ($#ARGV == 0) {
177 $cvs_tree = $ARGV[0];
178} elsif (-f 'CVS/Repository') {
179 open my $f, '<', 'CVS/Repository' or
180 die 'Failed to open CVS/Repository';
181 $cvs_tree = <$f>;
182 chomp $cvs_tree;
183 close $f;
184} else {
185 usage("CVS module has to be specified");
186}
187
188our @mergerx = ();
189if ($opt_m) {
190 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
191}
192if (@opt_M) {
193 push (@mergerx, map { qr/$_/ } @opt_M);
194}
195
196# Remember UTC of our starting time
197# we'll want to avoid importing commits
198# that are too recent
199our $starttime = time();
200
201select(STDERR); $|=1; select(STDOUT);
202
203
204package CVSconn;
205# Basic CVS dialog.
206# We're only interested in connecting and downloading, so ...
207
208use File::Spec;
209use File::Temp qw(tempfile);
210use POSIX qw(strftime dup2);
211
212sub new {
213 my ($what,$repo,$subdir) = @_;
214 $what=ref($what) if ref($what);
215
216 my $self = {};
217 $self->{'buffer'} = "";
218 bless($self,$what);
219
220 $repo =~ s#/+$##;
221 $self->{'fullrep'} = $repo;
222 $self->conn();
223
224 $self->{'subdir'} = $subdir;
225 $self->{'lines'} = undef;
226
227 return $self;
228}
229
230sub find_password_entry {
231 my ($cvspass, @cvsroot) = @_;
232 my ($file, $delim) = @$cvspass;
233 my $pass;
234 local ($_);
235
236 if (open(my $fh, $file)) {
237 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
238 CVSPASSFILE:
239 while (<$fh>) {
240 chomp;
241 s/^\/\d+\s+//;
242 my ($w, $p) = split($delim,$_,2);
243 for my $cvsroot (@cvsroot) {
244 if ($w eq $cvsroot) {
245 $pass = $p;
246 last CVSPASSFILE;
247 }
248 }
249 }
250 close($fh);
251 }
252 return $pass;
253}
254
255sub conn {
256 my $self = shift;
257 my $repo = $self->{'fullrep'};
258 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
259 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
260
261 my ($proxyhost,$proxyport);
262 if ($param && ($param =~ m/proxy=([^;]+)/)) {
263 $proxyhost = $1;
264 # Default proxyport, if not specified, is 8080.
265 $proxyport = 8080;
266 if ($ENV{"CVS_PROXY_PORT"}) {
267 $proxyport = $ENV{"CVS_PROXY_PORT"};
268 }
269 if ($param =~ m/proxyport=([^;]+)/) {
270 $proxyport = $1;
271 }
272 }
273 $repo ||= '/';
274
275 # if username is not explicit in CVSROOT, then use current user, as cvs would
276 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
277 my $rr2 = "-";
278 unless ($port) {
279 $rr2 = ":pserver:$user\@$serv:$repo";
280 $port=2401;
281 }
282 my $rr = ":pserver:$user\@$serv:$port$repo";
283
284 if ($pass) {
285 $pass = $self->_scramble($pass);
286 } else {
287 my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
288 [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
289 my @loc = ();
290 foreach my $cvspass (@cvspass) {
291 my $p = find_password_entry($cvspass, $rr, $rr2);
292 if ($p) {
293 push @loc, $cvspass->[0];
294 $pass = $p;
295 }
296 }
297
298 if (1 < @loc) {
299 die("Multiple cvs password files have ".
300 "entries for CVSROOT $opt_d: @loc");
301 } elsif (!$pass) {
302 $pass = "A";
303 }
304 }
305
306 my ($s, $rep);
307 if ($proxyhost) {
308
309 # Use a HTTP Proxy. Only works for HTTP proxies that
310 # don't require user authentication
311 #
312 # See: http://www.ietf.org/rfc/rfc2817.txt
313
314 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
315 die "Socket to $proxyhost: $!\n" unless defined $s;
316 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
317 or die "Write to $proxyhost: $!\n";
318 $s->flush();
319
320 $rep = <$s>;
321
322 # The answer should look like 'HTTP/1.x 2yy ....'
323 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
324 die "Proxy connect: $rep\n";
325 }
326 # Skip up to the empty line of the proxy server output
327 # including the response headers.
328 while ($rep = <$s>) {
329 last if (!defined $rep ||
330 $rep eq "\n" ||
331 $rep eq "\r\n");
332 }
333 } else {
334 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
335 die "Socket to $serv: $!\n" unless defined $s;
336 }
337
338 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
339 or die "Write to $serv: $!\n";
340 $s->flush();
341
342 $rep = <$s>;
343
344 if ($rep ne "I LOVE YOU\n") {
345 $rep="<unknown>" unless $rep;
346 die "AuthReply: $rep\n";
347 }
348 $self->{'socketo'} = $s;
349 $self->{'socketi'} = $s;
350 } else { # local or ext: Fork off our own cvs server.
351 my $pr = IO::Pipe->new();
352 my $pw = IO::Pipe->new();
353 my $pid = fork();
354 die "Fork: $!\n" unless defined $pid;
355 my $cvs = 'cvs';
356 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
357 my $rsh = 'rsh';
358 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
359
360 my @cvs = ($cvs, 'server');
361 my ($local, $user, $host);
362 $local = $repo =~ s/:local://;
363 if (!$local) {
364 $repo =~ s/:ext://;
365 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
366 ($user, $host) = ($1, $2);
367 }
368 if (!$local) {
369 if ($user) {
370 unshift @cvs, $rsh, '-l', $user, $host;
371 } else {
372 unshift @cvs, $rsh, $host;
373 }
374 }
375
376 unless ($pid) {
377 $pr->writer();
378 $pw->reader();
379 dup2($pw->fileno(),0);
380 dup2($pr->fileno(),1);
381 $pr->close();
382 $pw->close();
383 exec(@cvs);
384 }
385 $pw->writer();
386 $pr->reader();
387 $self->{'socketo'} = $pw;
388 $self->{'socketi'} = $pr;
389 }
390 $self->{'socketo'}->write("Root $repo\n");
391
392 # Trial and error says that this probably is the minimum set
393 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
394
395 $self->{'socketo'}->write("valid-requests\n");
396 $self->{'socketo'}->flush();
397
398 chomp(my $rep=$self->readline());
399 if ($rep !~ s/^Valid-requests\s*//) {
400 $rep="<unknown>" unless $rep;
401 die "Expected Valid-requests from server, but got: $rep\n";
402 }
403 chomp(my $res=$self->readline());
404 die "validReply: $res\n" if $res ne "ok";
405
406 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
407 $self->{'repo'} = $repo;
408}
409
410sub readline {
411 my ($self) = @_;
412 return $self->{'socketi'}->getline();
413}
414
415sub _file {
416 # Request a file with a given revision.
417 # Trial and error says this is a good way to do it. :-/
418 my ($self,$fn,$rev) = @_;
419 $self->{'socketo'}->write("Argument -N\n") or return undef;
420 $self->{'socketo'}->write("Argument -P\n") or return undef;
421 # -kk: Linus' version doesn't use it - defaults to off
422 if ($opt_k) {
423 $self->{'socketo'}->write("Argument -kk\n") or return undef;
424 }
425 $self->{'socketo'}->write("Argument -r\n") or return undef;
426 $self->{'socketo'}->write("Argument $rev\n") or return undef;
427 $self->{'socketo'}->write("Argument --\n") or return undef;
428 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
429 $self->{'socketo'}->write("Directory .\n") or return undef;
430 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
431 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
432 $self->{'socketo'}->write("co\n") or return undef;
433 $self->{'socketo'}->flush() or return undef;
434 $self->{'lines'} = 0;
435 return 1;
436}
437sub _line {
438 # Read a line from the server.
439 # ... except that 'line' may be an entire file. ;-)
440 my ($self, $fh) = @_;
441 die "Not in lines" unless defined $self->{'lines'};
442
443 my $line;
444 my $res=0;
445 while (defined($line = $self->readline())) {
446 # M U gnupg-cvs-rep/AUTHORS
447 # Updated gnupg-cvs-rep/
448 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
449 # /AUTHORS/1.1///T1.1
450 # u=rw,g=rw,o=rw
451 # 0
452 # ok
453
454 if ($line =~ s/^(?:Created|Updated) //) {
455 $line = $self->readline(); # path
456 $line = $self->readline(); # Entries line
457 my $mode = $self->readline(); chomp $mode;
458 $self->{'mode'} = $mode;
459 defined (my $cnt = $self->readline())
460 or die "EOF from server after 'Changed'\n";
461 chomp $cnt;
462 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
463 $line="";
464 $res = $self->_fetchfile($fh, $cnt);
465 } elsif ($line =~ s/^ //) {
466 print $fh $line;
467 $res += length($line);
468 } elsif ($line =~ /^M\b/) {
469 # output, do nothing
470 } elsif ($line =~ /^Mbinary\b/) {
471 my $cnt;
472 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
473 chomp $cnt;
474 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
475 $line="";
476 $res += $self->_fetchfile($fh, $cnt);
477 } else {
478 chomp $line;
479 if ($line eq "ok") {
480 # print STDERR "S: ok (".length($res).")\n";
481 return $res;
482 } elsif ($line =~ s/^E //) {
483 # print STDERR "S: $line\n";
484 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
485 $line = $self->readline(); # filename
486 $line = $self->readline(); # OK
487 chomp $line;
488 die "Unknown: $line" if $line ne "ok";
489 return -1;
490 } else {
491 die "Unknown: $line\n";
492 }
493 }
494 }
495 return undef;
496}
497sub file {
498 my ($self,$fn,$rev) = @_;
499 my $res;
500
501 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
502 DIR => File::Spec->tmpdir(), UNLINK => 1);
503
504 $self->_file($fn,$rev) and $res = $self->_line($fh);
505
506 if (!defined $res) {
507 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
508 truncate $fh, 0;
509 $self->conn();
510 $self->_file($fn,$rev) or die "No file command send";
511 $res = $self->_line($fh);
512 die "Retry failed" unless defined $res;
513 }
514 close ($fh);
515
516 return ($name, $res);
517}
518sub _fetchfile {
519 my ($self, $fh, $cnt) = @_;
520 my $res = 0;
521 my $bufsize = 1024 * 1024;
522 while ($cnt) {
523 if ($bufsize > $cnt) {
524 $bufsize = $cnt;
525 }
526 my $buf;
527 my $num = $self->{'socketi'}->read($buf,$bufsize);
528 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
529 print $fh $buf;
530 $res += $num;
531 $cnt -= $num;
532 }
533 return $res;
534}
535
536sub _scramble {
537 my ($self, $pass) = @_;
538 my $scrambled = "A";
539
540 return $scrambled unless $pass;
541
542 my $pass_len = length($pass);
543 my @pass_arr = split("", $pass);
544 my $i;
545
546 # from cvs/src/scramble.c
547 my @shifts = (
548 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
549 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
550 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
551 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
552 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
553 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
554 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
555 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
556 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
557 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
558 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
559 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
560 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
561 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
562 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
563 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
564 );
565
566 for ($i = 0; $i < $pass_len; $i++) {
567 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
568 }
569
570 return $scrambled;
571}
572
573package main;
574
575my $cvs = CVSconn->new($opt_d, $cvs_tree);
576
577
578sub pdate($) {
579 my ($d) = @_;
580 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
581 or die "Unparseable date: $d\n";
582 my $y=$1; $y-=1900 if $y>1900;
583 return timegm($6||0,$5,$4,$3,$2-1,$y);
584}
585
586sub pmode($) {
587 my ($mode) = @_;
588 my $m = 0;
589 my $mm = 0;
590 my $um = 0;
591 for my $x(split(//,$mode)) {
592 if ($x eq ",") {
593 $m |= $mm&$um;
594 $mm = 0;
595 $um = 0;
596 } elsif ($x eq "u") { $um |= 0700;
597 } elsif ($x eq "g") { $um |= 0070;
598 } elsif ($x eq "o") { $um |= 0007;
599 } elsif ($x eq "r") { $mm |= 0444;
600 } elsif ($x eq "w") { $mm |= 0222;
601 } elsif ($x eq "x") { $mm |= 0111;
602 } elsif ($x eq "=") { # do nothing
603 } else { die "Unknown mode: $mode\n";
604 }
605 }
606 $m |= $mm&$um;
607 return $m;
608}
609
610sub getwd() {
611 my $pwd = `pwd`;
612 chomp $pwd;
613 return $pwd;
614}
615
616sub is_sha1 {
617 my $s = shift;
618 return $s =~ /^[a-f0-9]{40}$/;
619}
620
621sub get_headref ($) {
622 my $name = shift;
623 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
624 return undef unless $? == 0;
625 chomp $r;
626 return $r;
627}
628
629my $user_filename_prepend = '';
630sub munge_user_filename {
631 my $name = shift;
632 return File::Spec->file_name_is_absolute($name) ?
633 $name :
634 $user_filename_prepend . $name;
635}
636
637-d $git_tree
638 or mkdir($git_tree,0777)
639 or die "Could not create $git_tree: $!";
640if ($git_tree ne '.') {
641 $user_filename_prepend = getwd() . '/';
642 chdir($git_tree);
643}
644
645my $last_branch = "";
646my $orig_branch = "";
647my %branch_date;
648my $tip_at_start = undef;
649
650my $git_dir = $ENV{"GIT_DIR"} || ".git";
651$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
652$ENV{"GIT_DIR"} = $git_dir;
653my $orig_git_index;
654$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
655
656my %index; # holds filenames of one index per branch
657
658unless (-d $git_dir) {
659 system(qw(git init));
660 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
661 system(qw(git read-tree --empty));
662 die "Cannot init an empty tree: $?\n" if $?;
663
664 $last_branch = $opt_o;
665 $orig_branch = "";
666} else {
667 open(F, "-|", qw(git symbolic-ref HEAD)) or
668 die "Cannot run git symbolic-ref: $!\n";
669 chomp ($last_branch = <F>);
670 $last_branch = basename($last_branch);
671 close(F);
672 unless ($last_branch) {
673 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
674 $last_branch = "master";
675 }
676 $orig_branch = $last_branch;
677 $tip_at_start = `git rev-parse --verify HEAD`;
678
679 # Get the last import timestamps
680 my $fmt = '($ref, $author) = (%(refname), %(author));';
681 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
682 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
683 while (defined(my $entry = <H>)) {
684 my ($ref, $author);
685 eval($entry) || die "cannot eval refs list: $@";
686 my ($head) = ($ref =~ m|^$remote/(.*)|);
687 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
688 $branch_date{$head} = $1;
689 }
690 close(H);
691 if (!exists $branch_date{$opt_o}) {
692 die "Branch '$opt_o' does not exist.\n".
693 "Either use the correct '-o branch' option,\n".
694 "or import to a new repository.\n";
695 }
696}
697
698-d $git_dir
699 or die "Could not create git subdir ($git_dir).\n";
700
701# now we read (and possibly save) author-info as well
702-f "$git_dir/cvs-authors" and
703 read_author_info("$git_dir/cvs-authors");
704if ($opt_A) {
705 read_author_info(munge_user_filename($opt_A));
706 write_author_info("$git_dir/cvs-authors");
707}
708
709# open .git/cvs-revisions, if requested
710open my $revision_map, '>>', "$git_dir/cvs-revisions"
711 or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
712 if defined $opt_R;
713
714
715#
716# run cvsps into a file unless we are getting
717# it passed as a file via $opt_P
718#
719my $cvspsfile;
720unless ($opt_P) {
721 print "Running cvsps...\n" if $opt_v;
722 my $pid = open(CVSPS,"-|");
723 my $cvspsfh;
724 die "Cannot fork: $!\n" unless defined $pid;
725 unless ($pid) {
726 my @opt;
727 @opt = split(/,/,$opt_p) if defined $opt_p;
728 unshift @opt, '-z', $opt_z if defined $opt_z;
729 unshift @opt, '-q' unless defined $opt_v;
730 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
731 push @opt, '--cvs-direct';
732 }
733 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
734 die "Could not start cvsps: $!\n";
735 }
736 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
737 DIR => File::Spec->tmpdir());
738 while (<CVSPS>) {
739 print $cvspsfh $_;
740 }
741 close CVSPS;
742 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
743 close $cvspsfh;
744} else {
745 $cvspsfile = munge_user_filename($opt_P);
746}
747
748open(CVS, "<$cvspsfile") or die $!;
749
750## cvsps output:
751#---------------------
752#PatchSet 314
753#Date: 1999/09/18 13:03:59
754#Author: wkoch
755#Branch: STABLE-BRANCH-1-0
756#Ancestor branch: HEAD
757#Tag: (none)
758#Log:
759# See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
760#Members:
761# README:1.57->1.57.2.1
762# VERSION:1.96->1.96.2.1
763#
764#---------------------
765
766my $state = 0;
767
768sub update_index (\@\@) {
769 my $old = shift;
770 my $new = shift;
771 open(my $fh, '|-', qw(git update-index -z --index-info))
772 or die "unable to open git update-index: $!";
773 print $fh
774 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
775 @$old),
776 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
777 @$new)
778 or die "unable to write to git update-index: $!";
779 close $fh
780 or die "unable to write to git update-index: $!";
781 $? and die "git update-index reported error: $?";
782}
783
784sub write_tree () {
785 open(my $fh, '-|', qw(git write-tree))
786 or die "unable to open git write-tree: $!";
787 chomp(my $tree = <$fh>);
788 is_sha1($tree)
789 or die "Cannot get tree id ($tree): $!";
790 close($fh)
791 or die "Error running git write-tree: $?\n";
792 print "Tree ID $tree\n" if $opt_v;
793 return $tree;
794}
795
796my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
797my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
798
799# commits that cvsps cannot place anywhere...
800$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
801
802sub commit {
803 if ($branch eq $opt_o && !$index{branch} &&
804 !get_headref("$remote/$branch")) {
805 # looks like an initial commit
806 # use the index primed by git init
807 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
808 $index{$branch} = "$git_dir/index";
809 } else {
810 # use an index per branch to speed up
811 # imports of projects with many branches
812 unless ($index{$branch}) {
813 $index{$branch} = tmpnam();
814 $ENV{GIT_INDEX_FILE} = $index{$branch};
815 if ($ancestor) {
816 system("git", "read-tree", "$remote/$ancestor");
817 } else {
818 system("git", "read-tree", "$remote/$branch");
819 }
820 die "read-tree failed: $?\n" if $?;
821 }
822 }
823 $ENV{GIT_INDEX_FILE} = $index{$branch};
824
825 update_index(@old, @new);
826 @old = @new = ();
827 my $tree = write_tree();
828 my $parent = get_headref("$remote/$last_branch");
829 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
830
831 my @commit_args;
832 push @commit_args, ("-p", $parent) if $parent;
833
834 # loose detection of merges
835 # based on the commit msg
836 foreach my $rx (@mergerx) {
837 next unless $logmsg =~ $rx && $1;
838 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
839 if (my $sha1 = get_headref("$remote/$mparent")) {
840 push @commit_args, '-p', "$remote/$mparent";
841 print "Merge parent branch: $mparent\n" if $opt_v;
842 }
843 }
844
845 my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
846 $ENV{GIT_AUTHOR_NAME} = $author_name;
847 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
848 $ENV{GIT_AUTHOR_DATE} = $commit_date;
849 $ENV{GIT_COMMITTER_NAME} = $author_name;
850 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
851 $ENV{GIT_COMMITTER_DATE} = $commit_date;
852 my $pid = open2(my $commit_read, my $commit_write,
853 'git', 'commit-tree', $tree, @commit_args);
854
855 # compatibility with git2cvs
856 substr($logmsg,32767) = "" if length($logmsg) > 32767;
857 $logmsg =~ s/[\s\n]+\z//;
858
859 if (@skipped) {
860 $logmsg .= "\n\n\nSKIPPED:\n\t";
861 $logmsg .= join("\n\t", @skipped) . "\n";
862 @skipped = ();
863 }
864
865 print($commit_write "$logmsg\n") && close($commit_write)
866 or die "Error writing to git commit-tree: $!\n";
867
868 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
869 chomp(my $cid = <$commit_read>);
870 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
871 print "Commit ID $cid\n" if $opt_v;
872 close($commit_read);
873
874 waitpid($pid,0);
875 die "Error running git commit-tree: $?\n" if $?;
876
877 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
878 or die "Cannot write branch $branch for update: $!\n";
879
880 if ($revision_map) {
881 print $revision_map "@$_ $cid\n" for @commit_revisions;
882 }
883 @commit_revisions = ();
884
885 if ($tag) {
886 my ($xtag) = $tag;
887 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
888 $xtag =~ tr/_/\./ if ( $opt_u );
889 $xtag =~ s/[\/]/$opt_s/g;
890 $xtag =~ s/\[//g;
891
892 system('git' , 'tag', '-f', $xtag, $cid) == 0
893 or die "Cannot create tag $xtag: $!\n";
894
895 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
896 }
897};
898
899my $commitcount = 1;
900while (<CVS>) {
901 chomp;
902 if ($state == 0 and /^-+$/) {
903 $state = 1;
904 } elsif ($state == 0) {
905 $state = 1;
906 redo;
907 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
908 $patchset = 0+$_;
909 $state=2;
910 } elsif ($state == 2 and s/^Date:\s+//) {
911 $date = pdate($_);
912 unless ($date) {
913 print STDERR "Could not parse date: $_\n";
914 $state=0;
915 next;
916 }
917 $state=3;
918 } elsif ($state == 3 and s/^Author:\s+//) {
919 s/\s+$//;
920 if (/^(.*?)\s+<(.*)>/) {
921 ($author_name, $author_email) = ($1, $2);
922 } elsif ($conv_author_name{$_}) {
923 $author_name = $conv_author_name{$_};
924 $author_email = $conv_author_email{$_};
925 } else {
926 $author_name = $author_email = $_;
927 }
928 $state = 4;
929 } elsif ($state == 4 and s/^Branch:\s+//) {
930 s/\s+$//;
931 tr/_/\./ if ( $opt_u );
932 s/[\/]/$opt_s/g;
933 $branch = $_;
934 $state = 5;
935 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
936 s/\s+$//;
937 $ancestor = $_;
938 $ancestor = $opt_o if $ancestor eq "HEAD";
939 $state = 6;
940 } elsif ($state == 5) {
941 $ancestor = undef;
942 $state = 6;
943 redo;
944 } elsif ($state == 6 and s/^Tag:\s+//) {
945 s/\s+$//;
946 if ($_ eq "(none)") {
947 $tag = undef;
948 } else {
949 $tag = $_;
950 }
951 $state = 7;
952 } elsif ($state == 7 and /^Log:/) {
953 $logmsg = "";
954 $state = 8;
955 } elsif ($state == 8 and /^Members:/) {
956 $branch = $opt_o if $branch eq "HEAD";
957 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
958 # skip
959 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
960 $state = 11;
961 next;
962 }
963 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
964 # skip if the commit is too recent
965 # given that the cvsps default fuzz is 300s, we give ourselves another
966 # 300s just in case -- this also prevents skipping commits
967 # due to server clock drift
968 print "skip patchset $patchset: $date too recent\n" if $opt_v;
969 $state = 11;
970 next;
971 }
972 if (exists $ignorebranch{$branch}) {
973 print STDERR "Skipping $branch\n";
974 $state = 11;
975 next;
976 }
977 if ($ancestor) {
978 if ($ancestor eq $branch) {
979 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
980 $ancestor = $opt_o;
981 }
982 if (defined get_headref("$remote/$branch")) {
983 print STDERR "Branch $branch already exists!\n";
984 $state=11;
985 next;
986 }
987 my $id = get_headref("$remote/$ancestor");
988 if (!$id) {
989 print STDERR "Branch $ancestor does not exist!\n";
990 $ignorebranch{$branch} = 1;
991 $state=11;
992 next;
993 }
994
995 system(qw(git update-ref -m cvsimport),
996 "$remote/$branch", $id);
997 if($? != 0) {
998 print STDERR "Could not create branch $branch\n";
999 $ignorebranch{$branch} = 1;
1000 $state=11;
1001 next;
1002 }
1003 }
1004 $last_branch = $branch if $branch ne $last_branch;
1005 $state = 9;
1006 } elsif ($state == 8) {
1007 $logmsg .= "$_\n";
1008 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1009# VERSION:1.96->1.96.2.1
1010 my $init = ($2 eq "INITIAL");
1011 my $fn = $1;
1012 my $rev = $3;
1013 $fn =~ s#^/+##;
1014 if ($opt_S && $fn =~ m/$opt_S/) {
1015 print "SKIPPING $fn v $rev\n";
1016 push(@skipped, $fn);
1017 next;
1018 }
1019 push @commit_revisions, [$fn, $rev];
1020 print "Fetching $fn v $rev\n" if $opt_v;
1021 my ($tmpname, $size) = $cvs->file($fn,$rev);
1022 if ($size == -1) {
1023 push(@old,$fn);
1024 print "Drop $fn\n" if $opt_v;
1025 } else {
1026 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1027 my $pid = open(my $F, '-|');
1028 die $! unless defined $pid;
1029 if (!$pid) {
1030 exec("git", "hash-object", "-w", $tmpname)
1031 or die "Cannot create object: $!\n";
1032 }
1033 my $sha = <$F>;
1034 chomp $sha;
1035 close $F;
1036 my $mode = pmode($cvs->{'mode'});
1037 push(@new,[$mode, $sha, $fn]); # may be resurrected!
1038 }
1039 unlink($tmpname);
1040 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1041 my $fn = $1;
1042 my $rev = $2;
1043 $fn =~ s#^/+##;
1044 push @commit_revisions, [$fn, $rev];
1045 push(@old,$fn);
1046 print "Delete $fn\n" if $opt_v;
1047 } elsif ($state == 9 and /^\s*$/) {
1048 $state = 10;
1049 } elsif (($state == 9 or $state == 10) and /^-+$/) {
1050 $commitcount++;
1051 if ($opt_L && $commitcount > $opt_L) {
1052 last;
1053 }
1054 commit();
1055 if (($commitcount & 1023) == 0) {
1056 system(qw(git repack -a -d));
1057 }
1058 $state = 1;
1059 } elsif ($state == 11 and /^-+$/) {
1060 $state = 1;
1061 } elsif (/^-+$/) { # end of unknown-line processing
1062 $state = 1;
1063 } elsif ($state != 11) { # ignore stuff when skipping
1064 print STDERR "* UNKNOWN LINE * $_\n";
1065 }
1066}
1067commit() if $branch and $state != 11;
1068
1069unless ($opt_P) {
1070 unlink($cvspsfile);
1071}
1072
1073# The heuristic of repacking every 1024 commits can leave a
1074# lot of unpacked data. If there is more than 1MB worth of
1075# not-packed objects, repack once more.
1076my $line = `git count-objects`;
1077if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1078 my ($n_objects, $kb) = ($1, $2);
1079 1024 < $kb
1080 and system(qw(git repack -a -d));
1081}
1082
1083foreach my $git_index (values %index) {
1084 if ($git_index ne "$git_dir/index") {
1085 unlink($git_index);
1086 }
1087}
1088
1089if (defined $orig_git_index) {
1090 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1091} else {
1092 delete $ENV{GIT_INDEX_FILE};
1093}
1094
1095# Now switch back to the branch we were in before all of this happened
1096if ($orig_branch) {
1097 print "DONE.\n" if $opt_v;
1098 if ($opt_i) {
1099 exit 0;
1100 }
1101 my $tip_at_end = `git rev-parse --verify HEAD`;
1102 if ($tip_at_start ne $tip_at_end) {
1103 for ($tip_at_start, $tip_at_end) { chomp; }
1104 print "Fetched into the current branch.\n" if $opt_v;
1105 system(qw(git read-tree -u -m),
1106 $tip_at_start, $tip_at_end);
1107 die "Fast-forward update failed: $?\n" if $?;
1108 }
1109 else {
1110 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1111 die "Could not merge $opt_o into the current branch.\n" if $?;
1112 }
1113} else {
1114 $orig_branch = "master";
1115 print "DONE; creating $orig_branch branch\n" if $opt_v;
1116 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1117 unless defined get_headref('refs/heads/master');
1118 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1119 if ($opt_r && $opt_o ne 'HEAD');
1120 system('git', 'update-ref', 'HEAD', "$orig_branch");
1121 unless ($opt_i) {
1122 system(qw(git checkout -f));
1123 die "checkout failed: $?\n" if $?;
1124 }
1125}