git-cvsimport.perlon commit cvsimport: handle the parsing of uppercase config options (60d5985)
   1#!/usr/bin/perl -w
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15
  16use strict;
  17use warnings;
  18use Getopt::Long;
  19use File::Spec;
  20use File::Temp qw(tempfile tmpnam);
  21use File::Path qw(mkpath);
  22use File::Basename qw(basename dirname);
  23use Time::Local;
  24use IO::Socket;
  25use IO::Pipe;
  26use POSIX qw(strftime dup2 ENOENT);
  27use IPC::Open2;
  28
  29$SIG{'PIPE'}="IGNORE";
  30$ENV{'TZ'}="UTC";
  31
  32our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
  33my (%conv_author_name, %conv_author_email);
  34
  35sub usage(;$) {
  36        my $msg = shift;
  37        print(STDERR "Error: $msg\n") if $msg;
  38        print STDERR <<END;
  39Usage: git cvsimport     # fetch/update GIT from CVS
  40       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  41       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  42       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  43       [-r remote] [-R] [CVS_module]
  44END
  45        exit(1);
  46}
  47
  48sub read_author_info($) {
  49        my ($file) = @_;
  50        my $user;
  51        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  52
  53        while (<$f>) {
  54                # Expected format is this:
  55                #   exon=Andreas Ericsson <ae@op5.se>
  56                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  57                        $user = $1;
  58                        $conv_author_name{$user} = $2;
  59                        $conv_author_email{$user} = $3;
  60                }
  61                # However, we also read from CVSROOT/users format
  62                # to ease migration.
  63                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  64                        my $mapped;
  65                        ($user, $mapped) = ($1, $3);
  66                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  67                                $conv_author_name{$user} = $1;
  68                                $conv_author_email{$user} = $2;
  69                        }
  70                        elsif ($mapped =~ /^<?(.*)>?$/) {
  71                                $conv_author_name{$user} = $user;
  72                                $conv_author_email{$user} = $1;
  73                        }
  74                }
  75                # NEEDSWORK: Maybe warn on unrecognized lines?
  76        }
  77        close ($f);
  78}
  79
  80sub write_author_info($) {
  81        my ($file) = @_;
  82        open my $f, '>', $file or
  83          die("Failed to open $file for writing: $!");
  84
  85        foreach (keys %conv_author_name) {
  86                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
  87        }
  88        close ($f);
  89}
  90
  91# convert getopts specs for use by git config
  92my %longmap = (
  93        'A:' => 'authors-file',
  94        'M:' => 'merge-regex',
  95        'P:' => undef,
  96        'R' => 'track-revisions',
  97        'S:' => 'ignore-paths',
  98);
  99
 100sub read_repo_config {
 101        # Split the string between characters, unless there is a ':'
 102        # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
 103        my @opts = split(/ *(?!:)/, shift);
 104        foreach my $o (@opts) {
 105                my $key = $o;
 106                $key =~ s/://g;
 107                my $arg = 'git config';
 108                $arg .= ' --bool' if ($o !~ /:$/);
 109                my $ckey = $key;
 110
 111                if (exists $longmap{$o}) {
 112                        # An uppercase option like -R cannot be
 113                        # expressed in the configuration, as the
 114                        # variable names are downcased.
 115                        $ckey = $longmap{$o};
 116                        next if (! defined $ckey);
 117                        $ckey =~ s/-//g;
 118                }
 119                chomp(my $tmp = `$arg --get cvsimport.$ckey`);
 120                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 121                        no strict 'refs';
 122                        my $opt_name = "opt_" . $key;
 123                        if (!$$opt_name) {
 124                                $$opt_name = $tmp;
 125                        }
 126                }
 127        }
 128}
 129
 130my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
 131read_repo_config($opts);
 132Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 133
 134# turn the Getopt::Std specification in a Getopt::Long one,
 135# with support for multiple -M options
 136GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 137    or usage();
 138usage if $opt_h;
 139
 140if (@ARGV == 0) {
 141                chomp(my $module = `git config --get cvsimport.module`);
 142                push(@ARGV, $module) if $? == 0;
 143}
 144@ARGV <= 1 or usage("You can't specify more than one CVS module");
 145
 146if ($opt_d) {
 147        $ENV{"CVSROOT"} = $opt_d;
 148} elsif (-f 'CVS/Root') {
 149        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 150        $opt_d = <$f>;
 151        chomp $opt_d;
 152        close $f;
 153        $ENV{"CVSROOT"} = $opt_d;
 154} elsif ($ENV{"CVSROOT"}) {
 155        $opt_d = $ENV{"CVSROOT"};
 156} else {
 157        usage("CVSROOT needs to be set");
 158}
 159$opt_s ||= "-";
 160$opt_a ||= 0;
 161
 162my $git_tree = $opt_C;
 163$git_tree ||= ".";
 164
 165my $remote;
 166if (defined $opt_r) {
 167        $remote = 'refs/remotes/' . $opt_r;
 168        $opt_o ||= "master";
 169} else {
 170        $opt_o ||= "origin";
 171        $remote = 'refs/heads';
 172}
 173
 174my $cvs_tree;
 175if ($#ARGV == 0) {
 176        $cvs_tree = $ARGV[0];
 177} elsif (-f 'CVS/Repository') {
 178        open my $f, '<', 'CVS/Repository' or
 179            die 'Failed to open CVS/Repository';
 180        $cvs_tree = <$f>;
 181        chomp $cvs_tree;
 182        close $f;
 183} else {
 184        usage("CVS module has to be specified");
 185}
 186
 187our @mergerx = ();
 188if ($opt_m) {
 189        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 190}
 191if (@opt_M) {
 192        push (@mergerx, map { qr/$_/ } @opt_M);
 193}
 194
 195# Remember UTC of our starting time
 196# we'll want to avoid importing commits
 197# that are too recent
 198our $starttime = time();
 199
 200select(STDERR); $|=1; select(STDOUT);
 201
 202
 203package CVSconn;
 204# Basic CVS dialog.
 205# We're only interested in connecting and downloading, so ...
 206
 207use File::Spec;
 208use File::Temp qw(tempfile);
 209use POSIX qw(strftime dup2);
 210
 211sub new {
 212        my ($what,$repo,$subdir) = @_;
 213        $what=ref($what) if ref($what);
 214
 215        my $self = {};
 216        $self->{'buffer'} = "";
 217        bless($self,$what);
 218
 219        $repo =~ s#/+$##;
 220        $self->{'fullrep'} = $repo;
 221        $self->conn();
 222
 223        $self->{'subdir'} = $subdir;
 224        $self->{'lines'} = undef;
 225
 226        return $self;
 227}
 228
 229sub conn {
 230        my $self = shift;
 231        my $repo = $self->{'fullrep'};
 232        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 233                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 234
 235                my ($proxyhost,$proxyport);
 236                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 237                        $proxyhost = $1;
 238                        # Default proxyport, if not specified, is 8080.
 239                        $proxyport = 8080;
 240                        if ($ENV{"CVS_PROXY_PORT"}) {
 241                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 242                        }
 243                        if ($param =~ m/proxyport=([^;]+)/) {
 244                                $proxyport = $1;
 245                        }
 246                }
 247                $repo ||= '/';
 248
 249                # if username is not explicit in CVSROOT, then use current user, as cvs would
 250                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 251                my $rr2 = "-";
 252                unless ($port) {
 253                        $rr2 = ":pserver:$user\@$serv:$repo";
 254                        $port=2401;
 255                }
 256                my $rr = ":pserver:$user\@$serv:$port$repo";
 257
 258                if ($pass) {
 259                        $pass = $self->_scramble($pass);
 260                } else {
 261                        open(H,$ENV{'HOME'}."/.cvspass") and do {
 262                                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 263                                while (<H>) {
 264                                        chomp;
 265                                        s/^\/\d+\s+//;
 266                                        my ($w,$p) = split(/\s/,$_,2);
 267                                        if ($w eq $rr or $w eq $rr2) {
 268                                                $pass = $p;
 269                                                last;
 270                                        }
 271                                }
 272                        };
 273                        $pass = "A" unless $pass;
 274                }
 275
 276                my ($s, $rep);
 277                if ($proxyhost) {
 278
 279                        # Use a HTTP Proxy. Only works for HTTP proxies that
 280                        # don't require user authentication
 281                        #
 282                        # See: http://www.ietf.org/rfc/rfc2817.txt
 283
 284                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 285                        die "Socket to $proxyhost: $!\n" unless defined $s;
 286                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 287                                or die "Write to $proxyhost: $!\n";
 288                        $s->flush();
 289
 290                        $rep = <$s>;
 291
 292                        # The answer should look like 'HTTP/1.x 2yy ....'
 293                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 294                                die "Proxy connect: $rep\n";
 295                        }
 296                        # Skip up to the empty line of the proxy server output
 297                        # including the response headers.
 298                        while ($rep = <$s>) {
 299                                last if (!defined $rep ||
 300                                         $rep eq "\n" ||
 301                                         $rep eq "\r\n");
 302                        }
 303                } else {
 304                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 305                        die "Socket to $serv: $!\n" unless defined $s;
 306                }
 307
 308                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 309                        or die "Write to $serv: $!\n";
 310                $s->flush();
 311
 312                $rep = <$s>;
 313
 314                if ($rep ne "I LOVE YOU\n") {
 315                        $rep="<unknown>" unless $rep;
 316                        die "AuthReply: $rep\n";
 317                }
 318                $self->{'socketo'} = $s;
 319                $self->{'socketi'} = $s;
 320        } else { # local or ext: Fork off our own cvs server.
 321                my $pr = IO::Pipe->new();
 322                my $pw = IO::Pipe->new();
 323                my $pid = fork();
 324                die "Fork: $!\n" unless defined $pid;
 325                my $cvs = 'cvs';
 326                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 327                my $rsh = 'rsh';
 328                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 329
 330                my @cvs = ($cvs, 'server');
 331                my ($local, $user, $host);
 332                $local = $repo =~ s/:local://;
 333                if (!$local) {
 334                    $repo =~ s/:ext://;
 335                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 336                    ($user, $host) = ($1, $2);
 337                }
 338                if (!$local) {
 339                    if ($user) {
 340                        unshift @cvs, $rsh, '-l', $user, $host;
 341                    } else {
 342                        unshift @cvs, $rsh, $host;
 343                    }
 344                }
 345
 346                unless ($pid) {
 347                        $pr->writer();
 348                        $pw->reader();
 349                        dup2($pw->fileno(),0);
 350                        dup2($pr->fileno(),1);
 351                        $pr->close();
 352                        $pw->close();
 353                        exec(@cvs);
 354                }
 355                $pw->writer();
 356                $pr->reader();
 357                $self->{'socketo'} = $pw;
 358                $self->{'socketi'} = $pr;
 359        }
 360        $self->{'socketo'}->write("Root $repo\n");
 361
 362        # Trial and error says that this probably is the minimum set
 363        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 364
 365        $self->{'socketo'}->write("valid-requests\n");
 366        $self->{'socketo'}->flush();
 367
 368        chomp(my $rep=$self->readline());
 369        if ($rep !~ s/^Valid-requests\s*//) {
 370                $rep="<unknown>" unless $rep;
 371                die "Expected Valid-requests from server, but got: $rep\n";
 372        }
 373        chomp(my $res=$self->readline());
 374        die "validReply: $res\n" if $res ne "ok";
 375
 376        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 377        $self->{'repo'} = $repo;
 378}
 379
 380sub readline {
 381        my ($self) = @_;
 382        return $self->{'socketi'}->getline();
 383}
 384
 385sub _file {
 386        # Request a file with a given revision.
 387        # Trial and error says this is a good way to do it. :-/
 388        my ($self,$fn,$rev) = @_;
 389        $self->{'socketo'}->write("Argument -N\n") or return undef;
 390        $self->{'socketo'}->write("Argument -P\n") or return undef;
 391        # -kk: Linus' version doesn't use it - defaults to off
 392        if ($opt_k) {
 393            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 394        }
 395        $self->{'socketo'}->write("Argument -r\n") or return undef;
 396        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 397        $self->{'socketo'}->write("Argument --\n") or return undef;
 398        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 399        $self->{'socketo'}->write("Directory .\n") or return undef;
 400        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 401        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 402        $self->{'socketo'}->write("co\n") or return undef;
 403        $self->{'socketo'}->flush() or return undef;
 404        $self->{'lines'} = 0;
 405        return 1;
 406}
 407sub _line {
 408        # Read a line from the server.
 409        # ... except that 'line' may be an entire file. ;-)
 410        my ($self, $fh) = @_;
 411        die "Not in lines" unless defined $self->{'lines'};
 412
 413        my $line;
 414        my $res=0;
 415        while (defined($line = $self->readline())) {
 416                # M U gnupg-cvs-rep/AUTHORS
 417                # Updated gnupg-cvs-rep/
 418                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 419                # /AUTHORS/1.1///T1.1
 420                # u=rw,g=rw,o=rw
 421                # 0
 422                # ok
 423
 424                if ($line =~ s/^(?:Created|Updated) //) {
 425                        $line = $self->readline(); # path
 426                        $line = $self->readline(); # Entries line
 427                        my $mode = $self->readline(); chomp $mode;
 428                        $self->{'mode'} = $mode;
 429                        defined (my $cnt = $self->readline())
 430                                or die "EOF from server after 'Changed'\n";
 431                        chomp $cnt;
 432                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 433                        $line="";
 434                        $res = $self->_fetchfile($fh, $cnt);
 435                } elsif ($line =~ s/^ //) {
 436                        print $fh $line;
 437                        $res += length($line);
 438                } elsif ($line =~ /^M\b/) {
 439                        # output, do nothing
 440                } elsif ($line =~ /^Mbinary\b/) {
 441                        my $cnt;
 442                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 443                        chomp $cnt;
 444                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 445                        $line="";
 446                        $res += $self->_fetchfile($fh, $cnt);
 447                } else {
 448                        chomp $line;
 449                        if ($line eq "ok") {
 450                                # print STDERR "S: ok (".length($res).")\n";
 451                                return $res;
 452                        } elsif ($line =~ s/^E //) {
 453                                # print STDERR "S: $line\n";
 454                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 455                                $line = $self->readline(); # filename
 456                                $line = $self->readline(); # OK
 457                                chomp $line;
 458                                die "Unknown: $line" if $line ne "ok";
 459                                return -1;
 460                        } else {
 461                                die "Unknown: $line\n";
 462                        }
 463                }
 464        }
 465        return undef;
 466}
 467sub file {
 468        my ($self,$fn,$rev) = @_;
 469        my $res;
 470
 471        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 472                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 473
 474        $self->_file($fn,$rev) and $res = $self->_line($fh);
 475
 476        if (!defined $res) {
 477            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 478            truncate $fh, 0;
 479            $self->conn();
 480            $self->_file($fn,$rev) or die "No file command send";
 481            $res = $self->_line($fh);
 482            die "Retry failed" unless defined $res;
 483        }
 484        close ($fh);
 485
 486        return ($name, $res);
 487}
 488sub _fetchfile {
 489        my ($self, $fh, $cnt) = @_;
 490        my $res = 0;
 491        my $bufsize = 1024 * 1024;
 492        while ($cnt) {
 493            if ($bufsize > $cnt) {
 494                $bufsize = $cnt;
 495            }
 496            my $buf;
 497            my $num = $self->{'socketi'}->read($buf,$bufsize);
 498            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 499            print $fh $buf;
 500            $res += $num;
 501            $cnt -= $num;
 502        }
 503        return $res;
 504}
 505
 506sub _scramble {
 507        my ($self, $pass) = @_;
 508        my $scrambled = "A";
 509
 510        return $scrambled unless $pass;
 511
 512        my $pass_len = length($pass);
 513        my @pass_arr = split("", $pass);
 514        my $i;
 515
 516        # from cvs/src/scramble.c
 517        my @shifts = (
 518                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 519                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 520                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 521                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 522                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 523                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 524                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 525                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 526                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 527                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 528                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 529                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 530                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 531                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 532                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 533                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 534        );
 535
 536        for ($i = 0; $i < $pass_len; $i++) {
 537                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 538        }
 539
 540        return $scrambled;
 541}
 542
 543package main;
 544
 545my $cvs = CVSconn->new($opt_d, $cvs_tree);
 546
 547
 548sub pdate($) {
 549        my ($d) = @_;
 550        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 551                or die "Unparseable date: $d\n";
 552        my $y=$1; $y-=1900 if $y>1900;
 553        return timegm($6||0,$5,$4,$3,$2-1,$y);
 554}
 555
 556sub pmode($) {
 557        my ($mode) = @_;
 558        my $m = 0;
 559        my $mm = 0;
 560        my $um = 0;
 561        for my $x(split(//,$mode)) {
 562                if ($x eq ",") {
 563                        $m |= $mm&$um;
 564                        $mm = 0;
 565                        $um = 0;
 566                } elsif ($x eq "u") { $um |= 0700;
 567                } elsif ($x eq "g") { $um |= 0070;
 568                } elsif ($x eq "o") { $um |= 0007;
 569                } elsif ($x eq "r") { $mm |= 0444;
 570                } elsif ($x eq "w") { $mm |= 0222;
 571                } elsif ($x eq "x") { $mm |= 0111;
 572                } elsif ($x eq "=") { # do nothing
 573                } else { die "Unknown mode: $mode\n";
 574                }
 575        }
 576        $m |= $mm&$um;
 577        return $m;
 578}
 579
 580sub getwd() {
 581        my $pwd = `pwd`;
 582        chomp $pwd;
 583        return $pwd;
 584}
 585
 586sub is_sha1 {
 587        my $s = shift;
 588        return $s =~ /^[a-f0-9]{40}$/;
 589}
 590
 591sub get_headref ($) {
 592        my $name = shift;
 593        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 594        return undef unless $? == 0;
 595        chomp $r;
 596        return $r;
 597}
 598
 599my $user_filename_prepend = '';
 600sub munge_user_filename {
 601        my $name = shift;
 602        return File::Spec->file_name_is_absolute($name) ?
 603                $name :
 604                $user_filename_prepend . $name;
 605}
 606
 607-d $git_tree
 608        or mkdir($git_tree,0777)
 609        or die "Could not create $git_tree: $!";
 610if ($git_tree ne '.') {
 611        $user_filename_prepend = getwd() . '/';
 612        chdir($git_tree);
 613}
 614
 615my $last_branch = "";
 616my $orig_branch = "";
 617my %branch_date;
 618my $tip_at_start = undef;
 619
 620my $git_dir = $ENV{"GIT_DIR"} || ".git";
 621$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 622$ENV{"GIT_DIR"} = $git_dir;
 623my $orig_git_index;
 624$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 625
 626my %index; # holds filenames of one index per branch
 627
 628unless (-d $git_dir) {
 629        system(qw(git init));
 630        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 631        system(qw(git read-tree --empty));
 632        die "Cannot init an empty tree: $?\n" if $?;
 633
 634        $last_branch = $opt_o;
 635        $orig_branch = "";
 636} else {
 637        open(F, "-|", qw(git symbolic-ref HEAD)) or
 638                die "Cannot run git symbolic-ref: $!\n";
 639        chomp ($last_branch = <F>);
 640        $last_branch = basename($last_branch);
 641        close(F);
 642        unless ($last_branch) {
 643                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 644                $last_branch = "master";
 645        }
 646        $orig_branch = $last_branch;
 647        $tip_at_start = `git rev-parse --verify HEAD`;
 648
 649        # Get the last import timestamps
 650        my $fmt = '($ref, $author) = (%(refname), %(author));';
 651        my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
 652        open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
 653        while (defined(my $entry = <H>)) {
 654                my ($ref, $author);
 655                eval($entry) || die "cannot eval refs list: $@";
 656                my ($head) = ($ref =~ m|^$remote/(.*)|);
 657                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 658                $branch_date{$head} = $1;
 659        }
 660        close(H);
 661        if (!exists $branch_date{$opt_o}) {
 662                die "Branch '$opt_o' does not exist.\n".
 663                       "Either use the correct '-o branch' option,\n".
 664                       "or import to a new repository.\n";
 665        }
 666}
 667
 668-d $git_dir
 669        or die "Could not create git subdir ($git_dir).\n";
 670
 671# now we read (and possibly save) author-info as well
 672-f "$git_dir/cvs-authors" and
 673  read_author_info("$git_dir/cvs-authors");
 674if ($opt_A) {
 675        read_author_info(munge_user_filename($opt_A));
 676        write_author_info("$git_dir/cvs-authors");
 677}
 678
 679# open .git/cvs-revisions, if requested
 680open my $revision_map, '>>', "$git_dir/cvs-revisions"
 681    or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
 682        if defined $opt_R;
 683
 684
 685#
 686# run cvsps into a file unless we are getting
 687# it passed as a file via $opt_P
 688#
 689my $cvspsfile;
 690unless ($opt_P) {
 691        print "Running cvsps...\n" if $opt_v;
 692        my $pid = open(CVSPS,"-|");
 693        my $cvspsfh;
 694        die "Cannot fork: $!\n" unless defined $pid;
 695        unless ($pid) {
 696                my @opt;
 697                @opt = split(/,/,$opt_p) if defined $opt_p;
 698                unshift @opt, '-z', $opt_z if defined $opt_z;
 699                unshift @opt, '-q'         unless defined $opt_v;
 700                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 701                        push @opt, '--cvs-direct';
 702                }
 703                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 704                die "Could not start cvsps: $!\n";
 705        }
 706        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 707                                          DIR => File::Spec->tmpdir());
 708        while (<CVSPS>) {
 709            print $cvspsfh $_;
 710        }
 711        close CVSPS;
 712        $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
 713        close $cvspsfh;
 714} else {
 715        $cvspsfile = munge_user_filename($opt_P);
 716}
 717
 718open(CVS, "<$cvspsfile") or die $!;
 719
 720## cvsps output:
 721#---------------------
 722#PatchSet 314
 723#Date: 1999/09/18 13:03:59
 724#Author: wkoch
 725#Branch: STABLE-BRANCH-1-0
 726#Ancestor branch: HEAD
 727#Tag: (none)
 728#Log:
 729#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 730#Members:
 731#       README:1.57->1.57.2.1
 732#       VERSION:1.96->1.96.2.1
 733#
 734#---------------------
 735
 736my $state = 0;
 737
 738sub update_index (\@\@) {
 739        my $old = shift;
 740        my $new = shift;
 741        open(my $fh, '|-', qw(git update-index -z --index-info))
 742                or die "unable to open git update-index: $!";
 743        print $fh
 744                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 745                        @$old),
 746                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 747                        @$new)
 748                or die "unable to write to git update-index: $!";
 749        close $fh
 750                or die "unable to write to git update-index: $!";
 751        $? and die "git update-index reported error: $?";
 752}
 753
 754sub write_tree () {
 755        open(my $fh, '-|', qw(git write-tree))
 756                or die "unable to open git write-tree: $!";
 757        chomp(my $tree = <$fh>);
 758        is_sha1($tree)
 759                or die "Cannot get tree id ($tree): $!";
 760        close($fh)
 761                or die "Error running git write-tree: $?\n";
 762        print "Tree ID $tree\n" if $opt_v;
 763        return $tree;
 764}
 765
 766my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
 767my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
 768
 769# commits that cvsps cannot place anywhere...
 770$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 771
 772sub commit {
 773        if ($branch eq $opt_o && !$index{branch} &&
 774                !get_headref("$remote/$branch")) {
 775            # looks like an initial commit
 776            # use the index primed by git init
 777            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 778            $index{$branch} = "$git_dir/index";
 779        } else {
 780            # use an index per branch to speed up
 781            # imports of projects with many branches
 782            unless ($index{$branch}) {
 783                $index{$branch} = tmpnam();
 784                $ENV{GIT_INDEX_FILE} = $index{$branch};
 785                if ($ancestor) {
 786                    system("git", "read-tree", "$remote/$ancestor");
 787                } else {
 788                    system("git", "read-tree", "$remote/$branch");
 789                }
 790                die "read-tree failed: $?\n" if $?;
 791            }
 792        }
 793        $ENV{GIT_INDEX_FILE} = $index{$branch};
 794
 795        update_index(@old, @new);
 796        @old = @new = ();
 797        my $tree = write_tree();
 798        my $parent = get_headref("$remote/$last_branch");
 799        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 800
 801        my @commit_args;
 802        push @commit_args, ("-p", $parent) if $parent;
 803
 804        # loose detection of merges
 805        # based on the commit msg
 806        foreach my $rx (@mergerx) {
 807                next unless $logmsg =~ $rx && $1;
 808                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 809                if (my $sha1 = get_headref("$remote/$mparent")) {
 810                        push @commit_args, '-p', "$remote/$mparent";
 811                        print "Merge parent branch: $mparent\n" if $opt_v;
 812                }
 813        }
 814
 815        my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
 816        $ENV{GIT_AUTHOR_NAME} = $author_name;
 817        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 818        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 819        $ENV{GIT_COMMITTER_NAME} = $author_name;
 820        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 821        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 822        my $pid = open2(my $commit_read, my $commit_write,
 823                'git', 'commit-tree', $tree, @commit_args);
 824
 825        # compatibility with git2cvs
 826        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 827        $logmsg =~ s/[\s\n]+\z//;
 828
 829        if (@skipped) {
 830            $logmsg .= "\n\n\nSKIPPED:\n\t";
 831            $logmsg .= join("\n\t", @skipped) . "\n";
 832            @skipped = ();
 833        }
 834
 835        print($commit_write "$logmsg\n") && close($commit_write)
 836                or die "Error writing to git commit-tree: $!\n";
 837
 838        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 839        chomp(my $cid = <$commit_read>);
 840        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 841        print "Commit ID $cid\n" if $opt_v;
 842        close($commit_read);
 843
 844        waitpid($pid,0);
 845        die "Error running git commit-tree: $?\n" if $?;
 846
 847        system('git' , 'update-ref', "$remote/$branch", $cid) == 0
 848                or die "Cannot write branch $branch for update: $!\n";
 849
 850        if ($revision_map) {
 851                print $revision_map "@$_ $cid\n" for @commit_revisions;
 852        }
 853        @commit_revisions = ();
 854
 855        if ($tag) {
 856                my ($xtag) = $tag;
 857                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 858                $xtag =~ tr/_/\./ if ( $opt_u );
 859                $xtag =~ s/[\/]/$opt_s/g;
 860                $xtag =~ s/\[//g;
 861
 862                system('git' , 'tag', '-f', $xtag, $cid) == 0
 863                        or die "Cannot create tag $xtag: $!\n";
 864
 865                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 866        }
 867};
 868
 869my $commitcount = 1;
 870while (<CVS>) {
 871        chomp;
 872        if ($state == 0 and /^-+$/) {
 873                $state = 1;
 874        } elsif ($state == 0) {
 875                $state = 1;
 876                redo;
 877        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 878                $patchset = 0+$_;
 879                $state=2;
 880        } elsif ($state == 2 and s/^Date:\s+//) {
 881                $date = pdate($_);
 882                unless ($date) {
 883                        print STDERR "Could not parse date: $_\n";
 884                        $state=0;
 885                        next;
 886                }
 887                $state=3;
 888        } elsif ($state == 3 and s/^Author:\s+//) {
 889                s/\s+$//;
 890                if (/^(.*?)\s+<(.*)>/) {
 891                    ($author_name, $author_email) = ($1, $2);
 892                } elsif ($conv_author_name{$_}) {
 893                        $author_name = $conv_author_name{$_};
 894                        $author_email = $conv_author_email{$_};
 895                } else {
 896                    $author_name = $author_email = $_;
 897                }
 898                $state = 4;
 899        } elsif ($state == 4 and s/^Branch:\s+//) {
 900                s/\s+$//;
 901                tr/_/\./ if ( $opt_u );
 902                s/[\/]/$opt_s/g;
 903                $branch = $_;
 904                $state = 5;
 905        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 906                s/\s+$//;
 907                $ancestor = $_;
 908                $ancestor = $opt_o if $ancestor eq "HEAD";
 909                $state = 6;
 910        } elsif ($state == 5) {
 911                $ancestor = undef;
 912                $state = 6;
 913                redo;
 914        } elsif ($state == 6 and s/^Tag:\s+//) {
 915                s/\s+$//;
 916                if ($_ eq "(none)") {
 917                        $tag = undef;
 918                } else {
 919                        $tag = $_;
 920                }
 921                $state = 7;
 922        } elsif ($state == 7 and /^Log:/) {
 923                $logmsg = "";
 924                $state = 8;
 925        } elsif ($state == 8 and /^Members:/) {
 926                $branch = $opt_o if $branch eq "HEAD";
 927                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
 928                        # skip
 929                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
 930                        $state = 11;
 931                        next;
 932                }
 933                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
 934                        # skip if the commit is too recent
 935                        # given that the cvsps default fuzz is 300s, we give ourselves another
 936                        # 300s just in case -- this also prevents skipping commits
 937                        # due to server clock drift
 938                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
 939                        $state = 11;
 940                        next;
 941                }
 942                if (exists $ignorebranch{$branch}) {
 943                        print STDERR "Skipping $branch\n";
 944                        $state = 11;
 945                        next;
 946                }
 947                if ($ancestor) {
 948                        if ($ancestor eq $branch) {
 949                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
 950                                $ancestor = $opt_o;
 951                        }
 952                        if (defined get_headref("$remote/$branch")) {
 953                                print STDERR "Branch $branch already exists!\n";
 954                                $state=11;
 955                                next;
 956                        }
 957                        my $id = get_headref("$remote/$ancestor");
 958                        if (!$id) {
 959                                print STDERR "Branch $ancestor does not exist!\n";
 960                                $ignorebranch{$branch} = 1;
 961                                $state=11;
 962                                next;
 963                        }
 964
 965                        system(qw(git update-ref -m cvsimport),
 966                                "$remote/$branch", $id);
 967                        if($? != 0) {
 968                                print STDERR "Could not create branch $branch\n";
 969                                $ignorebranch{$branch} = 1;
 970                                $state=11;
 971                                next;
 972                        }
 973                }
 974                $last_branch = $branch if $branch ne $last_branch;
 975                $state = 9;
 976        } elsif ($state == 8) {
 977                $logmsg .= "$_\n";
 978        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
 979#       VERSION:1.96->1.96.2.1
 980                my $init = ($2 eq "INITIAL");
 981                my $fn = $1;
 982                my $rev = $3;
 983                $fn =~ s#^/+##;
 984                if ($opt_S && $fn =~ m/$opt_S/) {
 985                    print "SKIPPING $fn v $rev\n";
 986                    push(@skipped, $fn);
 987                    next;
 988                }
 989                push @commit_revisions, [$fn, $rev];
 990                print "Fetching $fn   v $rev\n" if $opt_v;
 991                my ($tmpname, $size) = $cvs->file($fn,$rev);
 992                if ($size == -1) {
 993                        push(@old,$fn);
 994                        print "Drop $fn\n" if $opt_v;
 995                } else {
 996                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
 997                        my $pid = open(my $F, '-|');
 998                        die $! unless defined $pid;
 999                        if (!$pid) {
1000                            exec("git", "hash-object", "-w", $tmpname)
1001                                or die "Cannot create object: $!\n";
1002                        }
1003                        my $sha = <$F>;
1004                        chomp $sha;
1005                        close $F;
1006                        my $mode = pmode($cvs->{'mode'});
1007                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
1008                }
1009                unlink($tmpname);
1010        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1011                my $fn = $1;
1012                my $rev = $2;
1013                $fn =~ s#^/+##;
1014                push @commit_revisions, [$fn, $rev];
1015                push(@old,$fn);
1016                print "Delete $fn\n" if $opt_v;
1017        } elsif ($state == 9 and /^\s*$/) {
1018                $state = 10;
1019        } elsif (($state == 9 or $state == 10) and /^-+$/) {
1020                $commitcount++;
1021                if ($opt_L && $commitcount > $opt_L) {
1022                        last;
1023                }
1024                commit();
1025                if (($commitcount & 1023) == 0) {
1026                        system(qw(git repack -a -d));
1027                }
1028                $state = 1;
1029        } elsif ($state == 11 and /^-+$/) {
1030                $state = 1;
1031        } elsif (/^-+$/) { # end of unknown-line processing
1032                $state = 1;
1033        } elsif ($state != 11) { # ignore stuff when skipping
1034                print STDERR "* UNKNOWN LINE * $_\n";
1035        }
1036}
1037commit() if $branch and $state != 11;
1038
1039unless ($opt_P) {
1040        unlink($cvspsfile);
1041}
1042
1043# The heuristic of repacking every 1024 commits can leave a
1044# lot of unpacked data.  If there is more than 1MB worth of
1045# not-packed objects, repack once more.
1046my $line = `git count-objects`;
1047if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1048  my ($n_objects, $kb) = ($1, $2);
1049  1024 < $kb
1050    and system(qw(git repack -a -d));
1051}
1052
1053foreach my $git_index (values %index) {
1054    if ($git_index ne "$git_dir/index") {
1055        unlink($git_index);
1056    }
1057}
1058
1059if (defined $orig_git_index) {
1060        $ENV{GIT_INDEX_FILE} = $orig_git_index;
1061} else {
1062        delete $ENV{GIT_INDEX_FILE};
1063}
1064
1065# Now switch back to the branch we were in before all of this happened
1066if ($orig_branch) {
1067        print "DONE.\n" if $opt_v;
1068        if ($opt_i) {
1069                exit 0;
1070        }
1071        my $tip_at_end = `git rev-parse --verify HEAD`;
1072        if ($tip_at_start ne $tip_at_end) {
1073                for ($tip_at_start, $tip_at_end) { chomp; }
1074                print "Fetched into the current branch.\n" if $opt_v;
1075                system(qw(git read-tree -u -m),
1076                       $tip_at_start, $tip_at_end);
1077                die "Fast-forward update failed: $?\n" if $?;
1078        }
1079        else {
1080                system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1081                die "Could not merge $opt_o into the current branch.\n" if $?;
1082        }
1083} else {
1084        $orig_branch = "master";
1085        print "DONE; creating $orig_branch branch\n" if $opt_v;
1086        system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1087                unless defined get_headref('refs/heads/master');
1088        system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1089                if ($opt_r && $opt_o ne 'HEAD');
1090        system('git', 'update-ref', 'HEAD', "$orig_branch");
1091        unless ($opt_i) {
1092                system(qw(git checkout -f));
1093                die "checkout failed: $?\n" if $?;
1094        }
1095}