git-cvsimport.perlon commit Look for password in both CVS and CVSNT password files. (58fdef0)
   1#!/usr/bin/perl
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to aggregate CVS check-ins into related changes.
   7# Fortunately, "cvsps" does that for us; all we have to do is to parse
   8# its output.
   9#
  10# Checking out the files is done by a single long-running CVS connection
  11# / server process.
  12#
  13# The head revision is on branch "origin" by default.
  14# You can change that with the '-o' option.
  15
  16use 5.008;
  17use strict;
  18use warnings;
  19use Getopt::Long;
  20use File::Spec;
  21use File::Temp qw(tempfile tmpnam);
  22use File::Path qw(mkpath);
  23use File::Basename qw(basename dirname);
  24use Time::Local;
  25use IO::Socket;
  26use IO::Pipe;
  27use POSIX qw(strftime dup2 ENOENT);
  28use IPC::Open2;
  29
  30$SIG{'PIPE'}="IGNORE";
  31$ENV{'TZ'}="UTC";
  32
  33our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
  34my (%conv_author_name, %conv_author_email);
  35
  36sub usage(;$) {
  37        my $msg = shift;
  38        print(STDERR "Error: $msg\n") if $msg;
  39        print STDERR <<END;
  40Usage: git cvsimport     # fetch/update GIT from CVS
  41       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
  42       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
  43       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
  44       [-r remote] [-R] [CVS_module]
  45END
  46        exit(1);
  47}
  48
  49sub read_author_info($) {
  50        my ($file) = @_;
  51        my $user;
  52        open my $f, '<', "$file" or die("Failed to open $file: $!\n");
  53
  54        while (<$f>) {
  55                # Expected format is this:
  56                #   exon=Andreas Ericsson <ae@op5.se>
  57                if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
  58                        $user = $1;
  59                        $conv_author_name{$user} = $2;
  60                        $conv_author_email{$user} = $3;
  61                }
  62                # However, we also read from CVSROOT/users format
  63                # to ease migration.
  64                elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
  65                        my $mapped;
  66                        ($user, $mapped) = ($1, $3);
  67                        if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
  68                                $conv_author_name{$user} = $1;
  69                                $conv_author_email{$user} = $2;
  70                        }
  71                        elsif ($mapped =~ /^<?(.*)>?$/) {
  72                                $conv_author_name{$user} = $user;
  73                                $conv_author_email{$user} = $1;
  74                        }
  75                }
  76                # NEEDSWORK: Maybe warn on unrecognized lines?
  77        }
  78        close ($f);
  79}
  80
  81sub write_author_info($) {
  82        my ($file) = @_;
  83        open my $f, '>', $file or
  84          die("Failed to open $file for writing: $!");
  85
  86        foreach (keys %conv_author_name) {
  87                print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
  88        }
  89        close ($f);
  90}
  91
  92# convert getopts specs for use by git config
  93my %longmap = (
  94        'A:' => 'authors-file',
  95        'M:' => 'merge-regex',
  96        'P:' => undef,
  97        'R' => 'track-revisions',
  98        'S:' => 'ignore-paths',
  99);
 100
 101sub read_repo_config {
 102        # Split the string between characters, unless there is a ':'
 103        # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
 104        my @opts = split(/ *(?!:)/, shift);
 105        foreach my $o (@opts) {
 106                my $key = $o;
 107                $key =~ s/://g;
 108                my $arg = 'git config';
 109                $arg .= ' --bool' if ($o !~ /:$/);
 110                my $ckey = $key;
 111
 112                if (exists $longmap{$o}) {
 113                        # An uppercase option like -R cannot be
 114                        # expressed in the configuration, as the
 115                        # variable names are downcased.
 116                        $ckey = $longmap{$o};
 117                        next if (! defined $ckey);
 118                        $ckey =~ s/-//g;
 119                }
 120                chomp(my $tmp = `$arg --get cvsimport.$ckey`);
 121                if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
 122                        no strict 'refs';
 123                        my $opt_name = "opt_" . $key;
 124                        if (!$$opt_name) {
 125                                $$opt_name = $tmp;
 126                        }
 127                }
 128        }
 129}
 130
 131my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
 132read_repo_config($opts);
 133Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
 134
 135# turn the Getopt::Std specification in a Getopt::Long one,
 136# with support for multiple -M options
 137GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
 138    or usage();
 139usage if $opt_h;
 140
 141if (@ARGV == 0) {
 142                chomp(my $module = `git config --get cvsimport.module`);
 143                push(@ARGV, $module) if $? == 0;
 144}
 145@ARGV <= 1 or usage("You can't specify more than one CVS module");
 146
 147if ($opt_d) {
 148        $ENV{"CVSROOT"} = $opt_d;
 149} elsif (-f 'CVS/Root') {
 150        open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
 151        $opt_d = <$f>;
 152        chomp $opt_d;
 153        close $f;
 154        $ENV{"CVSROOT"} = $opt_d;
 155} elsif ($ENV{"CVSROOT"}) {
 156        $opt_d = $ENV{"CVSROOT"};
 157} else {
 158        usage("CVSROOT needs to be set");
 159}
 160$opt_s ||= "-";
 161$opt_a ||= 0;
 162
 163my $git_tree = $opt_C;
 164$git_tree ||= ".";
 165
 166my $remote;
 167if (defined $opt_r) {
 168        $remote = 'refs/remotes/' . $opt_r;
 169        $opt_o ||= "master";
 170} else {
 171        $opt_o ||= "origin";
 172        $remote = 'refs/heads';
 173}
 174
 175my $cvs_tree;
 176if ($#ARGV == 0) {
 177        $cvs_tree = $ARGV[0];
 178} elsif (-f 'CVS/Repository') {
 179        open my $f, '<', 'CVS/Repository' or
 180            die 'Failed to open CVS/Repository';
 181        $cvs_tree = <$f>;
 182        chomp $cvs_tree;
 183        close $f;
 184} else {
 185        usage("CVS module has to be specified");
 186}
 187
 188our @mergerx = ();
 189if ($opt_m) {
 190        @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
 191}
 192if (@opt_M) {
 193        push (@mergerx, map { qr/$_/ } @opt_M);
 194}
 195
 196# Remember UTC of our starting time
 197# we'll want to avoid importing commits
 198# that are too recent
 199our $starttime = time();
 200
 201select(STDERR); $|=1; select(STDOUT);
 202
 203
 204package CVSconn;
 205# Basic CVS dialog.
 206# We're only interested in connecting and downloading, so ...
 207
 208use File::Spec;
 209use File::Temp qw(tempfile);
 210use POSIX qw(strftime dup2);
 211
 212sub new {
 213        my ($what,$repo,$subdir) = @_;
 214        $what=ref($what) if ref($what);
 215
 216        my $self = {};
 217        $self->{'buffer'} = "";
 218        bless($self,$what);
 219
 220        $repo =~ s#/+$##;
 221        $self->{'fullrep'} = $repo;
 222        $self->conn();
 223
 224        $self->{'subdir'} = $subdir;
 225        $self->{'lines'} = undef;
 226
 227        return $self;
 228}
 229
 230sub find_password_entry {
 231        my ($cvspass, @cvsroot) = @_;
 232        my ($file, $delim) = @$cvspass;
 233        my $pass;
 234        local ($_);
 235
 236        if (open(my $fh, $file)) {
 237                # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
 238                CVSPASSFILE:
 239                while (<$fh>) {
 240                        chomp;
 241                        s/^\/\d+\s+//;
 242                        my ($w, $p) = split($delim,$_,2);
 243                        for my $cvsroot (@cvsroot) {
 244                                if ($w eq $cvsroot) {
 245                                        $pass = $p;
 246                                        last CVSPASSFILE;
 247                                }
 248                        }
 249                }
 250                close($fh);
 251        }
 252        return $pass;
 253}
 254
 255sub conn {
 256        my $self = shift;
 257        my $repo = $self->{'fullrep'};
 258        if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
 259                my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
 260
 261                my ($proxyhost,$proxyport);
 262                if ($param && ($param =~ m/proxy=([^;]+)/)) {
 263                        $proxyhost = $1;
 264                        # Default proxyport, if not specified, is 8080.
 265                        $proxyport = 8080;
 266                        if ($ENV{"CVS_PROXY_PORT"}) {
 267                                $proxyport = $ENV{"CVS_PROXY_PORT"};
 268                        }
 269                        if ($param =~ m/proxyport=([^;]+)/) {
 270                                $proxyport = $1;
 271                        }
 272                }
 273                $repo ||= '/';
 274
 275                # if username is not explicit in CVSROOT, then use current user, as cvs would
 276                $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
 277                my $rr2 = "-";
 278                unless ($port) {
 279                        $rr2 = ":pserver:$user\@$serv:$repo";
 280                        $port=2401;
 281                }
 282                my $rr = ":pserver:$user\@$serv:$port$repo";
 283
 284                if ($pass) {
 285                        $pass = $self->_scramble($pass);
 286                } else {
 287                        my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
 288                                       [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
 289                        my @loc = ();
 290                        foreach my $cvspass (@cvspass) {
 291                                my $p = find_password_entry($cvspass, $rr, $rr2);
 292                                if ($p) {
 293                                        push @loc, $cvspass->[0];
 294                                        $pass = $p;
 295                                }
 296                        }
 297
 298                        if (1 < @loc) {
 299                                die("Multiple cvs password files have ".
 300                                    "entries for CVSROOT $opt_d: @loc");
 301                        } elsif (!$pass) {
 302                                $pass = "A";
 303                        }
 304                }
 305
 306                my ($s, $rep);
 307                if ($proxyhost) {
 308
 309                        # Use a HTTP Proxy. Only works for HTTP proxies that
 310                        # don't require user authentication
 311                        #
 312                        # See: http://www.ietf.org/rfc/rfc2817.txt
 313
 314                        $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
 315                        die "Socket to $proxyhost: $!\n" unless defined $s;
 316                        $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
 317                                or die "Write to $proxyhost: $!\n";
 318                        $s->flush();
 319
 320                        $rep = <$s>;
 321
 322                        # The answer should look like 'HTTP/1.x 2yy ....'
 323                        if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
 324                                die "Proxy connect: $rep\n";
 325                        }
 326                        # Skip up to the empty line of the proxy server output
 327                        # including the response headers.
 328                        while ($rep = <$s>) {
 329                                last if (!defined $rep ||
 330                                         $rep eq "\n" ||
 331                                         $rep eq "\r\n");
 332                        }
 333                } else {
 334                        $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
 335                        die "Socket to $serv: $!\n" unless defined $s;
 336                }
 337
 338                $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
 339                        or die "Write to $serv: $!\n";
 340                $s->flush();
 341
 342                $rep = <$s>;
 343
 344                if ($rep ne "I LOVE YOU\n") {
 345                        $rep="<unknown>" unless $rep;
 346                        die "AuthReply: $rep\n";
 347                }
 348                $self->{'socketo'} = $s;
 349                $self->{'socketi'} = $s;
 350        } else { # local or ext: Fork off our own cvs server.
 351                my $pr = IO::Pipe->new();
 352                my $pw = IO::Pipe->new();
 353                my $pid = fork();
 354                die "Fork: $!\n" unless defined $pid;
 355                my $cvs = 'cvs';
 356                $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
 357                my $rsh = 'rsh';
 358                $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
 359
 360                my @cvs = ($cvs, 'server');
 361                my ($local, $user, $host);
 362                $local = $repo =~ s/:local://;
 363                if (!$local) {
 364                    $repo =~ s/:ext://;
 365                    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
 366                    ($user, $host) = ($1, $2);
 367                }
 368                if (!$local) {
 369                    if ($user) {
 370                        unshift @cvs, $rsh, '-l', $user, $host;
 371                    } else {
 372                        unshift @cvs, $rsh, $host;
 373                    }
 374                }
 375
 376                unless ($pid) {
 377                        $pr->writer();
 378                        $pw->reader();
 379                        dup2($pw->fileno(),0);
 380                        dup2($pr->fileno(),1);
 381                        $pr->close();
 382                        $pw->close();
 383                        exec(@cvs);
 384                }
 385                $pw->writer();
 386                $pr->reader();
 387                $self->{'socketo'} = $pw;
 388                $self->{'socketi'} = $pr;
 389        }
 390        $self->{'socketo'}->write("Root $repo\n");
 391
 392        # Trial and error says that this probably is the minimum set
 393        $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
 394
 395        $self->{'socketo'}->write("valid-requests\n");
 396        $self->{'socketo'}->flush();
 397
 398        chomp(my $rep=$self->readline());
 399        if ($rep !~ s/^Valid-requests\s*//) {
 400                $rep="<unknown>" unless $rep;
 401                die "Expected Valid-requests from server, but got: $rep\n";
 402        }
 403        chomp(my $res=$self->readline());
 404        die "validReply: $res\n" if $res ne "ok";
 405
 406        $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
 407        $self->{'repo'} = $repo;
 408}
 409
 410sub readline {
 411        my ($self) = @_;
 412        return $self->{'socketi'}->getline();
 413}
 414
 415sub _file {
 416        # Request a file with a given revision.
 417        # Trial and error says this is a good way to do it. :-/
 418        my ($self,$fn,$rev) = @_;
 419        $self->{'socketo'}->write("Argument -N\n") or return undef;
 420        $self->{'socketo'}->write("Argument -P\n") or return undef;
 421        # -kk: Linus' version doesn't use it - defaults to off
 422        if ($opt_k) {
 423            $self->{'socketo'}->write("Argument -kk\n") or return undef;
 424        }
 425        $self->{'socketo'}->write("Argument -r\n") or return undef;
 426        $self->{'socketo'}->write("Argument $rev\n") or return undef;
 427        $self->{'socketo'}->write("Argument --\n") or return undef;
 428        $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
 429        $self->{'socketo'}->write("Directory .\n") or return undef;
 430        $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
 431        # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
 432        $self->{'socketo'}->write("co\n") or return undef;
 433        $self->{'socketo'}->flush() or return undef;
 434        $self->{'lines'} = 0;
 435        return 1;
 436}
 437sub _line {
 438        # Read a line from the server.
 439        # ... except that 'line' may be an entire file. ;-)
 440        my ($self, $fh) = @_;
 441        die "Not in lines" unless defined $self->{'lines'};
 442
 443        my $line;
 444        my $res=0;
 445        while (defined($line = $self->readline())) {
 446                # M U gnupg-cvs-rep/AUTHORS
 447                # Updated gnupg-cvs-rep/
 448                # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
 449                # /AUTHORS/1.1///T1.1
 450                # u=rw,g=rw,o=rw
 451                # 0
 452                # ok
 453
 454                if ($line =~ s/^(?:Created|Updated) //) {
 455                        $line = $self->readline(); # path
 456                        $line = $self->readline(); # Entries line
 457                        my $mode = $self->readline(); chomp $mode;
 458                        $self->{'mode'} = $mode;
 459                        defined (my $cnt = $self->readline())
 460                                or die "EOF from server after 'Changed'\n";
 461                        chomp $cnt;
 462                        die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
 463                        $line="";
 464                        $res = $self->_fetchfile($fh, $cnt);
 465                } elsif ($line =~ s/^ //) {
 466                        print $fh $line;
 467                        $res += length($line);
 468                } elsif ($line =~ /^M\b/) {
 469                        # output, do nothing
 470                } elsif ($line =~ /^Mbinary\b/) {
 471                        my $cnt;
 472                        die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
 473                        chomp $cnt;
 474                        die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
 475                        $line="";
 476                        $res += $self->_fetchfile($fh, $cnt);
 477                } else {
 478                        chomp $line;
 479                        if ($line eq "ok") {
 480                                # print STDERR "S: ok (".length($res).")\n";
 481                                return $res;
 482                        } elsif ($line =~ s/^E //) {
 483                                # print STDERR "S: $line\n";
 484                        } elsif ($line =~ /^(Remove-entry|Removed) /i) {
 485                                $line = $self->readline(); # filename
 486                                $line = $self->readline(); # OK
 487                                chomp $line;
 488                                die "Unknown: $line" if $line ne "ok";
 489                                return -1;
 490                        } else {
 491                                die "Unknown: $line\n";
 492                        }
 493                }
 494        }
 495        return undef;
 496}
 497sub file {
 498        my ($self,$fn,$rev) = @_;
 499        my $res;
 500
 501        my ($fh, $name) = tempfile('gitcvs.XXXXXX',
 502                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 503
 504        $self->_file($fn,$rev) and $res = $self->_line($fh);
 505
 506        if (!defined $res) {
 507            print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
 508            truncate $fh, 0;
 509            $self->conn();
 510            $self->_file($fn,$rev) or die "No file command send";
 511            $res = $self->_line($fh);
 512            die "Retry failed" unless defined $res;
 513        }
 514        close ($fh);
 515
 516        return ($name, $res);
 517}
 518sub _fetchfile {
 519        my ($self, $fh, $cnt) = @_;
 520        my $res = 0;
 521        my $bufsize = 1024 * 1024;
 522        while ($cnt) {
 523            if ($bufsize > $cnt) {
 524                $bufsize = $cnt;
 525            }
 526            my $buf;
 527            my $num = $self->{'socketi'}->read($buf,$bufsize);
 528            die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
 529            print $fh $buf;
 530            $res += $num;
 531            $cnt -= $num;
 532        }
 533        return $res;
 534}
 535
 536sub _scramble {
 537        my ($self, $pass) = @_;
 538        my $scrambled = "A";
 539
 540        return $scrambled unless $pass;
 541
 542        my $pass_len = length($pass);
 543        my @pass_arr = split("", $pass);
 544        my $i;
 545
 546        # from cvs/src/scramble.c
 547        my @shifts = (
 548                  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 549                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 550                114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
 551                111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
 552                 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
 553                125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
 554                 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
 555                 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
 556                225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
 557                199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
 558                174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
 559                207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
 560                192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
 561                227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
 562                182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
 563                243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
 564        );
 565
 566        for ($i = 0; $i < $pass_len; $i++) {
 567                $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
 568        }
 569
 570        return $scrambled;
 571}
 572
 573package main;
 574
 575my $cvs = CVSconn->new($opt_d, $cvs_tree);
 576
 577
 578sub pdate($) {
 579        my ($d) = @_;
 580        m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
 581                or die "Unparseable date: $d\n";
 582        my $y=$1; $y-=1900 if $y>1900;
 583        return timegm($6||0,$5,$4,$3,$2-1,$y);
 584}
 585
 586sub pmode($) {
 587        my ($mode) = @_;
 588        my $m = 0;
 589        my $mm = 0;
 590        my $um = 0;
 591        for my $x(split(//,$mode)) {
 592                if ($x eq ",") {
 593                        $m |= $mm&$um;
 594                        $mm = 0;
 595                        $um = 0;
 596                } elsif ($x eq "u") { $um |= 0700;
 597                } elsif ($x eq "g") { $um |= 0070;
 598                } elsif ($x eq "o") { $um |= 0007;
 599                } elsif ($x eq "r") { $mm |= 0444;
 600                } elsif ($x eq "w") { $mm |= 0222;
 601                } elsif ($x eq "x") { $mm |= 0111;
 602                } elsif ($x eq "=") { # do nothing
 603                } else { die "Unknown mode: $mode\n";
 604                }
 605        }
 606        $m |= $mm&$um;
 607        return $m;
 608}
 609
 610sub getwd() {
 611        my $pwd = `pwd`;
 612        chomp $pwd;
 613        return $pwd;
 614}
 615
 616sub is_sha1 {
 617        my $s = shift;
 618        return $s =~ /^[a-f0-9]{40}$/;
 619}
 620
 621sub get_headref ($) {
 622        my $name = shift;
 623        my $r = `git rev-parse --verify '$name' 2>/dev/null`;
 624        return undef unless $? == 0;
 625        chomp $r;
 626        return $r;
 627}
 628
 629my $user_filename_prepend = '';
 630sub munge_user_filename {
 631        my $name = shift;
 632        return File::Spec->file_name_is_absolute($name) ?
 633                $name :
 634                $user_filename_prepend . $name;
 635}
 636
 637-d $git_tree
 638        or mkdir($git_tree,0777)
 639        or die "Could not create $git_tree: $!";
 640if ($git_tree ne '.') {
 641        $user_filename_prepend = getwd() . '/';
 642        chdir($git_tree);
 643}
 644
 645my $last_branch = "";
 646my $orig_branch = "";
 647my %branch_date;
 648my $tip_at_start = undef;
 649
 650my $git_dir = $ENV{"GIT_DIR"} || ".git";
 651$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 652$ENV{"GIT_DIR"} = $git_dir;
 653my $orig_git_index;
 654$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 655
 656my %index; # holds filenames of one index per branch
 657
 658unless (-d $git_dir) {
 659        system(qw(git init));
 660        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 661        system(qw(git read-tree --empty));
 662        die "Cannot init an empty tree: $?\n" if $?;
 663
 664        $last_branch = $opt_o;
 665        $orig_branch = "";
 666} else {
 667        open(F, "-|", qw(git symbolic-ref HEAD)) or
 668                die "Cannot run git symbolic-ref: $!\n";
 669        chomp ($last_branch = <F>);
 670        $last_branch = basename($last_branch);
 671        close(F);
 672        unless ($last_branch) {
 673                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 674                $last_branch = "master";
 675        }
 676        $orig_branch = $last_branch;
 677        $tip_at_start = `git rev-parse --verify HEAD`;
 678
 679        # Get the last import timestamps
 680        my $fmt = '($ref, $author) = (%(refname), %(author));';
 681        my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
 682        open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
 683        while (defined(my $entry = <H>)) {
 684                my ($ref, $author);
 685                eval($entry) || die "cannot eval refs list: $@";
 686                my ($head) = ($ref =~ m|^$remote/(.*)|);
 687                $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
 688                $branch_date{$head} = $1;
 689        }
 690        close(H);
 691        if (!exists $branch_date{$opt_o}) {
 692                die "Branch '$opt_o' does not exist.\n".
 693                       "Either use the correct '-o branch' option,\n".
 694                       "or import to a new repository.\n";
 695        }
 696}
 697
 698-d $git_dir
 699        or die "Could not create git subdir ($git_dir).\n";
 700
 701# now we read (and possibly save) author-info as well
 702-f "$git_dir/cvs-authors" and
 703  read_author_info("$git_dir/cvs-authors");
 704if ($opt_A) {
 705        read_author_info(munge_user_filename($opt_A));
 706        write_author_info("$git_dir/cvs-authors");
 707}
 708
 709# open .git/cvs-revisions, if requested
 710open my $revision_map, '>>', "$git_dir/cvs-revisions"
 711    or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
 712        if defined $opt_R;
 713
 714
 715#
 716# run cvsps into a file unless we are getting
 717# it passed as a file via $opt_P
 718#
 719my $cvspsfile;
 720unless ($opt_P) {
 721        print "Running cvsps...\n" if $opt_v;
 722        my $pid = open(CVSPS,"-|");
 723        my $cvspsfh;
 724        die "Cannot fork: $!\n" unless defined $pid;
 725        unless ($pid) {
 726                my @opt;
 727                @opt = split(/,/,$opt_p) if defined $opt_p;
 728                unshift @opt, '-z', $opt_z if defined $opt_z;
 729                unshift @opt, '-q'         unless defined $opt_v;
 730                unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
 731                        push @opt, '--cvs-direct';
 732                }
 733                exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
 734                die "Could not start cvsps: $!\n";
 735        }
 736        ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
 737                                          DIR => File::Spec->tmpdir());
 738        while (<CVSPS>) {
 739            print $cvspsfh $_;
 740        }
 741        close CVSPS;
 742        $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
 743        close $cvspsfh;
 744} else {
 745        $cvspsfile = munge_user_filename($opt_P);
 746}
 747
 748open(CVS, "<$cvspsfile") or die $!;
 749
 750## cvsps output:
 751#---------------------
 752#PatchSet 314
 753#Date: 1999/09/18 13:03:59
 754#Author: wkoch
 755#Branch: STABLE-BRANCH-1-0
 756#Ancestor branch: HEAD
 757#Tag: (none)
 758#Log:
 759#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 760#Members:
 761#       README:1.57->1.57.2.1
 762#       VERSION:1.96->1.96.2.1
 763#
 764#---------------------
 765
 766my $state = 0;
 767
 768sub update_index (\@\@) {
 769        my $old = shift;
 770        my $new = shift;
 771        open(my $fh, '|-', qw(git update-index -z --index-info))
 772                or die "unable to open git update-index: $!";
 773        print $fh
 774                (map { "0 0000000000000000000000000000000000000000\t$_\0" }
 775                        @$old),
 776                (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
 777                        @$new)
 778                or die "unable to write to git update-index: $!";
 779        close $fh
 780                or die "unable to write to git update-index: $!";
 781        $? and die "git update-index reported error: $?";
 782}
 783
 784sub write_tree () {
 785        open(my $fh, '-|', qw(git write-tree))
 786                or die "unable to open git write-tree: $!";
 787        chomp(my $tree = <$fh>);
 788        is_sha1($tree)
 789                or die "Cannot get tree id ($tree): $!";
 790        close($fh)
 791                or die "Error running git write-tree: $?\n";
 792        print "Tree ID $tree\n" if $opt_v;
 793        return $tree;
 794}
 795
 796my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
 797my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
 798
 799# commits that cvsps cannot place anywhere...
 800$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
 801
 802sub commit {
 803        if ($branch eq $opt_o && !$index{branch} &&
 804                !get_headref("$remote/$branch")) {
 805            # looks like an initial commit
 806            # use the index primed by git init
 807            $ENV{GIT_INDEX_FILE} = "$git_dir/index";
 808            $index{$branch} = "$git_dir/index";
 809        } else {
 810            # use an index per branch to speed up
 811            # imports of projects with many branches
 812            unless ($index{$branch}) {
 813                $index{$branch} = tmpnam();
 814                $ENV{GIT_INDEX_FILE} = $index{$branch};
 815                if ($ancestor) {
 816                    system("git", "read-tree", "$remote/$ancestor");
 817                } else {
 818                    system("git", "read-tree", "$remote/$branch");
 819                }
 820                die "read-tree failed: $?\n" if $?;
 821            }
 822        }
 823        $ENV{GIT_INDEX_FILE} = $index{$branch};
 824
 825        update_index(@old, @new);
 826        @old = @new = ();
 827        my $tree = write_tree();
 828        my $parent = get_headref("$remote/$last_branch");
 829        print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
 830
 831        my @commit_args;
 832        push @commit_args, ("-p", $parent) if $parent;
 833
 834        # loose detection of merges
 835        # based on the commit msg
 836        foreach my $rx (@mergerx) {
 837                next unless $logmsg =~ $rx && $1;
 838                my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
 839                if (my $sha1 = get_headref("$remote/$mparent")) {
 840                        push @commit_args, '-p', "$remote/$mparent";
 841                        print "Merge parent branch: $mparent\n" if $opt_v;
 842                }
 843        }
 844
 845        my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
 846        $ENV{GIT_AUTHOR_NAME} = $author_name;
 847        $ENV{GIT_AUTHOR_EMAIL} = $author_email;
 848        $ENV{GIT_AUTHOR_DATE} = $commit_date;
 849        $ENV{GIT_COMMITTER_NAME} = $author_name;
 850        $ENV{GIT_COMMITTER_EMAIL} = $author_email;
 851        $ENV{GIT_COMMITTER_DATE} = $commit_date;
 852        my $pid = open2(my $commit_read, my $commit_write,
 853                'git', 'commit-tree', $tree, @commit_args);
 854
 855        # compatibility with git2cvs
 856        substr($logmsg,32767) = "" if length($logmsg) > 32767;
 857        $logmsg =~ s/[\s\n]+\z//;
 858
 859        if (@skipped) {
 860            $logmsg .= "\n\n\nSKIPPED:\n\t";
 861            $logmsg .= join("\n\t", @skipped) . "\n";
 862            @skipped = ();
 863        }
 864
 865        print($commit_write "$logmsg\n") && close($commit_write)
 866                or die "Error writing to git commit-tree: $!\n";
 867
 868        print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
 869        chomp(my $cid = <$commit_read>);
 870        is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
 871        print "Commit ID $cid\n" if $opt_v;
 872        close($commit_read);
 873
 874        waitpid($pid,0);
 875        die "Error running git commit-tree: $?\n" if $?;
 876
 877        system('git' , 'update-ref', "$remote/$branch", $cid) == 0
 878                or die "Cannot write branch $branch for update: $!\n";
 879
 880        if ($revision_map) {
 881                print $revision_map "@$_ $cid\n" for @commit_revisions;
 882        }
 883        @commit_revisions = ();
 884
 885        if ($tag) {
 886                my ($xtag) = $tag;
 887                $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
 888                $xtag =~ tr/_/\./ if ( $opt_u );
 889                $xtag =~ s/[\/]/$opt_s/g;
 890                $xtag =~ s/\[//g;
 891
 892                system('git' , 'tag', '-f', $xtag, $cid) == 0
 893                        or die "Cannot create tag $xtag: $!\n";
 894
 895                print "Created tag '$xtag' on '$branch'\n" if $opt_v;
 896        }
 897};
 898
 899my $commitcount = 1;
 900while (<CVS>) {
 901        chomp;
 902        if ($state == 0 and /^-+$/) {
 903                $state = 1;
 904        } elsif ($state == 0) {
 905                $state = 1;
 906                redo;
 907        } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
 908                $patchset = 0+$_;
 909                $state=2;
 910        } elsif ($state == 2 and s/^Date:\s+//) {
 911                $date = pdate($_);
 912                unless ($date) {
 913                        print STDERR "Could not parse date: $_\n";
 914                        $state=0;
 915                        next;
 916                }
 917                $state=3;
 918        } elsif ($state == 3 and s/^Author:\s+//) {
 919                s/\s+$//;
 920                if (/^(.*?)\s+<(.*)>/) {
 921                    ($author_name, $author_email) = ($1, $2);
 922                } elsif ($conv_author_name{$_}) {
 923                        $author_name = $conv_author_name{$_};
 924                        $author_email = $conv_author_email{$_};
 925                } else {
 926                    $author_name = $author_email = $_;
 927                }
 928                $state = 4;
 929        } elsif ($state == 4 and s/^Branch:\s+//) {
 930                s/\s+$//;
 931                tr/_/\./ if ( $opt_u );
 932                s/[\/]/$opt_s/g;
 933                $branch = $_;
 934                $state = 5;
 935        } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
 936                s/\s+$//;
 937                $ancestor = $_;
 938                $ancestor = $opt_o if $ancestor eq "HEAD";
 939                $state = 6;
 940        } elsif ($state == 5) {
 941                $ancestor = undef;
 942                $state = 6;
 943                redo;
 944        } elsif ($state == 6 and s/^Tag:\s+//) {
 945                s/\s+$//;
 946                if ($_ eq "(none)") {
 947                        $tag = undef;
 948                } else {
 949                        $tag = $_;
 950                }
 951                $state = 7;
 952        } elsif ($state == 7 and /^Log:/) {
 953                $logmsg = "";
 954                $state = 8;
 955        } elsif ($state == 8 and /^Members:/) {
 956                $branch = $opt_o if $branch eq "HEAD";
 957                if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
 958                        # skip
 959                        print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
 960                        $state = 11;
 961                        next;
 962                }
 963                if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
 964                        # skip if the commit is too recent
 965                        # given that the cvsps default fuzz is 300s, we give ourselves another
 966                        # 300s just in case -- this also prevents skipping commits
 967                        # due to server clock drift
 968                        print "skip patchset $patchset: $date too recent\n" if $opt_v;
 969                        $state = 11;
 970                        next;
 971                }
 972                if (exists $ignorebranch{$branch}) {
 973                        print STDERR "Skipping $branch\n";
 974                        $state = 11;
 975                        next;
 976                }
 977                if ($ancestor) {
 978                        if ($ancestor eq $branch) {
 979                                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
 980                                $ancestor = $opt_o;
 981                        }
 982                        if (defined get_headref("$remote/$branch")) {
 983                                print STDERR "Branch $branch already exists!\n";
 984                                $state=11;
 985                                next;
 986                        }
 987                        my $id = get_headref("$remote/$ancestor");
 988                        if (!$id) {
 989                                print STDERR "Branch $ancestor does not exist!\n";
 990                                $ignorebranch{$branch} = 1;
 991                                $state=11;
 992                                next;
 993                        }
 994
 995                        system(qw(git update-ref -m cvsimport),
 996                                "$remote/$branch", $id);
 997                        if($? != 0) {
 998                                print STDERR "Could not create branch $branch\n";
 999                                $ignorebranch{$branch} = 1;
1000                                $state=11;
1001                                next;
1002                        }
1003                }
1004                $last_branch = $branch if $branch ne $last_branch;
1005                $state = 9;
1006        } elsif ($state == 8) {
1007                $logmsg .= "$_\n";
1008        } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1009#       VERSION:1.96->1.96.2.1
1010                my $init = ($2 eq "INITIAL");
1011                my $fn = $1;
1012                my $rev = $3;
1013                $fn =~ s#^/+##;
1014                if ($opt_S && $fn =~ m/$opt_S/) {
1015                    print "SKIPPING $fn v $rev\n";
1016                    push(@skipped, $fn);
1017                    next;
1018                }
1019                push @commit_revisions, [$fn, $rev];
1020                print "Fetching $fn   v $rev\n" if $opt_v;
1021                my ($tmpname, $size) = $cvs->file($fn,$rev);
1022                if ($size == -1) {
1023                        push(@old,$fn);
1024                        print "Drop $fn\n" if $opt_v;
1025                } else {
1026                        print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1027                        my $pid = open(my $F, '-|');
1028                        die $! unless defined $pid;
1029                        if (!$pid) {
1030                            exec("git", "hash-object", "-w", $tmpname)
1031                                or die "Cannot create object: $!\n";
1032                        }
1033                        my $sha = <$F>;
1034                        chomp $sha;
1035                        close $F;
1036                        my $mode = pmode($cvs->{'mode'});
1037                        push(@new,[$mode, $sha, $fn]); # may be resurrected!
1038                }
1039                unlink($tmpname);
1040        } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1041                my $fn = $1;
1042                my $rev = $2;
1043                $fn =~ s#^/+##;
1044                push @commit_revisions, [$fn, $rev];
1045                push(@old,$fn);
1046                print "Delete $fn\n" if $opt_v;
1047        } elsif ($state == 9 and /^\s*$/) {
1048                $state = 10;
1049        } elsif (($state == 9 or $state == 10) and /^-+$/) {
1050                $commitcount++;
1051                if ($opt_L && $commitcount > $opt_L) {
1052                        last;
1053                }
1054                commit();
1055                if (($commitcount & 1023) == 0) {
1056                        system(qw(git repack -a -d));
1057                }
1058                $state = 1;
1059        } elsif ($state == 11 and /^-+$/) {
1060                $state = 1;
1061        } elsif (/^-+$/) { # end of unknown-line processing
1062                $state = 1;
1063        } elsif ($state != 11) { # ignore stuff when skipping
1064                print STDERR "* UNKNOWN LINE * $_\n";
1065        }
1066}
1067commit() if $branch and $state != 11;
1068
1069unless ($opt_P) {
1070        unlink($cvspsfile);
1071}
1072
1073# The heuristic of repacking every 1024 commits can leave a
1074# lot of unpacked data.  If there is more than 1MB worth of
1075# not-packed objects, repack once more.
1076my $line = `git count-objects`;
1077if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1078  my ($n_objects, $kb) = ($1, $2);
1079  1024 < $kb
1080    and system(qw(git repack -a -d));
1081}
1082
1083foreach my $git_index (values %index) {
1084    if ($git_index ne "$git_dir/index") {
1085        unlink($git_index);
1086    }
1087}
1088
1089if (defined $orig_git_index) {
1090        $ENV{GIT_INDEX_FILE} = $orig_git_index;
1091} else {
1092        delete $ENV{GIT_INDEX_FILE};
1093}
1094
1095# Now switch back to the branch we were in before all of this happened
1096if ($orig_branch) {
1097        print "DONE.\n" if $opt_v;
1098        if ($opt_i) {
1099                exit 0;
1100        }
1101        my $tip_at_end = `git rev-parse --verify HEAD`;
1102        if ($tip_at_start ne $tip_at_end) {
1103                for ($tip_at_start, $tip_at_end) { chomp; }
1104                print "Fetched into the current branch.\n" if $opt_v;
1105                system(qw(git read-tree -u -m),
1106                       $tip_at_start, $tip_at_end);
1107                die "Fast-forward update failed: $?\n" if $?;
1108        }
1109        else {
1110                system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1111                die "Could not merge $opt_o into the current branch.\n" if $?;
1112        }
1113} else {
1114        $orig_branch = "master";
1115        print "DONE; creating $orig_branch branch\n" if $opt_v;
1116        system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1117                unless defined get_headref('refs/heads/master');
1118        system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1119                if ($opt_r && $opt_o ne 'HEAD');
1120        system('git', 'update-ref', 'HEAD', "$orig_branch");
1121        unless ($opt_i) {
1122                system(qw(git checkout -f));
1123                die "checkout failed: $?\n" if $?;
1124        }
1125}