git-svnimport.perlon commit svn improt needs SVN::Core 1.2.1 or better (37dcf6d)
   1#!/usr/bin/perl -w
   2
   3# This tool is copyright (c) 2005, Matthias Urlichs.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to pull and analyze SVN changes.
   7#
# Checking out the files is done by a single long-running SVN connection
# / server process.
  10#
  11# The head revision is on branch "origin" by default.
  12# You can change that with the '-o' option.
  13
  14require v5.8.0; # for shell-safe open("-|",LIST)
  15use strict;
  16use warnings;
  17use Getopt::Std;
  18use File::Spec;
  19use File::Temp qw(tempfile);
  20use File::Path qw(mkpath);
  21use File::Basename qw(basename dirname);
  22use Time::Local;
  23use IO::Pipe;
  24use POSIX qw(strftime dup2);
  25use IPC::Open2;
  26use SVN::Core;
  27use SVN::Ra;
  28
  29die "Need CVN:COre 1.2.1 or better" if $SVN::Core::VERSION lt "1.2.1";
  30
  31$SIG{'PIPE'}="IGNORE";
  32$ENV{'TZ'}="UTC";
  33
  34our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,$opt_b);
  35
  36sub usage() {
  37        print STDERR <<END;
  38Usage: ${\basename $0}     # fetch/update GIT from CVS
  39       [-o branch-for-HEAD] [-h] [-v]
  40       [-C GIT_repository] [-t tagname] [-T trunkname] [-b branchname]
  41       [-i] [-u] [-s subst] [-m] [-M regex] [SVN_URL]
  42END
  43        exit(1);
  44}
  45
  46getopts("b:C:hivmM:o:t:T:u") or usage();
  47usage if $opt_h;
  48
  49my $tag_name = $opt_t || "tags";
  50my $trunk_name = $opt_T || "trunk";
  51my $branch_name = $opt_b || "branches";
  52
  53@ARGV <= 1 or usage();
  54
  55$opt_o ||= "origin";
  56my $git_tree = $opt_C;
  57$git_tree ||= ".";
  58
  59my $cvs_tree;
  60if ($#ARGV == 0) {
  61        $cvs_tree = $ARGV[0];
  62} elsif (-f 'CVS/Repository') {
  63        open my $f, '<', 'CVS/Repository' or 
  64            die 'Failed to open CVS/Repository';
  65        $cvs_tree = <$f>;
  66        chomp $cvs_tree;
  67        close $f;
  68} else {
  69        usage();
  70}
  71
  72our @mergerx = ();
  73if ($opt_m) {
  74        @mergerx = ( qr/\W(?:from|of|merge|merging|merged) (\w+)/i );
  75}
  76if ($opt_M) {
  77        push (@mergerx, qr/$opt_M/);
  78}
  79
  80select(STDERR); $|=1; select(STDOUT);
  81
  82
  83package SVNconn;
  84# Basic SVN connection.
  85# We're only interested in connecting and downloading, so ...
  86
  87use File::Spec;
  88use File::Temp qw(tempfile);
  89use POSIX qw(strftime dup2);
  90
  91sub new {
  92        my($what,$repo) = @_;
  93        $what=ref($what) if ref($what);
  94
  95        my $self = {};
  96        $self->{'buffer'} = "";
  97        bless($self,$what);
  98
  99        $repo =~ s#/+$##;
 100        $self->{'fullrep'} = $repo;
 101        $self->conn();
 102
 103        $self->{'lines'} = undef;
 104
 105        return $self;
 106}
 107
 108sub conn {
 109        my $self = shift;
 110        my $repo = $self->{'fullrep'};
 111        my $s = SVN::Ra->new($repo);
 112
 113        die "SVN connection to $repo: $!\n" unless defined $s;
 114        $self->{'svn'} = $s;
 115        $self->{'repo'} = $repo;
 116        $self->{'maxrev'} = $s->get_latest_revnum();
 117}
 118
 119sub file {
 120        my($self,$path,$rev) = @_;
 121        my $res;
 122
 123        my ($fh, $name) = tempfile('gitsvn.XXXXXX', 
 124                    DIR => File::Spec->tmpdir(), UNLINK => 1);
 125
 126        print "... $rev $path ...\n" if $opt_v;
 127        eval { $self->{'svn'}->get_file($path,$rev,$fh); };
 128        if (defined $@ and $@ !~ /Attempted to get checksum/) {
 129            # retry
 130            $self->conn();
 131                eval { $self->{'svn'}->get_file($path,$rev,$fh); };
 132        };
 133        return () if defined $@ and $@ !~ /Attempted to get checksum/;
 134        die $@ if $@;
 135        close ($fh);
 136
 137        return ($name, $res);
 138}
 139
 140
 141package main;
 142
 143my $svn = SVNconn->new($cvs_tree);
 144
 145
 146sub pdate($) {
 147        my($d) = @_;
 148        $d =~ m#(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)#
 149                or die "Unparseable date: $d\n";
 150        my $y=$1; $y-=1900 if $y>1900;
 151        return timegm($6||0,$5,$4,$3,$2-1,$y);
 152}
 153
 154sub getwd() {
 155        my $pwd = `pwd`;
 156        chomp $pwd;
 157        return $pwd;
 158}
 159
 160
 161sub get_headref($$) {
 162    my $name    = shift;
 163    my $git_dir = shift; 
 164    my $sha;
 165    
 166    if (open(C,"$git_dir/refs/heads/$name")) {
 167        chomp($sha = <C>);
 168        close(C);
 169        length($sha) == 40
 170            or die "Cannot get head id for $name ($sha): $!\n";
 171    }
 172    return $sha;
 173}
 174
 175
 176-d $git_tree
 177        or mkdir($git_tree,0777)
 178        or die "Could not create $git_tree: $!";
 179chdir($git_tree);
 180
 181my $orig_branch = "";
 182my $forward_master = 0;
 183my %branches;
 184
 185my $git_dir = $ENV{"GIT_DIR"} || ".git";
 186$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
 187$ENV{"GIT_DIR"} = $git_dir;
 188my $orig_git_index;
 189$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
 190my ($git_ih, $git_index) = tempfile('gitXXXXXX', SUFFIX => '.idx',
 191                                    DIR => File::Spec->tmpdir());
 192close ($git_ih);
 193$ENV{GIT_INDEX_FILE} = $git_index;
 194my $maxnum = 0;
 195my $last_rev = "";
 196my $last_branch;
 197my $current_rev = 0;
 198unless(-d $git_dir) {
 199        system("git-init-db");
 200        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
 201        system("git-read-tree");
 202        die "Cannot init an empty tree: $?\n" if $?;
 203
 204        $last_branch = $opt_o;
 205        $orig_branch = "";
 206} else {
 207        -f "$git_dir/refs/heads/$opt_o"
 208                or die "Branch '$opt_o' does not exist.\n".
 209                       "Either use the correct '-o branch' option,\n".
 210                       "or import to a new repository.\n";
 211
 212        -f "$git_dir/svn2git"
 213                or die "'$git_dir/svn2git' does not exist.\n".
 214                       "You need that file for incremental imports.\n";
 215        $last_branch = basename(readlink("$git_dir/HEAD"));
 216        unless($last_branch) {
 217                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
 218                $last_branch = "master";
 219        }
 220        $orig_branch = $last_branch;
 221        $last_rev = get_headref($orig_branch, $git_dir);
 222        if (-f "$git_dir/SVN2GIT_HEAD") {
 223                die <<EOM;
 224SVN2GIT_HEAD exists.
 225Make sure your working directory corresponds to HEAD and remove SVN2GIT_HEAD.
 226You may need to run
 227
 228    git-read-tree -m -u SVN2GIT_HEAD HEAD
 229EOM
 230        }
 231        system('cp', "$git_dir/HEAD", "$git_dir/SVN2GIT_HEAD");
 232
 233        $forward_master =
 234            $opt_o ne 'master' && -f "$git_dir/refs/heads/master" &&
 235            system('cmp', '-s', "$git_dir/refs/heads/master", 
 236                                "$git_dir/refs/heads/$opt_o") == 0;
 237
 238        # populate index
 239        system('git-read-tree', $last_rev);
 240        die "read-tree failed: $?\n" if $?;
 241
 242        # Get the last import timestamps
 243        open my $B,"<", "$git_dir/svn2git";
 244        while(<$B>) {
 245                chomp;
 246                my($num,$branch,$ref) = split;
 247                $branches{$branch}{$num} = $ref;
 248                $branches{$branch}{"LAST"} = $ref;
 249                $current_rev = $num+1 if $current_rev < $num+1;
 250        }
 251        close($B);
 252}
 253-d $git_dir
 254        or die "Could not create git subdir ($git_dir).\n";
 255
 256open BRANCHES,">>", "$git_dir/svn2git";
 257
 258
 259## cvsps output:
 260#---------------------
 261#PatchSet 314
 262#Date: 1999/09/18 13:03:59
 263#Author: wkoch
 264#Branch: STABLE-BRANCH-1-0
 265#Ancestor branch: HEAD
 266#Tag: (none)
 267#Log:
 268#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
 269#Members:
 270#       README:1.57->1.57.2.1
 271#       VERSION:1.96->1.96.2.1
 272#
 273#---------------------
 274
 275my $state = 0;
 276
 277sub get_file($$$) {
 278        my($rev,$branch,$path) = @_;
 279
 280        # revert split_path(), below
 281        my $svnpath;
 282        $path = "" if $path eq "/"; # this should not happen, but ...
 283        if($branch eq "/") {
 284                $svnpath = "/$trunk_name/$path";
 285        } elsif($branch =~ m#^/#) {
 286                $svnpath = "/$tag_name$branch/$path";
 287        } else {
 288                $svnpath = "/$branch_name/$branch/$path";
 289        }
 290
 291        # now get it
 292        my ($name, $res) = eval { $svn->file($svnpath,$rev); };
 293        return () unless defined $name;
 294
 295        open my $F, '-|', "git-hash-object", "-w", $name
 296                or die "Cannot create object: $!\n";
 297        my $sha = <$F>;
 298        chomp $sha;
 299        close $F;
 300        my $mode = "0644"; # SV does not seem to store any file modes
 301        return [$mode, $sha, $path];
 302}
 303
 304sub split_path($$) {
 305        my($rev,$path) = @_;
 306        my $branch;
 307
 308        if($path =~ s#^/\Q$tag_name\E/([^/]+)/?##) {
 309                $branch = "/$1";
 310        } elsif($path =~ s#^/\Q$trunk_name\E/?##) {
 311                $branch = "/";
 312        } elsif($path =~ s#^/\Q$branch_name\E/([^/]+)/?##) {
 313                $branch = $1;
 314        } else {
 315                print STDERR "$rev: Unrecognized path: $path\n";
 316                return ()
 317        }
 318        $path = "/" if $path eq "";
 319        return ($branch,$path);
 320}
 321
 322sub commit {
 323        my($branch, $changed_paths, $revision, $author, $date, $message) = @_;
 324        my($author_name,$author_email,$dest);
 325        my(@old,@new);
 326
 327        if (not defined $author) {
 328                $author_name = $author_email = "unknown";
 329        } elsif ($author =~ /^(.*?)\s+<(.*)>$/) {
 330                ($author_name, $author_email) = ($1, $2);
 331        } else {
 332                $author =~ s/^<(.*)>$/$1/;
 333                $author_name = $author_email = $author;
 334        }
 335        $date = pdate($date);
 336
 337        my $tag;
 338        my $parent;
 339        if($branch eq "/") { # trunk
 340                $parent = $opt_o;
 341        } elsif($branch =~ m#^/(.+)#) { # tag
 342                $tag = 1;
 343                $parent = $1;
 344        } else { # "normal" branch
 345                # nothing to do
 346                $parent = $branch;
 347        }
 348        $dest = $parent;
 349
 350        my $prev = $changed_paths->{"/"};
 351        if($prev and $prev->[0] eq "A") {
 352                delete $changed_paths->{"/"};
 353                my $oldpath = $prev->[1];
 354                my $rev;
 355                if(defined $oldpath) {
 356                        my $p;
 357                        ($parent,$p) = split_path($revision,$oldpath);
 358                        if($parent eq "/") {
 359                                $parent = $opt_o;
 360                        } else {
 361                                $parent =~ s#^/##; # if it's a tag
 362                        }
 363                } else {
 364                        $parent = undef;
 365                }
 366        }
 367
 368        my $rev;
 369        if(defined $parent) {
 370                open(H,"git-rev-parse --verify $parent |");
 371                $rev = <H>;
 372                close(H) or do {
 373                        print STDERR "$revision: cannot find commit '$parent'!\n";
 374                        return;
 375                };
 376                chop $rev;
 377                if(length($rev) != 40) {
 378                        print STDERR "$revision: cannot find commit '$parent'!\n";
 379                        return;
 380                }
 381                $rev = $branches{($parent eq $opt_o) ? "/" : $parent}{"LAST"};
 382                if($revision != 1 and not $rev) {
 383                        print STDERR "$revision: do not know ancestor for '$parent'!\n";
 384                        return;
 385                }
 386        } else {
 387                $rev = undef;
 388        }
 389
 390#       if($prev and $prev->[0] eq "A") {
 391#               if(not $tag) {
 392#                       unless(open(H,"> $git_dir/refs/heads/$branch")) {
 393#                               print STDERR "$revision: Could not create branch $branch: $!\n";
 394#                               $state=11;
 395#                               next;
 396#                       }
 397#                       print H "$rev\n"
 398#                               or die "Could not write branch $branch: $!";
 399#                       close(H)
 400#                               or die "Could not write branch $branch: $!";
 401#               }
 402#       }
 403        if(not defined $rev) {
 404                unlink($git_index);
 405        } elsif ($rev ne $last_rev) {
 406                print "Switching from $last_rev to $rev ($branch)\n" if $opt_v;
 407                system("git-read-tree", $rev);
 408                die "read-tree failed for $rev: $?\n" if $?;
 409                $last_rev = $rev;
 410        }
 411
 412        while(my($path,$action) = each %$changed_paths) {
 413                if ($action->[0] eq "A") {
 414                        my $f = get_file($revision,$branch,$path);
 415                        push(@new,$f) if $f;
 416                } elsif ($action->[0] eq "D") {
 417                        push(@old,$path);
 418                } elsif ($action->[0] eq "M") {
 419                        my $f = get_file($revision,$branch,$path);
 420                        push(@new,$f) if $f;
 421                } elsif ($action->[0] eq "R") {
 422                        # refer to a file/tree in an earlier commit
 423                        push(@old,$path); # remove any old stuff
 424
 425                        # ... and add any new stuff
 426                        my($b,$p) = split_path($revision,$action->[1]);
 427                        open my $F,"-|","git-ls-tree","-r","-z", $branches{$b}{$action->[2]}, $p;
 428                        local $/ = '\0';
 429                        while(<$F>) {
 430                                chomp;
 431                                my($m,$p) = split(/\t/,$_,2);
 432                                my($mode,$type,$sha1) = split(/ /,$m);
 433                                next if $type ne "blob";
 434                                push(@new,[$mode,$sha1,$p]);
 435                        }
 436                } else {
 437                        die "$revision: unknown action '".$action->[0]."' for $path\n";
 438                }
 439        }
 440
 441        if(@old) {
 442                open my $F, "-│", "git-ls-files", "-z", @old or die $!;
 443                @old = ();
 444                local $/ = '\0';
 445                while(<$F>) {
 446                        chomp;
 447                        push(@old,$_);
 448                }
 449                close($F);
 450
 451                while(@old) {
 452                        my @o2;
 453                        if(@old > 55) {
 454                                @o2 = splice(@old,0,50);
 455                        } else {
 456                                @o2 = @old;
 457                                @old = ();
 458                        }
 459                        system("git-update-index","--force-remove","--",@o2);
 460                        die "Cannot remove files: $?\n" if $?;
 461                }
 462        }
 463        while(@new) {
 464                my @n2;
 465                if(@new > 12) {
 466                        @n2 = splice(@new,0,10);
 467                } else {
 468                        @n2 = @new;
 469                        @new = ();
 470                }
 471                system("git-update-index","--add",
 472                        (map { ('--cacheinfo', @$_) } @n2));
 473                die "Cannot add files: $?\n" if $?;
 474        }
 475
 476        my $pid = open(C,"-|");
 477        die "Cannot fork: $!" unless defined $pid;
 478        unless($pid) {
 479                exec("git-write-tree");
 480                die "Cannot exec git-write-tree: $!\n";
 481        }
 482        chomp(my $tree = <C>);
 483        length($tree) == 40
 484                or die "Cannot get tree id ($tree): $!\n";
 485        close(C)
 486                or die "Error running git-write-tree: $?\n";
 487        print "Tree ID $tree\n" if $opt_v;
 488
 489        my $pr = IO::Pipe->new() or die "Cannot open pipe: $!\n";
 490        my $pw = IO::Pipe->new() or die "Cannot open pipe: $!\n";
 491        $pid = fork();
 492        die "Fork: $!\n" unless defined $pid;
 493        unless($pid) {
 494                $pr->writer();
 495                $pw->reader();
 496                open(OUT,">&STDOUT");
 497                dup2($pw->fileno(),0);
 498                dup2($pr->fileno(),1);
 499                $pr->close();
 500                $pw->close();
 501
 502                my @par = ();
 503                @par = ("-p",$rev) if defined $rev;
 504
 505                # loose detection of merges
 506                # based on the commit msg
 507                foreach my $rx (@mergerx) {
 508                        if ($message =~ $rx) {
 509                                my $mparent = $1;
 510                                if ($mparent eq 'HEAD') { $mparent = $opt_o };
 511                                if ( -e "$git_dir/refs/heads/$mparent") {
 512                                        $mparent = get_headref($mparent, $git_dir);
 513                                        push @par, '-p', $mparent;
 514                                        print OUT "Merge parent branch: $mparent\n" if $opt_v;
 515                                }
 516                        } 
 517                }
 518
 519                exec("env",
 520                        "GIT_AUTHOR_NAME=$author_name",
 521                        "GIT_AUTHOR_EMAIL=$author_email",
 522                        "GIT_AUTHOR_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)),
 523                        "GIT_COMMITTER_NAME=$author_name",
 524                        "GIT_COMMITTER_EMAIL=$author_email",
 525                        "GIT_COMMITTER_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)),
 526                        "git-commit-tree", $tree,@par);
 527                die "Cannot exec git-commit-tree: $!\n";
 528        }
 529        $pw->writer();
 530        $pr->reader();
 531
 532        $message =~ s/[\s\n]+\z//;
 533
 534        print $pw "$message\n"
 535                or die "Error writing to git-commit-tree: $!\n";
 536        $pw->close();
 537
 538        print "Committed change $revision:$branch ".strftime("%Y-%m-%d %H:%M:%S",gmtime($date)).")\n" if $opt_v;
 539        chomp(my $cid = <$pr>);
 540        length($cid) == 40
 541                or die "Cannot get commit id ($cid): $!\n";
 542        print "Commit ID $cid\n" if $opt_v;
 543        $pr->close();
 544
 545        waitpid($pid,0);
 546        die "Error running git-commit-tree: $?\n" if $?;
 547
 548        if(defined $dest) {
 549                print "Writing to refs/heads/$dest\n" if $opt_v;
 550                open(C,">$git_dir/refs/heads/$dest") and 
 551                print C ("$cid\n") and
 552                close(C)
 553                        or die "Cannot write branch $dest for update: $!\n";
 554        } else {
 555                print "... no known parent\n" if $opt_v;
 556        }
 557        $branches{$branch}{"LAST"} = $cid;
 558        $branches{$branch}{$revision} = $cid;
 559        $last_rev = $cid;
 560        print BRANCHES "$revision $branch $cid\n";
 561        print "DONE: $revision $dest $cid\n" if $opt_v;
 562
 563        if($tag) {
 564                my($in, $out) = ('','');
 565                $last_rev = "-" if %$changed_paths;
 566                # the tag was 'complex', i.e. did not refer to a "real" revision
 567                
 568                $tag =~ tr/_/\./ if $opt_u;
 569
 570                my $pid = open2($in, $out, 'git-mktag');
 571                print $out ("object $cid\n".
 572                    "type commit\n".
 573                    "tag $tag\n".
 574                    "tagger $author_name <$author_email>\n") and
 575                close($out)
 576                    or die "Cannot create tag object $tag: $!\n";
 577
 578                my $tagobj = <$in>;
 579                chomp $tagobj;
 580
 581                if ( !close($in) or waitpid($pid, 0) != $pid or
 582                                $? != 0 or $tagobj !~ /^[0123456789abcdef]{40}$/ ) {
 583                        die "Cannot create tag object $tag: $!\n";
 584                }
 585                
 586
 587                open(C,">$git_dir/refs/tags/$tag")
 588                        or die "Cannot create tag $tag: $!\n";
 589                print C "$tagobj\n"
 590                        or die "Cannot write tag $tag: $!\n";
 591                close(C)
 592                        or die "Cannot write tag $tag: $!\n";
 593
 594                print "Created tag '$tag' on '$branch'\n" if $opt_v;
 595        }
 596}
 597
 598my ($changed_paths, $revision, $author, $date, $message, $pool) = @_;
 599sub _commit_all {
 600        ($changed_paths, $revision, $author, $date, $message, $pool) = @_;
 601        my %p;
 602        while(my($path,$action) = each %$changed_paths) {
 603                $p{$path} = [ $action->action,$action->copyfrom_path, $action->copyfrom_rev ];
 604        }
 605        $changed_paths = \%p;
 606}
 607
 608sub commit_all {
 609        my %done;
 610        my @col;
 611        my $pref;
 612        my $branch;
 613
 614        while(my($path,$action) = each %$changed_paths) {
 615                ($branch,$path) = split_path($revision,$path);
 616                next if not defined $branch;
 617                $done{$branch}{$path} = $action;
 618        }
 619        while(($branch,$changed_paths) = each %done) {
 620                commit($branch, $changed_paths, $revision, $author, $date, $message);
 621        }
 622}
 623
 624while(++$current_rev < $svn->{'maxrev'}) {
 625        $svn->{'svn'}->get_log("/",$current_rev,$current_rev,$current_rev,1,1,\&_commit_all,"");
 626        commit_all();
 627}
 628
 629
 630unlink($git_index);
 631
 632if (defined $orig_git_index) {
 633        $ENV{GIT_INDEX_FILE} = $orig_git_index;
 634} else {
 635        delete $ENV{GIT_INDEX_FILE};
 636}
 637
 638# Now switch back to the branch we were in before all of this happened
 639if($orig_branch) {
 640        print "DONE\n" if $opt_v;
 641        system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master")
 642                if $forward_master;
 643        unless ($opt_i) {
 644                system('git-read-tree', '-m', '-u', 'SVN2GIT_HEAD', 'HEAD');
 645                die "read-tree failed: $?\n" if $?;
 646        }
 647} else {
 648        $orig_branch = "master";
 649        print "DONE; creating $orig_branch branch\n" if $opt_v;
 650        system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master")
 651                unless -f "$git_dir/refs/heads/master";
 652        unlink("$git_dir/HEAD");
 653        symlink("refs/heads/$orig_branch","$git_dir/HEAD");
 654        unless ($opt_i) {
 655                system('git checkout');
 656                die "checkout failed: $?\n" if $?;
 657        }
 658}
 659unlink("$git_dir/SVN2GIT_HEAD");
 660close(BRANCHES);