git-archimport.perlon commit archimport: add -D <depth> and -a switch (42f44b0)
   1#!/usr/bin/perl -w
   2#
   3# This tool is copyright (c) 2005, Martin Langhoff.
   4# It is released under the Gnu Public License, version 2.
   5#
   6# The basic idea is to walk the output of tla abrowse, 
   7# fetch the changesets and apply them. 
   8#
   9
  10=head1 Invocation
  11
  12    git-archimport [ -h ] [ -v ] [ -T ] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
  13
  14Imports a project from one or more Arch repositories. It will follow branches
  15and repositories within the namespaces defined by the <archive/branch>
  16parameters suppplied. If it cannot find the remote branch a merge comes from
  17it will just import it as a regular commit. If it can find it, it will mark it 
  18as a merge whenever possible.
  19
  20See man (1) git-archimport for more details.
  21
  22=head1 TODO
  23
  24 - create tag objects instead of ref tags
  25 - audit shell-escaping of filenames
  26 - hide our private tags somewhere smarter
  27 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines  
  28
  29=head1 Devel tricks
  30
  31Add print in front of the shell commands invoked via backticks. 
  32
  33=head1 Devel Notes
  34
  35There are several places where Arch and git terminology are intermixed
  36and potentially confused.
  37
  38The notion of a "branch" in git is approximately equivalent to
  39a "archive/category--branch--version" in Arch.  Also, it should be noted
  40that the "--branch" portion of "archive/category--branch--version" is really
  41optional in Arch although not many people (nor tools!) seem to know this.
  42This means that "archive/category--version" is also a valid "branch"
  43in git terms.
  44
  45We always refer to Arch names by their fully qualified variant (which
  46means the "archive" name is prefixed.
  47
  48For people unfamiliar with Arch, an "archive" is the term for "repository",
  49and can contain multiple, unrelated branches.
  50
  51=cut
  52
  53use strict;
  54use warnings;
  55use Getopt::Std;
  56use File::Temp qw(tempdir);
  57use File::Path qw(mkpath rmtree);
  58use File::Basename qw(basename dirname);
  59use Data::Dumper qw/ Dumper /;
  60use IPC::Open2;
  61
  62$SIG{'PIPE'}="IGNORE";
  63$ENV{'TZ'}="UTC";
  64
  65my $git_dir = $ENV{"GIT_DIR"} || ".git";
  66$ENV{"GIT_DIR"} = $git_dir;
  67my $ptag_dir = "$git_dir/archimport/tags";
  68
  69our($opt_h,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
  70
  71sub usage() {
  72    print STDERR <<END;
  73Usage: ${\basename $0}     # fetch/update GIT from Arch
  74       [ -o ] [ -h ] [ -v ] [ -T ] [ -a ] [ -D depth  ] [ -t tempdir ]
  75       repository/arch-branch [ repository/arch-branch] ...
  76END
  77    exit(1);
  78}
  79
  80getopts("Thvat:D:") or usage();
  81usage if $opt_h;
  82
  83@ARGV >= 1 or usage();
  84# $arch_branches:
  85# values associated with keys:
  86#   =1 - Arch version / git 'branch' detected via abrowse on a limit
  87#   >1 - Arch version / git 'branch' of an auxilliary branch we've merged
  88my %arch_branches = map { $_ => 1 } @ARGV;
  89
  90$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
  91my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
  92$opt_v && print "+ Using $tmp as temporary directory\n";
  93
  94my %reachable = ();             # Arch repositories we can access
  95my %unreachable = ();           # Arch repositories we can't access :<
  96my @psets  = ();                # the collection
  97my %psets  = ();                # the collection, by name
  98
  99my %rptags = ();                # my reverse private tags
 100                                # to map a SHA1 to a commitid
 101my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
 102
 103sub do_abrowse {
 104    my $stage = shift;
 105    while (my ($limit, $level) = each %arch_branches) {
 106        next unless $level == $stage;
 107        
 108        open ABROWSE, "$TLA abrowse -fkD --merges $limit |" 
 109                                or die "Problems with tla abrowse: $!";
 110    
 111        my %ps        = ();         # the current one
 112        my $lastseen  = '';
 113    
 114        while (<ABROWSE>) {
 115            chomp;
 116            
 117            # first record padded w 8 spaces
 118            if (s/^\s{8}\b//) {
 119                my ($id, $type) = split(m/\s+/, $_, 2);
 120
 121                my %last_ps;
 122                # store the record we just captured
 123                if (%ps && !exists $psets{ $ps{id} }) {
 124                    %last_ps = %ps; # break references
 125                    push (@psets, \%last_ps);
 126                    $psets{ $last_ps{id} } = \%last_ps;
 127                }
 128                
 129                my $branch = extract_versionname($id);
 130                %ps = ( id => $id, branch => $branch );
 131                if (%last_ps && ($last_ps{branch} eq $branch)) {
 132                    $ps{parent_id} = $last_ps{id};
 133                }
 134                
 135                $arch_branches{$branch} = 1;
 136                $lastseen = 'id';
 137
 138                # deal with types (should work with baz or tla):
 139                if ($type =~ m/\(.*changeset\)/) {
 140                    $ps{type} = 's';
 141                } elsif ($type =~ /\(.*import\)/) {
 142                    $ps{type} = 'i';
 143                } elsif ($type =~ m/\(tag.*\)/) {
 144                    $ps{type} = 't';
 145                    # read which revision we've tagged when we parse the log
 146                    #$ps{tag}  = $1;
 147                } else { 
 148                    warn "Unknown type $type";
 149                }
 150
 151                $arch_branches{$branch} = 1;
 152                $lastseen = 'id';
 153            } elsif (s/^\s{10}//) { 
 154                # 10 leading spaces or more 
 155                # indicate commit metadata
 156                
 157                # date
 158                if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
 159                    $ps{date}   = $1;
 160                    $lastseen = 'date';
 161                } elsif ($_ eq 'merges in:') {
 162                    $ps{merges} = [];
 163                    $lastseen = 'merges';
 164                } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
 165                    my $id = $_;
 166                    push (@{$ps{merges}}, $id);
 167                   
 168                    # aggressive branch finding:
 169                    if ($opt_D) {
 170                        my $branch = extract_versionname($id);
 171                        my $repo = extract_reponame($branch);
 172                        
 173                        if (archive_reachable($repo) &&
 174                                !defined $arch_branches{$branch}) {
 175                            $arch_branches{$branch} = $stage + 1;
 176                        }
 177                    }
 178                } else {
 179                    warn "more metadata after merges!?: $_\n" unless /^\s*$/;
 180                }
 181            }
 182        }
 183
 184        if (%ps && !exists $psets{ $ps{id} }) {
 185            my %temp = %ps;         # break references
 186            if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
 187                $temp{parent_id} = $psets[$#psets]{id};
 188            }
 189            push (@psets, \%temp);  
 190            $psets{ $temp{id} } = \%temp;
 191        }    
 192        
 193        close ABROWSE or die "$TLA abrowse failed on $limit\n";
 194    }
 195}                               # end foreach $root
 196
 197do_abrowse(1);
 198my $depth = 2;
 199$opt_D ||= 0;
 200while ($depth <= $opt_D) {
 201    do_abrowse($depth);
 202    $depth++;
 203}
 204
 205## Order patches by time
 206# FIXME see if we can find a more optimal way to do this by graphing
 207# the ancestry data and walking it, that way we won't have to rely on
 208# client-supplied dates
 209@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
 210
 211#print Dumper \@psets;
 212
 213##
 214## TODO cleanup irrelevant patches
 215##      and put an initial import
 216##      or a full tag
 217my $import = 0;
 218unless (-d $git_dir) { # initial import
 219    if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
 220        print "Starting import from $psets[0]{id}\n";
 221        `git-init-db`;
 222        die $! if $?;
 223        $import = 1;
 224    } else {
 225        die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
 226    }
 227} else {    # progressing an import
 228    # load the rptags
 229    opendir(DIR, $ptag_dir)
 230        || die "can't opendir: $!";
 231    while (my $file = readdir(DIR)) {
 232        # skip non-interesting-files
 233        next unless -f "$ptag_dir/$file";
 234   
 235        # convert first '--' to '/' from old git-archimport to use
 236        # as an archivename/c--b--v private tag
 237        if ($file !~ m!,!) {
 238            my $oldfile = $file;
 239            $file =~ s!--!,!;
 240            print STDERR "converting old tag $oldfile to $file\n";
 241            rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
 242        }
 243        my $sha = ptag($file);
 244        chomp $sha;
 245        $rptags{$sha} = $file;
 246    }
 247    closedir DIR;
 248}
 249
 250# process patchsets
 251# extract the Arch repository name (Arch "archive" in Arch-speak)
 252sub extract_reponame {
 253    my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
 254    return (split(/\//, $fq_cvbr))[0];
 255}
 256 
 257sub extract_versionname {
 258    my $name = shift;
 259    $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
 260    return $name;
 261}
 262
 263# convert a fully-qualified revision or version to a unique dirname:
 264#   normalperson@yhbt.net-05/mpd--uclinux--1--patch-2 
 265# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
 266#
 267# the git notion of a branch is closer to
 268# archive/category--branch--version than archive/category--branch, so we
 269# use this to convert to git branch names.
 270# Also, keep archive names but replace '/' with ',' since it won't require
 271# subdirectories, and is safer than swapping '--' which could confuse
 272# reverse-mapping when dealing with bastard branches that
 273# are just archive/category--version  (no --branch)
 274sub tree_dirname {
 275    my $revision = shift;
 276    my $name = extract_versionname($revision);
 277    $name =~ s#/#,#;
 278    return $name;
 279}
 280
 281# old versions of git-archimport just use the <category--branch> part:
 282sub old_style_branchname {
 283    my $id = shift;
 284    my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
 285    chomp $ret;
 286    return $ret;
 287}
 288
 289*git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
 290
 291# process patchsets
 292foreach my $ps (@psets) {
 293    $ps->{branch} = git_branchname($ps->{id});
 294
 295    #
 296    # ensure we have a clean state 
 297    # 
 298    if (`git-diff-files`) {
 299        die "Unclean tree when about to process $ps->{id} " .
 300            " - did we fail to commit cleanly before?";
 301    }
 302    die $! if $?;
 303
 304    #
 305    # skip commits already in repo
 306    #
 307    if (ptag($ps->{id})) {
 308      $opt_v && print " * Skipping already imported: $ps->{id}\n";
 309      next;
 310    }
 311
 312    print " * Starting to work on $ps->{id}\n";
 313
 314    # 
 315    # create the branch if needed
 316    #
 317    if ($ps->{type} eq 'i' && !$import) {
 318        die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
 319    }
 320
 321    unless ($import) { # skip for import
 322        if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 323            # we know about this branch
 324            system('git-checkout',$ps->{branch});
 325        } else {
 326            # new branch! we need to verify a few things
 327            die "Branch on a non-tag!" unless $ps->{type} eq 't';
 328            my $branchpoint = ptag($ps->{tag});
 329            die "Tagging from unknown id unsupported: $ps->{tag}" 
 330                unless $branchpoint;
 331            
 332            # find where we are supposed to branch from
 333            system('git-checkout','-b',$ps->{branch},$branchpoint);
 334
 335            # If we trust Arch with the fact that this is just 
 336            # a tag, and it does not affect the state of the tree
 337            # then we just tag and move on
 338            tag($ps->{id}, $branchpoint);
 339            ptag($ps->{id}, $branchpoint);
 340            print " * Tagged $ps->{id} at $branchpoint\n";
 341            next;
 342        } 
 343        die $! if $?;
 344    } 
 345
 346    #
 347    # Apply the import/changeset/merge into the working tree
 348    # 
 349    if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
 350        apply_import($ps) or die $!;
 351        $import=0;
 352    } elsif ($ps->{type} eq 's') {
 353        apply_cset($ps);
 354    }
 355
 356    #
 357    # prepare update git's index, based on what arch knows
 358    # about the pset, resolve parents, etc
 359    #
 360    my $tree;
 361    
 362    my $commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id}); 
 363    die "Error in cat-archive-log: $!" if $?;
 364        
 365    # parselog will git-add/rm files
 366    # and generally prepare things for the commit
 367    # NOTE: parselog will shell-quote filenames! 
 368    my ($sum, $msg, $add, $del, $mod, $ren) = parselog($commitlog);
 369    my $logmessage = "$sum\n$msg";
 370
 371
 372    # imports don't give us good info
 373    # on added files. Shame on them
 374    if ($ps->{type} eq 'i' || $ps->{type} eq 't') { 
 375        `find . -type f -print0 | grep -zv '^./$git_dir' | xargs -0 -l100 git-update-index --add`;
 376        `git-ls-files --deleted -z | xargs --no-run-if-empty -0 -l100 git-update-index --remove`;
 377    }
 378
 379    if (@$add) {
 380        while (@$add) {
 381            my @slice = splice(@$add, 0, 100);
 382            my $slice = join(' ', @slice);          
 383            `git-update-index --add $slice`;
 384            die "Error in git-update-index --add: $!" if $?;
 385        }
 386    }
 387    if (@$del) {
 388        foreach my $file (@$del) {
 389            unlink $file or die "Problems deleting $file : $!";
 390        }
 391        while (@$del) {
 392            my @slice = splice(@$del, 0, 100);
 393            my $slice = join(' ', @slice);
 394            `git-update-index --remove $slice`;
 395            die "Error in git-update-index --remove: $!" if $?;
 396        }
 397    }
 398    if (@$ren) {                # renamed
 399        if (@$ren % 2) {
 400            die "Odd number of entries in rename!?";
 401        }
 402        ;
 403        while (@$ren) {
 404            my $from = pop @$ren;
 405            my $to   = pop @$ren;           
 406
 407            unless (-d dirname($to)) {
 408                mkpath(dirname($to)); # will die on err
 409            }
 410            #print "moving $from $to";
 411            `mv $from $to`;
 412            die "Error renaming $from $to : $!" if $?;
 413            `git-update-index --remove $from`;
 414            die "Error in git-update-index --remove: $!" if $?;
 415            `git-update-index --add $to`;
 416            die "Error in git-update-index --add: $!" if $?;
 417        }
 418
 419    }
 420    if (@$mod) {                # must be _after_ renames
 421        while (@$mod) {
 422            my @slice = splice(@$mod, 0, 100);
 423            my $slice = join(' ', @slice);
 424            `git-update-index $slice`;
 425            die "Error in git-update-index: $!" if $?;
 426        }
 427    }
 428
 429    # warn "errors when running git-update-index! $!";
 430    $tree = `git-write-tree`;
 431    die "cannot write tree $!" if $?;
 432    chomp $tree;
 433        
 434    
 435    #
 436    # Who's your daddy?
 437    #
 438    my @par;
 439    if ( -e "$git_dir/refs/heads/$ps->{branch}") {
 440        if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
 441            my $p = <HEAD>;
 442            close HEAD;
 443            chomp $p;
 444            push @par, '-p', $p;
 445        } else { 
 446            if ($ps->{type} eq 's') {
 447                warn "Could not find the right head for the branch $ps->{branch}";
 448            }
 449        }
 450    }
 451    
 452    if ($ps->{merges}) {
 453        push @par, find_parents($ps);
 454    }
 455
 456    #    
 457    # Commit, tag and clean state
 458    #
 459    $ENV{TZ}                  = 'GMT';
 460    $ENV{GIT_AUTHOR_NAME}     = $ps->{author};
 461    $ENV{GIT_AUTHOR_EMAIL}    = $ps->{email};
 462    $ENV{GIT_AUTHOR_DATE}     = $ps->{date};
 463    $ENV{GIT_COMMITTER_NAME}  = $ps->{author};
 464    $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
 465    $ENV{GIT_COMMITTER_DATE}  = $ps->{date};
 466
 467    my ($pid, $commit_rh, $commit_wh);
 468    $commit_rh = 'commit_rh';
 469    $commit_wh = 'commit_wh';
 470    
 471    $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par) 
 472        or die $!;
 473    print WRITER $logmessage;   # write
 474    close WRITER;
 475    my $commitid = <READER>;    # read
 476    chomp $commitid;
 477    close READER;
 478    waitpid $pid,0;             # close;
 479
 480    if (length $commitid != 40) {
 481        die "Something went wrong with the commit! $! $commitid";
 482    }
 483    #
 484    # Update the branch
 485    # 
 486    open  HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
 487    print HEAD $commitid;
 488    close HEAD;
 489    system('git-update-ref', 'HEAD', "$ps->{branch}");
 490
 491    # tag accordingly
 492    ptag($ps->{id}, $commitid); # private tag
 493    if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
 494        tag($ps->{id}, $commitid);
 495    }
 496    print " * Committed $ps->{id}\n";
 497    print "   + tree   $tree\n";
 498    print "   + commit $commitid\n";
 499    $opt_v && print "   + commit date is  $ps->{date} \n";
 500    $opt_v && print "   + parents:  ",join(' ',@par),"\n";
 501}
 502
 503sub apply_import {
 504    my $ps = shift;
 505    my $bname = git_branchname($ps->{id});
 506
 507    mkpath($tmp);
 508
 509    safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
 510    die "Cannot get import: $!" if $?;    
 511    system('rsync','-aI','--delete', '--exclude',$git_dir,
 512                '--exclude','.arch-ids','--exclude','{arch}',
 513                "$tmp/import/", './');
 514    die "Cannot rsync import:$!" if $?;
 515    
 516    rmtree("$tmp/import");
 517    die "Cannot remove tempdir: $!" if $?;
 518    
 519
 520    return 1;
 521}
 522
 523sub apply_cset {
 524    my $ps = shift;
 525
 526    mkpath($tmp);
 527
 528    # get the changeset
 529    safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
 530    die "Cannot get changeset: $!" if $?;
 531    
 532    # apply patches
 533    if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
 534        # this can be sped up considerably by doing
 535        #    (find | xargs cat) | patch
 536        # but that cna get mucked up by patches
 537        # with missing trailing newlines or the standard 
 538        # 'missing newline' flag in the patch - possibly
 539        # produced with an old/buggy diff.
 540        # slow and safe, we invoke patch once per patchfile
 541        `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
 542        die "Problem applying patches! $!" if $?;
 543    }
 544
 545    # apply changed binary files
 546    if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
 547        foreach my $mod (@modified) {
 548            chomp $mod;
 549            my $orig = $mod;
 550            $orig =~ s/\.modified$//; # lazy
 551            $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
 552            #print "rsync -p '$mod' '$orig'";
 553            system('rsync','-p',$mod,"./$orig");
 554            die "Problem applying binary changes! $!" if $?;
 555        }
 556    }
 557
 558    # bring in new files
 559    system('rsync','-aI','--exclude',$git_dir,
 560                '--exclude','.arch-ids',
 561                '--exclude', '{arch}',
 562                "$tmp/changeset/new-files-archive/",'./');
 563
 564    # deleted files are hinted from the commitlog processing
 565
 566    rmtree("$tmp/changeset");
 567}
 568
 569
 570# =for reference
 571# A log entry looks like 
 572# Revision: moodle-org--moodle--1.3.3--patch-15
 573# Archive: arch-eduforge@catalyst.net.nz--2004
 574# Creator: Penny Leach <penny@catalyst.net.nz>
 575# Date: Wed May 25 14:15:34 NZST 2005
 576# Standard-date: 2005-05-25 02:15:34 GMT
 577# New-files: lang/de/.arch-ids/block_glossary_random.php.id
 578#     lang/de/.arch-ids/block_html.php.id
 579# New-directories: lang/de/help/questionnaire
 580#     lang/de/help/questionnaire/.arch-ids
 581# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
 582#    db_sears.sql db/db_sears.sql
 583# Removed-files: lang/be/docs/.arch-ids/release.html.id
 584#     lang/be/docs/.arch-ids/releaseold.html.id
 585# Modified-files: admin/cron.php admin/delete.php
 586#     admin/editor.html backup/lib.php backup/restore.php
 587# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
 588# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
 589# Keywords:
 590#
 591# Updating yadda tadda tadda madda
 592sub parselog {
 593    my $log = shift;
 594    #print $log;
 595
 596    my (@add, @del, @mod, @ren, @kw, $sum, $msg );
 597
 598    if ($log =~ m/(?:\n|^)New-files:(.*?)(?=\n\w)/s ) {
 599        my $files = $1;
 600        @add = split(m/\s+/s, $files);
 601    }
 602       
 603    if ($log =~ m/(?:\n|^)Removed-files:(.*?)(?=\n\w)/s ) {
 604        my $files = $1;
 605        @del = split(m/\s+/s, $files);
 606    }
 607    
 608    if ($log =~ m/(?:\n|^)Modified-files:(.*?)(?=\n\w)/s ) {
 609        my $files = $1;
 610        @mod = split(m/\s+/s, $files);
 611    }
 612    
 613    if ($log =~ m/(?:\n|^)Renamed-files:(.*?)(?=\n\w)/s ) {
 614        my $files = $1;
 615        @ren = split(m/\s+/s, $files);
 616    }
 617
 618    $sum ='';
 619    if ($log =~ m/^Summary:(.+?)$/m ) {
 620        $sum = $1;
 621        $sum =~ s/^\s+//;
 622        $sum =~ s/\s+$//;
 623    }
 624
 625    $msg = '';
 626    if ($log =~ m/\n\n(.+)$/s) {
 627        $msg = $1;
 628        $msg =~ s/^\s+//;
 629        $msg =~ s/\s+$//;
 630    }
 631
 632
 633    # cleanup the arrays
 634    foreach my $ref ( (\@add, \@del, \@mod, \@ren) ) {
 635        my @tmp = ();
 636        while (my $t = pop @$ref) {
 637            next unless length ($t);
 638            next if $t =~ m!\{arch\}/!;
 639            next if $t =~ m!\.arch-ids/!;
 640            next if $t =~ m!\.arch-inventory$!;
 641           # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
 642           # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
 643           if  ($t =~ /\\/ ){
 644               $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
 645           }
 646            push (@tmp, $t);
 647        }
 648        @$ref = @tmp;
 649    }
 650    
 651    #print Dumper [$sum, $msg, \@add, \@del, \@mod, \@ren]; 
 652    return       ($sum, $msg, \@add, \@del, \@mod, \@ren); 
 653}
 654
 655# write/read a tag
 656sub tag {
 657    my ($tag, $commit) = @_;
 658 
 659    if ($opt_o) {
 660        $tag =~ s|/|--|g;
 661    } else {
 662        # don't use subdirs for tags yet, it could screw up other porcelains
 663        $tag =~ s|/|,|g;
 664    }
 665    
 666    if ($commit) {
 667        open(C,">","$git_dir/refs/tags/$tag")
 668            or die "Cannot create tag $tag: $!\n";
 669        print C "$commit\n"
 670            or die "Cannot write tag $tag: $!\n";
 671        close(C)
 672            or die "Cannot write tag $tag: $!\n";
 673        print " * Created tag '$tag' on '$commit'\n" if $opt_v;
 674    } else {                    # read
 675        open(C,"<","$git_dir/refs/tags/$tag")
 676            or die "Cannot read tag $tag: $!\n";
 677        $commit = <C>;
 678        chomp $commit;
 679        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 680        close(C)
 681            or die "Cannot read tag $tag: $!\n";
 682        return $commit;
 683    }
 684}
 685
 686# write/read a private tag
 687# reads fail softly if the tag isn't there
 688sub ptag {
 689    my ($tag, $commit) = @_;
 690
 691    # don't use subdirs for tags yet, it could screw up other porcelains
 692    $tag =~ s|/|,|g; 
 693    
 694    my $tag_file = "$ptag_dir/$tag";
 695    my $tag_branch_dir = dirname($tag_file);
 696    mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
 697
 698    if ($commit) {              # write
 699        open(C,">",$tag_file)
 700            or die "Cannot create tag $tag: $!\n";
 701        print C "$commit\n"
 702            or die "Cannot write tag $tag: $!\n";
 703        close(C)
 704            or die "Cannot write tag $tag: $!\n";
 705        $rptags{$commit} = $tag 
 706            unless $tag =~ m/--base-0$/;
 707    } else {                    # read
 708        # if the tag isn't there, return 0
 709        unless ( -s $tag_file) {
 710            return 0;
 711        }
 712        open(C,"<",$tag_file)
 713            or die "Cannot read tag $tag: $!\n";
 714        $commit = <C>;
 715        chomp $commit;
 716        die "Error reading tag $tag: $!\n" unless length $commit == 40;
 717        close(C)
 718            or die "Cannot read tag $tag: $!\n";
 719        unless (defined $rptags{$commit}) {
 720            $rptags{$commit} = $tag;
 721        }
 722        return $commit;
 723    }
 724}
 725
 726sub find_parents {
 727    #
 728    # Identify what branches are merging into me
 729    # and whether we are fully merged
 730    # git-merge-base <headsha> <headsha> should tell
 731    # me what the base of the merge should be 
 732    #
 733    my $ps = shift;
 734
 735    my %branches; # holds an arrayref per branch
 736                  # the arrayref contains a list of
 737                  # merged patches between the base
 738                  # of the merge and the current head
 739
 740    my @parents;  # parents found for this commit
 741
 742    # simple loop to split the merges
 743    # per branch
 744    foreach my $merge (@{$ps->{merges}}) {
 745        my $branch = git_branchname($merge);
 746        unless (defined $branches{$branch} ){
 747            $branches{$branch} = [];
 748        }
 749        push @{$branches{$branch}}, $merge;
 750    }
 751
 752    #
 753    # foreach branch find a merge base and walk it to the 
 754    # head where we are, collecting the merged patchsets that
 755    # Arch has recorded. Keep that in @have
 756    # Compare that with the commits on the other branch
 757    # between merge-base and the tip of the branch (@need)
 758    # and see if we have a series of consecutive patches
 759    # starting from the merge base. The tip of the series
 760    # of consecutive patches merged is our new parent for 
 761    # that branch.
 762    #
 763    foreach my $branch (keys %branches) {
 764
 765        # check that we actually know about the branch
 766        next unless -e "$git_dir/refs/heads/$branch";
 767
 768        my $mergebase = `git-merge-base $branch $ps->{branch}`;
 769        if ($?) { 
 770            # Don't die here, Arch supports one-way cherry-picking
 771            # between branches with no common base (or any relationship
 772            # at all beforehand)
 773            warn "Cannot find merge base for $branch and $ps->{branch}";
 774            next;
 775        }
 776        chomp $mergebase;
 777
 778        # now walk up to the mergepoint collecting what patches we have
 779        my $branchtip = git_rev_parse($ps->{branch});
 780        my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
 781        my %have; # collected merges this branch has
 782        foreach my $merge (@{$ps->{merges}}) {
 783            $have{$merge} = 1;
 784        }
 785        my %ancestorshave;
 786        foreach my $par (@ancestors) {
 787            $par = commitid2pset($par);
 788            if (defined $par->{merges}) {
 789                foreach my $merge (@{$par->{merges}}) {
 790                    $ancestorshave{$merge}=1;
 791                }
 792            }
 793        }
 794        # print "++++ Merges in $ps->{id} are....\n";
 795        # my @have = sort keys %have;   print Dumper(\@have);
 796
 797        # merge what we have with what ancestors have
 798        %have = (%have, %ancestorshave);
 799
 800        # see what the remote branch has - these are the merges we 
 801        # will want to have in a consecutive series from the mergebase
 802        my $otherbranchtip = git_rev_parse($branch);
 803        my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
 804        my @need;
 805        foreach my $needps (@needraw) {         # get the psets
 806            $needps = commitid2pset($needps);
 807            # git-rev-list will also
 808            # list commits merged in via earlier 
 809            # merges. we are only interested in commits
 810            # from the branch we're looking at
 811            if ($branch eq $needps->{branch}) {
 812                push @need, $needps->{id};
 813            }
 814        }
 815
 816        # print "++++ Merges from $branch we want are....\n";
 817        # print Dumper(\@need);
 818
 819        my $newparent;
 820        while (my $needed_commit = pop @need) {
 821            if ($have{$needed_commit}) {
 822                $newparent = $needed_commit;
 823            } else {
 824                last; # break out of the while
 825            }
 826        }
 827        if ($newparent) {
 828            push @parents, $newparent;
 829        }
 830
 831
 832    } # end foreach branch
 833
 834    # prune redundant parents
 835    my %parents;
 836    foreach my $p (@parents) {
 837        $parents{$p} = 1;
 838    }
 839    foreach my $p (@parents) {
 840        next unless exists $psets{$p}{merges};
 841        next unless ref    $psets{$p}{merges};
 842        my @merges = @{$psets{$p}{merges}};
 843        foreach my $merge (@merges) {
 844            if ($parents{$merge}) { 
 845                delete $parents{$merge};
 846            }
 847        }
 848    }
 849
 850    @parents = ();
 851    foreach (keys %parents) {
 852        push @parents, '-p', ptag($_);
 853    }
 854    return @parents;
 855}
 856
 857sub git_rev_parse {
 858    my $name = shift;
 859    my $val  = `git-rev-parse $name`;
 860    die "Error: git-rev-parse $name" if $?;
 861    chomp $val;
 862    return $val;
 863}
 864
 865# resolve a SHA1 to a known patchset
 866sub commitid2pset {
 867    my $commitid = shift;
 868    chomp $commitid;
 869    my $name = $rptags{$commitid} 
 870        || die "Cannot find reverse tag mapping for $commitid";
 871    $name =~ s|,|/|;
 872    my $ps   = $psets{$name} 
 873        || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
 874    return $ps;
 875}
 876
 877
 878# an alterative to `command` that allows input to be passed as an array
 879# to work around shell problems with weird characters in arguments
 880sub safe_pipe_capture {
 881    my @output;
 882    if (my $pid = open my $child, '-|') {
 883        @output = (<$child>);
 884        close $child or die join(' ',@_).": $! $?";
 885    } else {
 886        exec(@_) or die $?; # exec() can fail the executable can't be found
 887    }
 888    return wantarray ? @output : join('',@output);
 889}
 890
 891# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
 892sub arch_tree_id {
 893    my $dir = shift;
 894    chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
 895    return $ret;
 896}
 897
 898sub archive_reachable {
 899    my $archive = shift;
 900    return 1 if $reachable{$archive};
 901    return 0 if $unreachable{$archive};
 902    
 903    if (system "$TLA whereis-archive $archive >/dev/null") {
 904        if ($opt_a && (system($TLA,'register-archive',
 905                      "http://mirrors.sourcecontrol.net/$archive") == 0)) {
 906            $reachable{$archive} = 1;
 907            return 1;
 908        }
 909        print STDERR "Archive is unreachable: $archive\n";
 910        $unreachable{$archive} = 1;
 911        return 0;
 912    } else {
 913        $reachable{$archive} = 1;
 914        return 1;
 915    }
 916}
 917