builtin/fast-export.c on commit "fast-export: do not copy from modified file" (b3e8ca8)
   1/*
   2 * "git fast-export" builtin command
   3 *
   4 * Copyright (C) 2007 Johannes E. Schindelin
   5 */
   6#include "builtin.h"
   7#include "cache.h"
   8#include "refs.h"
   9#include "commit.h"
  10#include "object.h"
  11#include "tag.h"
  12#include "diff.h"
  13#include "diffcore.h"
  14#include "log-tree.h"
  15#include "revision.h"
  16#include "decorate.h"
  17#include "string-list.h"
  18#include "utf8.h"
  19#include "parse-options.h"
  20#include "quote.h"
  21#include "remote.h"
  22#include "blob.h"
  23
  24static const char *fast_export_usage[] = {
  25        N_("git fast-export [rev-list-opts]"),
  26        NULL
  27};
  28
  29static int progress;
  30static enum { ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = ABORT;
  31static enum { ERROR, DROP, REWRITE } tag_of_filtered_mode = ERROR;
  32static int fake_missing_tagger;
  33static int use_done_feature;
  34static int no_data;
  35static int full_tree;
  36static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
  37static struct refspec *refspecs;
  38static int refspecs_nr;
  39static int anonymize;
  40
  41static int parse_opt_signed_tag_mode(const struct option *opt,
  42                                     const char *arg, int unset)
  43{
  44        if (unset || !strcmp(arg, "abort"))
  45                signed_tag_mode = ABORT;
  46        else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore"))
  47                signed_tag_mode = VERBATIM;
  48        else if (!strcmp(arg, "warn"))
  49                signed_tag_mode = WARN;
  50        else if (!strcmp(arg, "warn-strip"))
  51                signed_tag_mode = WARN_STRIP;
  52        else if (!strcmp(arg, "strip"))
  53                signed_tag_mode = STRIP;
  54        else
  55                return error("Unknown signed-tags mode: %s", arg);
  56        return 0;
  57}
  58
  59static int parse_opt_tag_of_filtered_mode(const struct option *opt,
  60                                          const char *arg, int unset)
  61{
  62        if (unset || !strcmp(arg, "abort"))
  63                tag_of_filtered_mode = ERROR;
  64        else if (!strcmp(arg, "drop"))
  65                tag_of_filtered_mode = DROP;
  66        else if (!strcmp(arg, "rewrite"))
  67                tag_of_filtered_mode = REWRITE;
  68        else
  69                return error("Unknown tag-of-filtered mode: %s", arg);
  70        return 0;
  71}
  72
/* Object -> mark table; the mark is stored in the decoration pointer itself. */
static struct decoration idnums;
/* Most recently assigned mark; mark_next_object() increments it before use. */
static uint32_t last_idnum;
  75
  76static int has_unshown_parent(struct commit *commit)
  77{
  78        struct commit_list *parent;
  79
  80        for (parent = commit->parents; parent; parent = parent->next)
  81                if (!(parent->item->object.flags & SHOWN) &&
  82                    !(parent->item->object.flags & UNINTERESTING))
  83                        return 1;
  84        return 0;
  85}
  86
/*
 * One cached "original -> anonymized" mapping, kept in a hashmap by
 * anonymize_mem().
 */
struct anonymized_entry {
	struct hashmap_entry hash;	/* hashmap linkage, initialized with memhash() of the key */
	const char *orig;		/* the original bytes, used as the lookup key */
	size_t orig_len;		/* number of key bytes compared by anonymized_entry_cmp() */
	const char *anon;		/* replacement returned by the generate callback */
	size_t anon_len;		/* value of *len after the generate callback returned */
};
  94
  95static int anonymized_entry_cmp(const void *va, const void *vb,
  96                                const void *data)
  97{
  98        const struct anonymized_entry *a = va, *b = vb;
  99        return a->orig_len != b->orig_len ||
 100                memcmp(a->orig, b->orig, a->orig_len);
 101}
 102
 103/*
 104 * Basically keep a cache of X->Y so that we can repeatedly replace
 105 * the same anonymized string with another. The actual generation
 106 * is farmed out to the generate function.
 107 */
 108static const void *anonymize_mem(struct hashmap *map,
 109                                 void *(*generate)(const void *, size_t *),
 110                                 const void *orig, size_t *len)
 111{
 112        struct anonymized_entry key, *ret;
 113
 114        if (!map->cmpfn)
 115                hashmap_init(map, anonymized_entry_cmp, 0);
 116
 117        hashmap_entry_init(&key, memhash(orig, *len));
 118        key.orig = orig;
 119        key.orig_len = *len;
 120        ret = hashmap_get(map, &key, NULL);
 121
 122        if (!ret) {
 123                ret = xmalloc(sizeof(*ret));
 124                hashmap_entry_init(&ret->hash, key.hash.hash);
 125                ret->orig = xstrdup(orig);
 126                ret->orig_len = *len;
 127                ret->anon = generate(orig, len);
 128                ret->anon_len = *len;
 129                hashmap_put(map, ret);
 130        }
 131
 132        *len = ret->anon_len;
 133        return ret->anon;
 134}
 135
 136/*
 137 * We anonymize each component of a path individually,
 138 * so that paths a/b and a/c will share a common root.
 139 * The paths are cached via anonymize_mem so that repeated
 140 * lookups for "a" will yield the same value.
 141 */
 142static void anonymize_path(struct strbuf *out, const char *path,
 143                           struct hashmap *map,
 144                           void *(*generate)(const void *, size_t *))
 145{
 146        while (*path) {
 147                const char *end_of_component = strchrnul(path, '/');
 148                size_t len = end_of_component - path;
 149                const char *c = anonymize_mem(map, generate, path, &len);
 150                strbuf_add(out, c, len);
 151                path = end_of_component;
 152                if (*path)
 153                        strbuf_addch(out, *path++);
 154        }
 155}
 156
 157/* Since intptr_t is C99, we do not use it here */
 158static inline uint32_t *mark_to_ptr(uint32_t mark)
 159{
 160        return ((uint32_t *)NULL) + mark;
 161}
 162
 163static inline uint32_t ptr_to_mark(void * mark)
 164{
 165        return (uint32_t *)mark - (uint32_t *)NULL;
 166}
 167
 168static inline void mark_object(struct object *object, uint32_t mark)
 169{
 170        add_decoration(&idnums, object, mark_to_ptr(mark));
 171}
 172
 173static inline void mark_next_object(struct object *object)
 174{
 175        mark_object(object, ++last_idnum);
 176}
 177
 178static int get_object_mark(struct object *object)
 179{
 180        void *decoration = lookup_decoration(&idnums, object);
 181        if (!decoration)
 182                return 0;
 183        return ptr_to_mark(decoration);
 184}
 185
 186static void show_progress(void)
 187{
 188        static int counter = 0;
 189        if (!progress)
 190                return;
 191        if ((++counter % progress) == 0)
 192                printf("progress %d objects\n", counter);
 193}
 194
 195/*
 196 * Ideally we would want some transformation of the blob data here
 197 * that is unreversible, but would still be the same size and have
 198 * the same data relationship to other blobs (so that we get the same
 199 * delta and packing behavior as the original). But the first and last
 200 * requirements there are probably mutually exclusive, so let's take
 201 * the easy way out for now, and just generate arbitrary content.
 202 *
 203 * There's no need to cache this result with anonymize_mem, since
 204 * we already handle blob content caching with marks.
 205 */
 206static char *anonymize_blob(unsigned long *size)
 207{
 208        static int counter;
 209        struct strbuf out = STRBUF_INIT;
 210        strbuf_addf(&out, "anonymous blob %d", counter++);
 211        *size = out.len;
 212        return strbuf_detach(&out, NULL);
 213}
 214
 215static void export_blob(const unsigned char *sha1)
 216{
 217        unsigned long size;
 218        enum object_type type;
 219        char *buf;
 220        struct object *object;
 221        int eaten;
 222
 223        if (no_data)
 224                return;
 225
 226        if (is_null_sha1(sha1))
 227                return;
 228
 229        object = lookup_object(sha1);
 230        if (object && object->flags & SHOWN)
 231                return;
 232
 233        if (anonymize) {
 234                buf = anonymize_blob(&size);
 235                object = (struct object *)lookup_blob(sha1);
 236                eaten = 0;
 237        } else {
 238                buf = read_sha1_file(sha1, &type, &size);
 239                if (!buf)
 240                        die ("Could not read blob %s", sha1_to_hex(sha1));
 241                if (check_sha1_signature(sha1, buf, size, typename(type)) < 0)
 242                        die("sha1 mismatch in blob %s", sha1_to_hex(sha1));
 243                object = parse_object_buffer(sha1, type, size, buf, &eaten);
 244        }
 245
 246        if (!object)
 247                die("Could not read blob %s", sha1_to_hex(sha1));
 248
 249        mark_next_object(object);
 250
 251        printf("blob\nmark :%"PRIu32"\ndata %lu\n", last_idnum, size);
 252        if (size && fwrite(buf, size, 1, stdout) != 1)
 253                die_errno ("Could not write blob '%s'", sha1_to_hex(sha1));
 254        printf("\n");
 255
 256        show_progress();
 257
 258        object->flags |= SHOWN;
 259        if (!eaten)
 260                free(buf);
 261}
 262
 263static int depth_first(const void *a_, const void *b_)
 264{
 265        const struct diff_filepair *a = *((const struct diff_filepair **)a_);
 266        const struct diff_filepair *b = *((const struct diff_filepair **)b_);
 267        const char *name_a, *name_b;
 268        int len_a, len_b, len;
 269        int cmp;
 270
 271        name_a = a->one ? a->one->path : a->two->path;
 272        name_b = b->one ? b->one->path : b->two->path;
 273
 274        len_a = strlen(name_a);
 275        len_b = strlen(name_b);
 276        len = (len_a < len_b) ? len_a : len_b;
 277
 278        /* strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd' */
 279        cmp = memcmp(name_a, name_b, len);
 280        if (cmp)
 281                return cmp;
 282        cmp = len_b - len_a;
 283        if (cmp)
 284                return cmp;
 285        /*
 286         * Move 'R'ename entries last so that all references of the file
 287         * appear in the output before it is renamed (e.g., when a file
 288         * was copied and renamed in the same commit).
 289         */
 290        return (a->status == 'R') - (b->status == 'R');
 291}
 292
 293static void print_path_1(const char *path)
 294{
 295        int need_quote = quote_c_style(path, NULL, NULL, 0);
 296        if (need_quote)
 297                quote_c_style(path, NULL, stdout, 0);
 298        else if (strchr(path, ' '))
 299                printf("\"%s\"", path);
 300        else
 301                printf("%s", path);
 302}
 303
 304static void *anonymize_path_component(const void *path, size_t *len)
 305{
 306        static int counter;
 307        struct strbuf out = STRBUF_INIT;
 308        strbuf_addf(&out, "path%d", counter++);
 309        return strbuf_detach(&out, len);
 310}
 311
 312static void print_path(const char *path)
 313{
 314        if (!anonymize)
 315                print_path_1(path);
 316        else {
 317                static struct hashmap paths;
 318                static struct strbuf anon = STRBUF_INIT;
 319
 320                anonymize_path(&anon, path, &paths, anonymize_path_component);
 321                print_path_1(anon.buf);
 322                strbuf_reset(&anon);
 323        }
 324}
 325
 326static void *generate_fake_sha1(const void *old, size_t *len)
 327{
 328        static uint32_t counter = 1; /* avoid null sha1 */
 329        unsigned char *out = xcalloc(20, 1);
 330        put_be32(out + 16, counter++);
 331        return out;
 332}
 333
 334static const unsigned char *anonymize_sha1(const unsigned char *sha1)
 335{
 336        static struct hashmap sha1s;
 337        size_t len = 20;
 338        return anonymize_mem(&sha1s, generate_fake_sha1, sha1, &len);
 339}
 340
/*
 * Write the file changes queued in "q" to stdout as fast-import commands:
 * "D" for deletions, "C"/"R" for copies/renames and "M" for everything
 * that sets a path's content.
 *
 * "data" is a string_list of paths already touched by commands written
 * so far; every destination path written here is inserted into it.
 * NOTE(review): presumably this is the per-commit list that
 * handle_commit() clears, installed as the diff callback data -- confirm
 * at the registration site.  "options" is unused.
 */
static void show_filemodify(struct diff_queue_struct *q,
			    struct diff_options *options, void *data)
{
	int i;
	struct string_list *changed = data;

	/*
	 * Handle files below a directory first, in case they are all deleted
	 * and the directory changes to a file or symlink.
	 */
	QSORT(q->queue, q->nr, depth_first);

	for (i = 0; i < q->nr; i++) {
		/* "one" is the pre-image side of the pair, "two" the post-image side */
		struct diff_filespec *ospec = q->queue[i]->one;
		struct diff_filespec *spec = q->queue[i]->two;

		switch (q->queue[i]->status) {
		case DIFF_STATUS_DELETED:
			printf("D ");
			print_path(spec->path);
			string_list_insert(changed, spec->path);
			putchar('\n');
			break;

		case DIFF_STATUS_COPIED:
		case DIFF_STATUS_RENAMED:
			/*
			 * If a change in the file corresponding to ospec->path
			 * has been observed, we cannot trust its contents
			 * because the diff is calculated based on the prior
			 * contents, not the current contents.  So, declare a
			 * copy or rename only if there was no change observed.
			 */
			if (!string_list_has_string(changed, ospec->path)) {
				/* the status character itself ('C' or 'R') is the command */
				printf("%c ", q->queue[i]->status);
				print_path(ospec->path);
				putchar(' ');
				print_path(spec->path);
				string_list_insert(changed, spec->path);
				putchar('\n');

				/*
				 * Same content and mode on both sides: the
				 * copy/rename line is all that is needed.
				 */
				if (!oidcmp(&ospec->oid, &spec->oid) &&
				    ospec->mode == spec->mode)
					break;
			}
			/*
			 * Otherwise also write the destination's content
			 * explicitly with an "M" line.
			 */
			/* fallthrough */

		case DIFF_STATUS_TYPE_CHANGED:
		case DIFF_STATUS_MODIFIED:
		case DIFF_STATUS_ADDED:
			/*
			 * Links refer to objects in other repositories;
			 * output the SHA-1 verbatim.  The same form is used
			 * when no blob data is exported, since no marks
			 * exist then.
			 */
			if (no_data || S_ISGITLINK(spec->mode))
				printf("M %06o %s ", spec->mode,
				       sha1_to_hex(anonymize ?
						   anonymize_sha1(spec->oid.hash) :
						   spec->oid.hash));
			else {
				/* refer to the blob by the mark assigned in export_blob() */
				struct object *object = lookup_object(spec->oid.hash);
				printf("M %06o :%d ", spec->mode,
				       get_object_mark(object));
			}
			print_path(spec->path);
			string_list_insert(changed, spec->path);
			putchar('\n');
			break;

		default:
			die("Unexpected comparison status '%c' for %s, %s",
				q->queue[i]->status,
				ospec->path ? ospec->path : "none",
				spec->path ? spec->path : "none");
		}
	}
}
 418
 419static const char *find_encoding(const char *begin, const char *end)
 420{
 421        const char *needle = "\nencoding ";
 422        char *bol, *eol;
 423
 424        bol = memmem(begin, end ? end - begin : strlen(begin),
 425                     needle, strlen(needle));
 426        if (!bol)
 427                return git_commit_encoding;
 428        bol += strlen(needle);
 429        eol = strchrnul(bol, '\n');
 430        *eol = '\0';
 431        return bol;
 432}
 433
 434static void *anonymize_ref_component(const void *old, size_t *len)
 435{
 436        static int counter;
 437        struct strbuf out = STRBUF_INIT;
 438        strbuf_addf(&out, "ref%d", counter++);
 439        return strbuf_detach(&out, len);
 440}
 441
 442static const char *anonymize_refname(const char *refname)
 443{
 444        /*
 445         * If any of these prefixes is found, we will leave it intact
 446         * so that tags remain tags and so forth.
 447         */
 448        static const char *prefixes[] = {
 449                "refs/heads/",
 450                "refs/tags/",
 451                "refs/remotes/",
 452                "refs/"
 453        };
 454        static struct hashmap refs;
 455        static struct strbuf anon = STRBUF_INIT;
 456        int i;
 457
 458        /*
 459         * We also leave "master" as a special case, since it does not reveal
 460         * anything interesting.
 461         */
 462        if (!strcmp(refname, "refs/heads/master"))
 463                return refname;
 464
 465        strbuf_reset(&anon);
 466        for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
 467                if (skip_prefix(refname, prefixes[i], &refname)) {
 468                        strbuf_addstr(&anon, prefixes[i]);
 469                        break;
 470                }
 471        }
 472
 473        anonymize_path(&anon, refname, &refs, anonymize_ref_component);
 474        return anon.buf;
 475}
 476
 477/*
 478 * We do not even bother to cache commit messages, as they are unlikely
 479 * to be repeated verbatim, and it is not that interesting when they are.
 480 */
 481static char *anonymize_commit_message(const char *old)
 482{
 483        static int counter;
 484        return xstrfmt("subject %d\n\nbody\n", counter++);
 485}
 486
 487static struct hashmap idents;
 488static void *anonymize_ident(const void *old, size_t *len)
 489{
 490        static int counter;
 491        struct strbuf out = STRBUF_INIT;
 492        strbuf_addf(&out, "User %d <user%d@example.com>", counter, counter);
 493        counter++;
 494        return strbuf_detach(&out, len);
 495}
 496
 497/*
 498 * Our strategy here is to anonymize the names and email addresses,
 499 * but keep timestamps intact, as they influence things like traversal
 500 * order (and by themselves should not be too revealing).
 501 */
 502static void anonymize_ident_line(const char **beg, const char **end)
 503{
 504        static struct strbuf buffers[] = { STRBUF_INIT, STRBUF_INIT };
 505        static unsigned which_buffer;
 506
 507        struct strbuf *out;
 508        struct ident_split split;
 509        const char *end_of_header;
 510
 511        out = &buffers[which_buffer++];
 512        which_buffer %= ARRAY_SIZE(buffers);
 513        strbuf_reset(out);
 514
 515        /* skip "committer", "author", "tagger", etc */
 516        end_of_header = strchr(*beg, ' ');
 517        if (!end_of_header)
 518                die("BUG: malformed line fed to anonymize_ident_line: %.*s",
 519                    (int)(*end - *beg), *beg);
 520        end_of_header++;
 521        strbuf_add(out, *beg, end_of_header - *beg);
 522
 523        if (!split_ident_line(&split, end_of_header, *end - end_of_header) &&
 524            split.date_begin) {
 525                const char *ident;
 526                size_t len;
 527
 528                len = split.mail_end - split.name_begin;
 529                ident = anonymize_mem(&idents, anonymize_ident,
 530                                      split.name_begin, &len);
 531                strbuf_add(out, ident, len);
 532                strbuf_addch(out, ' ');
 533                strbuf_add(out, split.date_begin, split.tz_end - split.date_begin);
 534        } else {
 535                strbuf_addstr(out, "Malformed Ident <malformed@example.com> 0 -0000");
 536        }
 537
 538        *beg = out->buf;
 539        *end = out->buf + out->len;
 540}
 541
/*
 * Write one commit to stdout as a fast-import "commit" command.
 *
 * In order: the blobs referenced by the commit's diff are exported, the
 * commit gets the next mark, the header (ref, mark, author, committer,
 * message) is printed, then "from"/"merge" lines for parents that have
 * marks, and finally the queued file changes are flushed.
 *
 * "paths_of_changed_objects" is cleared once the file changes have been
 * flushed.  Dies when the commit lacks an author or committer header.
 */
static void handle_commit(struct commit *commit, struct rev_info *rev,
			  struct string_list *paths_of_changed_objects)
{
	int saved_output_format = rev->diffopt.output_format;
	const char *commit_buffer;
	const char *author, *author_end, *committer, *committer_end;
	const char *encoding, *message;
	char *reencoded = NULL;
	struct commit_list *p;
	const char *refname;
	int i;

	/* have the diff machinery hand its results to a callback; restored below */
	rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;

	parse_commit_or_die(commit);
	commit_buffer = get_commit_buffer(commit, NULL);
	/* locate the header lines; each pointer is moved past the leading newline */
	author = strstr(commit_buffer, "\nauthor ");
	if (!author)
		die ("Could not find author in commit %s",
		     oid_to_hex(&commit->object.oid));
	author++;
	author_end = strchrnul(author, '\n');
	committer = strstr(author_end, "\ncommitter ");
	if (!committer)
		die ("Could not find committer in commit %s",
		     oid_to_hex(&commit->object.oid));
	committer++;
	committer_end = strchrnul(committer, '\n');
	/* the message starts after the first blank line; NULL if there is none */
	message = strstr(committer_end, "\n\n");
	encoding = find_encoding(committer_end, message);
	if (message)
		message += 2;

	/*
	 * Diff against the first parent when it has already been given a
	 * mark; otherwise, or with full_tree, diff against the empty tree.
	 */
	if (commit->parents &&
	    get_object_mark(&commit->parents->item->object) != 0 &&
	    !full_tree) {
		parse_commit_or_die(commit->parents->item);
		diff_tree_sha1(commit->parents->item->tree->object.oid.hash,
			       commit->tree->object.oid.hash, "", &rev->diffopt);
	}
	else
		diff_root_tree_sha1(commit->tree->object.oid.hash,
				    "", &rev->diffopt);

	/* Export the referenced blobs, and remember the marks. */
	for (i = 0; i < diff_queued_diff.nr; i++)
		if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode))
			export_blob(diff_queued_diff.queue[i]->two->oid.hash);

	/* commit->util holds the name of the ref this commit is written to */
	refname = commit->util;
	if (anonymize) {
		refname = anonymize_refname(refname);
		/*
		 * Two consecutive calls: anonymize_ident_line() alternates
		 * between two static buffers, so both results stay valid.
		 */
		anonymize_ident_line(&committer, &committer_end);
		anonymize_ident_line(&author, &author_end);
	}

	mark_next_object(&commit->object);
	if (anonymize)
		reencoded = anonymize_commit_message(message);
	else if (!is_encoding_utf8(encoding))
		reencoded = reencode_string(message, "UTF-8", encoding);
	/* a commit without parents gets an explicit "reset" of its ref first */
	if (!commit->parents)
		printf("reset %s\n", refname);
	/* prefer the re-encoded or anonymized message; fall back to the raw one */
	printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s",
	       refname, last_idnum,
	       (int)(author_end - author), author,
	       (int)(committer_end - committer), committer,
	       (unsigned)(reencoded
			  ? strlen(reencoded) : message
			  ? strlen(message) : 0),
	       reencoded ? reencoded : message ? message : "");
	free(reencoded);
	unuse_commit_buffer(commit, commit_buffer);

	/*
	 * The first parent that has a mark becomes "from", later ones
	 * "merge"; parents without a mark are left out.
	 */
	for (i = 0, p = commit->parents; p; p = p->next) {
		int mark = get_object_mark(&p->item->object);
		if (!mark)
			continue;
		if (i == 0)
			printf("from :%d\n", mark);
		else
			printf("merge :%d\n", mark);
		i++;
	}

	if (full_tree)
		printf("deleteall\n");
	/* flush the queued diff, then forget the paths recorded for this commit */
	log_tree_diff_flush(rev);
	string_list_clear(paths_of_changed_objects, 0);
	rev->diffopt.output_format = saved_output_format;

	printf("\n");

	show_progress();
}
 637
 638static void *anonymize_tag(const void *old, size_t *len)
 639{
 640        static int counter;
 641        struct strbuf out = STRBUF_INIT;
 642        strbuf_addf(&out, "tag message %d", counter++);
 643        return strbuf_detach(&out, len);
 644}
 645
 646static void handle_tail(struct object_array *commits, struct rev_info *revs,
 647                        struct string_list *paths_of_changed_objects)
 648{
 649        struct commit *commit;
 650        while (commits->nr) {
 651                commit = (struct commit *)commits->objects[commits->nr - 1].item;
 652                if (has_unshown_parent(commit))
 653                        return;
 654                handle_commit(commit, revs, paths_of_changed_objects);
 655                commits->nr--;
 656        }
 657}
 658
 659static void handle_tag(const char *name, struct tag *tag)
 660{
 661        unsigned long size;
 662        enum object_type type;
 663        char *buf;
 664        const char *tagger, *tagger_end, *message;
 665        size_t message_size = 0;
 666        struct object *tagged;
 667        int tagged_mark;
 668        struct commit *p;
 669
 670        /* Trees have no identifier in fast-export output, thus we have no way
 671         * to output tags of trees, tags of tags of trees, etc.  Simply omit
 672         * such tags.
 673         */
 674        tagged = tag->tagged;
 675        while (tagged->type == OBJ_TAG) {
 676                tagged = ((struct tag *)tagged)->tagged;
 677        }
 678        if (tagged->type == OBJ_TREE) {
 679                warning("Omitting tag %s,\nsince tags of trees (or tags of tags of trees, etc.) are not supported.",
 680                        oid_to_hex(&tag->object.oid));
 681                return;
 682        }
 683
 684        buf = read_sha1_file(tag->object.oid.hash, &type, &size);
 685        if (!buf)
 686                die ("Could not read tag %s", oid_to_hex(&tag->object.oid));
 687        message = memmem(buf, size, "\n\n", 2);
 688        if (message) {
 689                message += 2;
 690                message_size = strlen(message);
 691        }
 692        tagger = memmem(buf, message ? message - buf : size, "\ntagger ", 8);
 693        if (!tagger) {
 694                if (fake_missing_tagger)
 695                        tagger = "tagger Unspecified Tagger "
 696                                "<unspecified-tagger> 0 +0000";
 697                else
 698                        tagger = "";
 699                tagger_end = tagger + strlen(tagger);
 700        } else {
 701                tagger++;
 702                tagger_end = strchrnul(tagger, '\n');
 703                if (anonymize)
 704                        anonymize_ident_line(&tagger, &tagger_end);
 705        }
 706
 707        if (anonymize) {
 708                name = anonymize_refname(name);
 709                if (message) {
 710                        static struct hashmap tags;
 711                        message = anonymize_mem(&tags, anonymize_tag,
 712                                                message, &message_size);
 713                }
 714        }
 715
 716        /* handle signed tags */
 717        if (message) {
 718                const char *signature = strstr(message,
 719                                               "\n-----BEGIN PGP SIGNATURE-----\n");
 720                if (signature)
 721                        switch(signed_tag_mode) {
 722                        case ABORT:
 723                                die ("Encountered signed tag %s; use "
 724                                     "--signed-tags=<mode> to handle it.",
 725                                     oid_to_hex(&tag->object.oid));
 726                        case WARN:
 727                                warning ("Exporting signed tag %s",
 728                                         oid_to_hex(&tag->object.oid));
 729                                /* fallthru */
 730                        case VERBATIM:
 731                                break;
 732                        case WARN_STRIP:
 733                                warning ("Stripping signature from tag %s",
 734                                         oid_to_hex(&tag->object.oid));
 735                                /* fallthru */
 736                        case STRIP:
 737                                message_size = signature + 1 - message;
 738                                break;
 739                        }
 740        }
 741
 742        /* handle tag->tagged having been filtered out due to paths specified */
 743        tagged = tag->tagged;
 744        tagged_mark = get_object_mark(tagged);
 745        if (!tagged_mark) {
 746                switch(tag_of_filtered_mode) {
 747                case ABORT:
 748                        die ("Tag %s tags unexported object; use "
 749                             "--tag-of-filtered-object=<mode> to handle it.",
 750                             oid_to_hex(&tag->object.oid));
 751                case DROP:
 752                        /* Ignore this tag altogether */
 753                        return;
 754                case REWRITE:
 755                        if (tagged->type != OBJ_COMMIT) {
 756                                die ("Tag %s tags unexported %s!",
 757                                     oid_to_hex(&tag->object.oid),
 758                                     typename(tagged->type));
 759                        }
 760                        p = (struct commit *)tagged;
 761                        for (;;) {
 762                                if (p->parents && p->parents->next)
 763                                        break;
 764                                if (p->object.flags & UNINTERESTING)
 765                                        break;
 766                                if (!(p->object.flags & TREESAME))
 767                                        break;
 768                                if (!p->parents)
 769                                        die ("Can't find replacement commit for tag %s\n",
 770                                             oid_to_hex(&tag->object.oid));
 771                                p = p->parents->item;
 772                        }
 773                        tagged_mark = get_object_mark(&p->object);
 774                }
 775        }
 776
 777        if (starts_with(name, "refs/tags/"))
 778                name += 10;
 779        printf("tag %s\nfrom :%d\n%.*s%sdata %d\n%.*s\n",
 780               name, tagged_mark,
 781               (int)(tagger_end - tagger), tagger,
 782               tagger == tagger_end ? "" : "\n",
 783               (int)message_size, (int)message_size, message ? message : "");
 784}
 785
 786static struct commit *get_commit(struct rev_cmdline_entry *e, char *full_name)
 787{
 788        switch (e->item->type) {
 789        case OBJ_COMMIT:
 790                return (struct commit *)e->item;
 791        case OBJ_TAG: {
 792                struct tag *tag = (struct tag *)e->item;
 793
 794                /* handle nested tags */
 795                while (tag && tag->object.type == OBJ_TAG) {
 796                        parse_object(tag->object.oid.hash);
 797                        string_list_append(&extra_refs, full_name)->util = tag;
 798                        tag = (struct tag *)tag->tagged;
 799                }
 800                if (!tag)
 801                        die("Tag %s points nowhere?", e->name);
 802                return (struct commit *)tag;
 803                break;
 804        }
 805        default:
 806                return NULL;
 807        }
 808}
 809
 810static void get_tags_and_duplicates(struct rev_cmdline_info *info)
 811{
 812        int i;
 813
 814        for (i = 0; i < info->nr; i++) {
 815                struct rev_cmdline_entry *e = info->rev + i;
 816                unsigned char sha1[20];
 817                struct commit *commit;
 818                char *full_name;
 819
 820                if (e->flags & UNINTERESTING)
 821                        continue;
 822
 823                if (dwim_ref(e->name, strlen(e->name), sha1, &full_name) != 1)
 824                        continue;
 825
 826                if (refspecs) {
 827                        char *private;
 828                        private = apply_refspecs(refspecs, refspecs_nr, full_name);
 829                        if (private) {
 830                                free(full_name);
 831                                full_name = private;
 832                        }
 833                }
 834
 835                commit = get_commit(e, full_name);
 836                if (!commit) {
 837                        warning("%s: Unexpected object of type %s, skipping.",
 838                                e->name,
 839                                typename(e->item->type));
 840                        continue;
 841                }
 842
 843                switch(commit->object.type) {
 844                case OBJ_COMMIT:
 845                        break;
 846                case OBJ_BLOB:
 847                        export_blob(commit->object.oid.hash);
 848                        continue;
 849                default: /* OBJ_TAG (nested tags) is already handled */
 850                        warning("Tag points to object of unexpected type %s, skipping.",
 851                                typename(commit->object.type));
 852                        continue;
 853                }
 854
 855                /*
 856                 * This ref will not be updated through a commit, lets make
 857                 * sure it gets properly updated eventually.
 858                 */
 859                if (commit->util || commit->object.flags & SHOWN)
 860                        string_list_append(&extra_refs, full_name)->util = commit;
 861                if (!commit->util)
 862                        commit->util = full_name;
 863        }
 864}
 865
 866static void handle_tags_and_duplicates(void)
 867{
 868        struct commit *commit;
 869        int i;
 870
 871        for (i = extra_refs.nr - 1; i >= 0; i--) {
 872                const char *name = extra_refs.items[i].string;
 873                struct object *object = extra_refs.items[i].util;
 874                switch (object->type) {
 875                case OBJ_TAG:
 876                        handle_tag(name, (struct tag *)object);
 877                        break;
 878                case OBJ_COMMIT:
 879                        if (anonymize)
 880                                name = anonymize_refname(name);
 881                        /* create refs pointing to already seen commits */
 882                        commit = (struct commit *)object;
 883                        printf("reset %s\nfrom :%d\n\n", name,
 884                               get_object_mark(&commit->object));
 885                        show_progress();
 886                        break;
 887                }
 888        }
 889}
 890
 891static void export_marks(char *file)
 892{
 893        unsigned int i;
 894        uint32_t mark;
 895        struct object_decoration *deco = idnums.hash;
 896        FILE *f;
 897        int e = 0;
 898
 899        f = fopen_for_writing(file);
 900        if (!f)
 901                die_errno("Unable to open marks file %s for writing.", file);
 902
 903        for (i = 0; i < idnums.size; i++) {
 904                if (deco->base && deco->base->type == 1) {
 905                        mark = ptr_to_mark(deco->decoration);
 906                        if (fprintf(f, ":%"PRIu32" %s\n", mark,
 907                                oid_to_hex(&deco->base->oid)) < 0) {
 908                            e = 1;
 909                            break;
 910                        }
 911                }
 912                deco++;
 913        }
 914
 915        e |= ferror(f);
 916        e |= fclose(f);
 917        if (e)
 918                error("Unable to write marks file %s.", file);
 919}
 920
 921static void import_marks(char *input_file)
 922{
 923        char line[512];
 924        FILE *f = fopen(input_file, "r");
 925        if (!f)
 926                die_errno("cannot read '%s'", input_file);
 927
 928        while (fgets(line, sizeof(line), f)) {
 929                uint32_t mark;
 930                char *line_end, *mark_end;
 931                unsigned char sha1[20];
 932                struct object *object;
 933                struct commit *commit;
 934                enum object_type type;
 935
 936                line_end = strchr(line, '\n');
 937                if (line[0] != ':' || !line_end)
 938                        die("corrupt mark line: %s", line);
 939                *line_end = '\0';
 940
 941                mark = strtoumax(line + 1, &mark_end, 10);
 942                if (!mark || mark_end == line + 1
 943                        || *mark_end != ' ' || get_sha1_hex(mark_end + 1, sha1))
 944                        die("corrupt mark line: %s", line);
 945
 946                if (last_idnum < mark)
 947                        last_idnum = mark;
 948
 949                type = sha1_object_info(sha1, NULL);
 950                if (type < 0)
 951                        die("object not found: %s", sha1_to_hex(sha1));
 952
 953                if (type != OBJ_COMMIT)
 954                        /* only commits */
 955                        continue;
 956
 957                commit = lookup_commit(sha1);
 958                if (!commit)
 959                        die("not a commit? can't happen: %s", sha1_to_hex(sha1));
 960
 961                object = &commit->object;
 962
 963                if (object->flags & SHOWN)
 964                        error("Object %s already has a mark", sha1_to_hex(sha1));
 965
 966                mark_object(object, mark);
 967
 968                object->flags |= SHOWN;
 969        }
 970        fclose(f);
 971}
 972
 973static void handle_deletes(void)
 974{
 975        int i;
 976        for (i = 0; i < refspecs_nr; i++) {
 977                struct refspec *refspec = &refspecs[i];
 978                if (*refspec->src)
 979                        continue;
 980
 981                printf("reset %s\nfrom %s\n\n",
 982                                refspec->dst, sha1_to_hex(null_sha1));
 983        }
 984}
 985
 986int cmd_fast_export(int argc, const char **argv, const char *prefix)
 987{
 988        struct rev_info revs;
 989        struct object_array commits = OBJECT_ARRAY_INIT;
 990        struct commit *commit;
 991        char *export_filename = NULL, *import_filename = NULL;
 992        uint32_t lastimportid;
 993        struct string_list refspecs_list = STRING_LIST_INIT_NODUP;
 994        struct string_list paths_of_changed_objects = STRING_LIST_INIT_DUP;
 995        struct option options[] = {
 996                OPT_INTEGER(0, "progress", &progress,
 997                            N_("show progress after <n> objects")),
 998                OPT_CALLBACK(0, "signed-tags", &signed_tag_mode, N_("mode"),
 999                             N_("select handling of signed tags"),
1000                             parse_opt_signed_tag_mode),
1001                OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"),
1002                             N_("select handling of tags that tag filtered objects"),
1003                             parse_opt_tag_of_filtered_mode),
1004                OPT_STRING(0, "export-marks", &export_filename, N_("file"),
1005                             N_("Dump marks to this file")),
1006                OPT_STRING(0, "import-marks", &import_filename, N_("file"),
1007                             N_("Import marks from this file")),
1008                OPT_BOOL(0, "fake-missing-tagger", &fake_missing_tagger,
1009                         N_("Fake a tagger when tags lack one")),
1010                OPT_BOOL(0, "full-tree", &full_tree,
1011                         N_("Output full tree for each commit")),
1012                OPT_BOOL(0, "use-done-feature", &use_done_feature,
1013                             N_("Use the done feature to terminate the stream")),
1014                OPT_BOOL(0, "no-data", &no_data, N_("Skip output of blob data")),
1015                OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"),
1016                             N_("Apply refspec to exported refs")),
1017                OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")),
1018                OPT_END()
1019        };
1020
1021        if (argc == 1)
1022                usage_with_options (fast_export_usage, options);
1023
1024        /* we handle encodings */
1025        git_config(git_default_config, NULL);
1026
1027        init_revisions(&revs, prefix);
1028        revs.topo_order = 1;
1029        revs.show_source = 1;
1030        revs.rewrite_parents = 1;
1031        argc = parse_options(argc, argv, prefix, options, fast_export_usage,
1032                        PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN);
1033        argc = setup_revisions(argc, argv, &revs, NULL);
1034        if (argc > 1)
1035                usage_with_options (fast_export_usage, options);
1036
1037        if (refspecs_list.nr) {
1038                const char **refspecs_str;
1039                int i;
1040
1041                ALLOC_ARRAY(refspecs_str, refspecs_list.nr);
1042                for (i = 0; i < refspecs_list.nr; i++)
1043                        refspecs_str[i] = refspecs_list.items[i].string;
1044
1045                refspecs_nr = refspecs_list.nr;
1046                refspecs = parse_fetch_refspec(refspecs_nr, refspecs_str);
1047
1048                string_list_clear(&refspecs_list, 1);
1049                free(refspecs_str);
1050        }
1051
1052        if (use_done_feature)
1053                printf("feature done\n");
1054
1055        if (import_filename)
1056                import_marks(import_filename);
1057        lastimportid = last_idnum;
1058
1059        if (import_filename && revs.prune_data.nr)
1060                full_tree = 1;
1061
1062        get_tags_and_duplicates(&revs.cmdline);
1063
1064        if (prepare_revision_walk(&revs))
1065                die("revision walk setup failed");
1066        revs.diffopt.format_callback = show_filemodify;
1067        revs.diffopt.format_callback_data = &paths_of_changed_objects;
1068        DIFF_OPT_SET(&revs.diffopt, RECURSIVE);
1069        while ((commit = get_revision(&revs))) {
1070                if (has_unshown_parent(commit)) {
1071                        add_object_array(&commit->object, NULL, &commits);
1072                }
1073                else {
1074                        handle_commit(commit, &revs, &paths_of_changed_objects);
1075                        handle_tail(&commits, &revs, &paths_of_changed_objects);
1076                }
1077        }
1078
1079        handle_tags_and_duplicates();
1080        handle_deletes();
1081
1082        if (export_filename && lastimportid != last_idnum)
1083                export_marks(export_filename);
1084
1085        if (use_done_feature)
1086                printf("done\n");
1087
1088        free_refspec(refspecs_nr, refspecs);
1089
1090        return 0;
1091}