merge-tree.c on commit Handling large files with GIT (492e075)
   1#include "cache.h"
   2#include "diff.h"
   3
   4static const char merge_tree_usage[] = "git-merge-tree <base-tree> <branch1> <branch2>";
   5static int resolve_directories = 1;
   6
   7static void merge_trees(struct tree_desc t[3], const char *base);
   8
   9static void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1)
  10{
  11        unsigned long size = 0;
  12        void *buf = NULL;
  13
  14        if (sha1) {
  15                buf = read_object_with_reference(sha1, "tree", &size, NULL);
  16                if (!buf)
  17                        die("unable to read tree %s", sha1_to_hex(sha1));
  18        }
  19        desc->size = size;
  20        desc->buf = buf;
  21        return buf;
  22}
  23
  24struct name_entry {
  25        const unsigned char *sha1;
  26        const char *path;
  27        unsigned int mode;
  28        int pathlen;
  29};
  30
  31static void entry_clear(struct name_entry *a)
  32{
  33        memset(a, 0, sizeof(*a));
  34}
  35
  36static int entry_compare(struct name_entry *a, struct name_entry *b)
  37{
  38        return base_name_compare(
  39                        a->path, a->pathlen, a->mode,
  40                        b->path, b->pathlen, b->mode);
  41}
  42
  43static void entry_extract(struct tree_desc *t, struct name_entry *a)
  44{
  45        a->sha1 = tree_entry_extract(t, &a->path, &a->mode);
  46        a->pathlen = strlen(a->path);
  47}
  48
  49/* An empty entry never compares same, not even to another empty entry */
  50static int same_entry(struct name_entry *a, struct name_entry *b)
  51{
  52        return  a->sha1 &&
  53                b->sha1 &&
  54                !memcmp(a->sha1, b->sha1, 20) &&
  55                a->mode == b->mode;
  56}
  57
  58static void resolve(const char *base, struct name_entry *result)
  59{
  60        printf("0 %06o %s %s%s\n", result->mode, sha1_to_hex(result->sha1), base, result->path);
  61}
  62
  63static int unresolved_directory(const char *base, struct name_entry n[3])
  64{
  65        int baselen;
  66        char *newbase;
  67        struct name_entry *p;
  68        struct tree_desc t[3];
  69        void *buf0, *buf1, *buf2;
  70
  71        if (!resolve_directories)
  72                return 0;
  73        p = n;
  74        if (!p->mode) {
  75                p++;
  76                if (!p->mode)
  77                        p++;
  78        }
  79        if (!S_ISDIR(p->mode))
  80                return 0;
  81        baselen = strlen(base);
  82        newbase = xmalloc(baselen + p->pathlen + 2);
  83        memcpy(newbase, base, baselen);
  84        memcpy(newbase + baselen, p->path, p->pathlen);
  85        memcpy(newbase + baselen + p->pathlen, "/", 2);
  86
  87        buf0 = fill_tree_descriptor(t+0, n[0].sha1);
  88        buf1 = fill_tree_descriptor(t+1, n[1].sha1);
  89        buf2 = fill_tree_descriptor(t+2, n[2].sha1);
  90        merge_trees(t, newbase);
  91
  92        free(buf0);
  93        free(buf1);
  94        free(buf2);
  95        free(newbase);
  96        return 1;
  97}
  98
  99static void unresolved(const char *base, struct name_entry n[3])
 100{
 101        if (unresolved_directory(base, n))
 102                return;
 103        printf("1 %06o %s %s%s\n", n[0].mode, sha1_to_hex(n[0].sha1), base, n[0].path);
 104        printf("2 %06o %s %s%s\n", n[1].mode, sha1_to_hex(n[1].sha1), base, n[1].path);
 105        printf("3 %06o %s %s%s\n", n[2].mode, sha1_to_hex(n[2].sha1), base, n[2].path);
 106}
 107
 108/*
 109 * Merge two trees together (t[1] and t[2]), using a common base (t[0])
 110 * as the origin.
 111 *
 112 * This walks the (sorted) trees in lock-step, checking every possible
 113 * name. Note that directories automatically sort differently from other
 114 * files (see "base_name_compare"), so you'll never see file/directory
 115 * conflicts, because they won't ever compare the same.
 116 *
 117 * IOW, if a directory changes to a filename, it will automatically be
 118 * seen as the directory going away, and the filename being created.
 119 *
 120 * Think of this as a three-way diff.
 121 *
 122 * The output will be either:
 123 *  - successful merge
 124 *       "0 mode sha1 filename"
 125 *    NOTE NOTE NOTE! FIXME! We really really need to walk the index
 126 *    in parallel with this too!
 127 * 
 128 *  - conflict:
 129 *      "1 mode sha1 filename"
 130 *      "2 mode sha1 filename"
 131 *      "3 mode sha1 filename"
 132 *    where not all of the 1/2/3 lines may exist, of course.
 133 *
 134 * The successful merge rules are the same as for the three-way merge
 135 * in git-read-tree.
 136 */
 137static void merge_trees(struct tree_desc t[3], const char *base)
 138{
 139        for (;;) {
 140                struct name_entry entry[3];
 141                unsigned int mask = 0;
 142                int i, last;
 143
 144                last = -1;
 145                for (i = 0; i < 3; i++) {
 146                        if (!t[i].size)
 147                                continue;
 148                        entry_extract(t+i, entry+i);
 149                        if (last >= 0) {
 150                                int cmp = entry_compare(entry+i, entry+last);
 151
 152                                /*
 153                                 * Is the new name bigger than the old one?
 154                                 * Ignore it
 155                                 */
 156                                if (cmp > 0)
 157                                        continue;
 158                                /*
 159                                 * Is the new name smaller than the old one?
 160                                 * Ignore all old ones
 161                                 */
 162                                if (cmp < 0)
 163                                        mask = 0;
 164                        }
 165                        mask |= 1u << i;
 166                        last = i;
 167                }
 168                if (!mask)
 169                        break;
 170
 171                /*
 172                 * Update the tree entries we've walked, and clear
 173                 * all the unused name-entries.
 174                 */
 175                for (i = 0; i < 3; i++) {
 176                        if (mask & (1u << i)) {
 177                                update_tree_entry(t+i);
 178                                continue;
 179                        }
 180                        entry_clear(entry + i);
 181                }
 182
 183                /* Same in both? */
 184                if (same_entry(entry+1, entry+2)) {
 185                        if (entry[0].sha1) {
 186                                resolve(base, entry+1);
 187                                continue;
 188                        }
 189                }
 190
 191                if (same_entry(entry+0, entry+1)) {
 192                        if (entry[2].sha1) {
 193                                resolve(base, entry+2);
 194                                continue;
 195                        }
 196                }
 197
 198                if (same_entry(entry+0, entry+2)) {
 199                        if (entry[1].sha1) {
 200                                resolve(base, entry+1);
 201                                continue;
 202                        }
 203                }
 204
 205                unresolved(base, entry);
 206        }
 207}
 208
 209static void *get_tree_descriptor(struct tree_desc *desc, const char *rev)
 210{
 211        unsigned char sha1[20];
 212        void *buf;
 213
 214        if (get_sha1(rev, sha1) < 0)
 215                die("unknown rev %s", rev);
 216        buf = fill_tree_descriptor(desc, sha1);
 217        if (!buf)
 218                die("%s is not a tree", rev);
 219        return buf;
 220}
 221
 222int main(int argc, char **argv)
 223{
 224        struct tree_desc t[3];
 225        void *buf1, *buf2, *buf3;
 226
 227        if (argc < 4)
 228                usage(merge_tree_usage);
 229
 230        buf1 = get_tree_descriptor(t+0, argv[1]);
 231        buf2 = get_tree_descriptor(t+1, argv[2]);
 232        buf3 = get_tree_descriptor(t+2, argv[3]);
 233        merge_trees(t, "");
 234        free(buf1);
 235        free(buf2);
 236        free(buf3);
 237        return 0;
 238}