vcs-svn / svndump.c on commit Merge branch 'db/strbufs-for-metadata' into db/svn-fe-code-purge (fa6c4bc)
   1/*
   2 * Parse and rearrange a svnadmin dump.
   3 * Create the dump with:
   4 * svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile
   5 *
   6 * Licensed under a two-clause BSD-style license.
   7 * See LICENSE for details.
   8 */
   9
  10#include "cache.h"
  11#include "repo_tree.h"
  12#include "fast_export.h"
  13#include "line_buffer.h"
  14#include "string_pool.h"
  15#include "strbuf.h"
  16
  17#define REPORT_FILENO 3
  18
  19/*
  20 * Compare start of string to literal of equal length;
  21 * must be guarded by length test.
  22 */
  23#define constcmp(s, ref) memcmp(s, ref, sizeof(ref) - 1)
  24
  25#define NODEACT_REPLACE 4
  26#define NODEACT_DELETE 3
  27#define NODEACT_ADD 2
  28#define NODEACT_CHANGE 1
  29#define NODEACT_UNKNOWN 0
  30
  31/* States: */
  32#define DUMP_CTX 0      /* dump metadata */
  33#define REV_CTX  1      /* revision metadata */
  34#define NODE_CTX 2      /* node metadata */
  35#define INTERNODE_CTX 3 /* between nodes */
  36
  37#define LENGTH_UNKNOWN (~0)
  38#define DATE_RFC2822_LEN 31
  39
/* The dump stream; opened by svndump_init() and read by every parser below. */
static struct line_buffer input = LINE_BUFFER_INIT;
  41
  42static struct {
  43        uint32_t action, propLength, textLength, srcRev, type;
  44        uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH];
  45        uint32_t text_delta, prop_delta;
  46} node_ctx;
  47
  48static struct {
  49        uint32_t revision;
  50        unsigned long timestamp;
  51        struct strbuf log, author;
  52} rev_ctx;
  53
  54static struct {
  55        uint32_t version;
  56        struct strbuf uuid, url;
  57} dump_ctx;
  58
  59static void reset_node_ctx(char *fname)
  60{
  61        node_ctx.type = 0;
  62        node_ctx.action = NODEACT_UNKNOWN;
  63        node_ctx.propLength = LENGTH_UNKNOWN;
  64        node_ctx.textLength = LENGTH_UNKNOWN;
  65        node_ctx.src[0] = ~0;
  66        node_ctx.srcRev = 0;
  67        pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname);
  68        node_ctx.text_delta = 0;
  69        node_ctx.prop_delta = 0;
  70}
  71
  72static void reset_rev_ctx(uint32_t revision)
  73{
  74        rev_ctx.revision = revision;
  75        rev_ctx.timestamp = 0;
  76        strbuf_reset(&rev_ctx.log);
  77        strbuf_reset(&rev_ctx.author);
  78}
  79
  80static void reset_dump_ctx(const char *url)
  81{
  82        strbuf_reset(&dump_ctx.url);
  83        if (url)
  84                strbuf_addstr(&dump_ctx.url, url);
  85        dump_ctx.version = 1;
  86        strbuf_reset(&dump_ctx.uuid);
  87}
  88
  89static void handle_property(const struct strbuf *key_buf,
  90                                const char *val, uint32_t len,
  91                                uint32_t *type_set)
  92{
  93        const char *key = key_buf->buf;
  94        size_t keylen = key_buf->len;
  95
  96        switch (keylen + 1) {
  97        case sizeof("svn:log"):
  98                if (constcmp(key, "svn:log"))
  99                        break;
 100                if (!val)
 101                        die("invalid dump: unsets svn:log");
 102                strbuf_reset(&rev_ctx.log);
 103                strbuf_add(&rev_ctx.log, val, len);
 104                break;
 105        case sizeof("svn:author"):
 106                if (constcmp(key, "svn:author"))
 107                        break;
 108                strbuf_reset(&rev_ctx.author);
 109                if (val)
 110                        strbuf_add(&rev_ctx.author, val, len);
 111                break;
 112        case sizeof("svn:date"):
 113                if (constcmp(key, "svn:date"))
 114                        break;
 115                if (!val)
 116                        die("invalid dump: unsets svn:date");
 117                if (parse_date_basic(val, &rev_ctx.timestamp, NULL))
 118                        warning("invalid timestamp: %s", val);
 119                break;
 120        case sizeof("svn:executable"):
 121        case sizeof("svn:special"):
 122                if (keylen == strlen("svn:executable") &&
 123                    constcmp(key, "svn:executable"))
 124                        break;
 125                if (keylen == strlen("svn:special") &&
 126                    constcmp(key, "svn:special"))
 127                        break;
 128                if (*type_set) {
 129                        if (!val)
 130                                return;
 131                        die("invalid dump: sets type twice");
 132                }
 133                if (!val) {
 134                        node_ctx.type = REPO_MODE_BLB;
 135                        return;
 136                }
 137                *type_set = 1;
 138                node_ctx.type = keylen == strlen("svn:executable") ?
 139                                REPO_MODE_EXE :
 140                                REPO_MODE_LNK;
 141        }
 142}
 143
 144static void die_short_read(void)
 145{
 146        if (buffer_ferror(&input))
 147                die_errno("error reading dump file");
 148        die("invalid dump: unexpected end of file");
 149}
 150
 151static void read_props(void)
 152{
 153        static struct strbuf key = STRBUF_INIT;
 154        const char *t;
 155        /*
 156         * NEEDSWORK: to support simple mode changes like
 157         *      K 11
 158         *      svn:special
 159         *      V 1
 160         *      *
 161         *      D 14
 162         *      svn:executable
 163         * we keep track of whether a mode has been set and reset to
 164         * plain file only if not.  We should be keeping track of the
 165         * symlink and executable bits separately instead.
 166         */
 167        uint32_t type_set = 0;
 168        while ((t = buffer_read_line(&input)) && strcmp(t, "PROPS-END")) {
 169                uint32_t len;
 170                const char *val;
 171                const char type = t[0];
 172                int ch;
 173
 174                if (!type || t[1] != ' ')
 175                        die("invalid property line: %s\n", t);
 176                len = atoi(&t[2]);
 177                val = buffer_read_string(&input, len);
 178                if (!val || strlen(val) != len)
 179                        die_short_read();
 180
 181                /* Discard trailing newline. */
 182                ch = buffer_read_char(&input);
 183                if (ch == EOF)
 184                        die_short_read();
 185                if (ch != '\n')
 186                        die("invalid dump: expected newline after %s", val);
 187
 188                switch (type) {
 189                case 'K':
 190                case 'D':
 191                        strbuf_reset(&key);
 192                        if (val)
 193                                strbuf_add(&key, val, len);
 194                        if (type == 'K')
 195                                continue;
 196                        assert(type == 'D');
 197                        val = NULL;
 198                        len = 0;
 199                        /* fall through */
 200                case 'V':
 201                        handle_property(&key, val, len, &type_set);
 202                        strbuf_reset(&key);
 203                        continue;
 204                default:
 205                        die("invalid property line: %s\n", t);
 206                }
 207        }
 208}
 209
 210static void handle_node(void)
 211{
 212        const uint32_t type = node_ctx.type;
 213        const int have_props = node_ctx.propLength != LENGTH_UNKNOWN;
 214        const int have_text = node_ctx.textLength != LENGTH_UNKNOWN;
 215        /*
 216         * Old text for this node:
 217         *  NULL        - directory or bug
 218         *  empty_blob  - empty
 219         *  "<dataref>" - data retrievable from fast-import
 220         */
 221        static const char *const empty_blob = "::empty::";
 222        const char *old_data = NULL;
 223
 224        if (node_ctx.text_delta)
 225                die("text deltas not supported");
 226
 227        if (node_ctx.action == NODEACT_DELETE) {
 228                if (have_text || have_props || node_ctx.srcRev)
 229                        die("invalid dump: deletion node has "
 230                                "copyfrom info, text, or properties");
 231                return repo_delete(node_ctx.dst);
 232        }
 233        if (node_ctx.action == NODEACT_REPLACE) {
 234                repo_delete(node_ctx.dst);
 235                node_ctx.action = NODEACT_ADD;
 236        }
 237        if (node_ctx.srcRev) {
 238                repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst);
 239                if (node_ctx.action == NODEACT_ADD)
 240                        node_ctx.action = NODEACT_CHANGE;
 241        }
 242        if (have_text && type == REPO_MODE_DIR)
 243                die("invalid dump: directories cannot have text attached");
 244
 245        /*
 246         * Find old content (old_data) and decide on the new mode.
 247         */
 248        if (node_ctx.action == NODEACT_CHANGE && !~*node_ctx.dst) {
 249                if (type != REPO_MODE_DIR)
 250                        die("invalid dump: root of tree is not a regular file");
 251                old_data = NULL;
 252        } else if (node_ctx.action == NODEACT_CHANGE) {
 253                uint32_t mode;
 254                old_data = repo_read_path(node_ctx.dst);
 255                mode = repo_read_mode(node_ctx.dst);
 256                if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR)
 257                        die("invalid dump: cannot modify a directory into a file");
 258                if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR)
 259                        die("invalid dump: cannot modify a file into a directory");
 260                node_ctx.type = mode;
 261        } else if (node_ctx.action == NODEACT_ADD) {
 262                if (type == REPO_MODE_DIR)
 263                        old_data = NULL;
 264                else if (have_text)
 265                        old_data = empty_blob;
 266                else
 267                        die("invalid dump: adds node without text");
 268        } else {
 269                die("invalid dump: Node-path block lacks Node-action");
 270        }
 271
 272        /*
 273         * Adjust mode to reflect properties.
 274         */
 275        if (have_props) {
 276                if (!node_ctx.prop_delta)
 277                        node_ctx.type = type;
 278                if (node_ctx.propLength)
 279                        read_props();
 280        }
 281
 282        /*
 283         * Save the result.
 284         */
 285        if (type == REPO_MODE_DIR)      /* directories are not tracked. */
 286                return;
 287        assert(old_data);
 288        if (old_data == empty_blob)
 289                /* For the fast_export_* functions, NULL means empty. */
 290                old_data = NULL;
 291        if (!have_text) {
 292                fast_export_modify(REPO_MAX_PATH_DEPTH, node_ctx.dst,
 293                                        node_ctx.type, old_data);
 294                return;
 295        }
 296        fast_export_modify(REPO_MAX_PATH_DEPTH, node_ctx.dst,
 297                                node_ctx.type, "inline");
 298        fast_export_data(node_ctx.type, node_ctx.textLength, &input);
 299}
 300
 301static void begin_revision(void)
 302{
 303        if (!rev_ctx.revision)  /* revision 0 gets no git commit. */
 304                return;
 305        fast_export_begin_commit(rev_ctx.revision, rev_ctx.author.buf,
 306                rev_ctx.log.buf, dump_ctx.uuid.buf, dump_ctx.url.buf,
 307                rev_ctx.timestamp);
 308}
 309
 310static void end_revision(void)
 311{
 312        if (rev_ctx.revision)
 313                fast_export_end_commit(rev_ctx.revision);
 314}
 315
/*
 * Parse the whole dump from the input, one header line at a time, and
 * drive the revision and node handlers.
 *
 * url is recorded in dump_ctx (via reset_dump_ctx) and later passed
 * along with each commit by begin_revision().
 *
 * Each "Key: value" line is recognized in two steps: the switch picks
 * candidates by key length, then constcmp() confirms the spelling.
 * Lines without ": " and unrecognized keys are skipped.
 */
void svndump_read(const char *url)
{
        char *val;
        char *t;
        uint32_t active_ctx = DUMP_CTX;
        uint32_t len;

        reset_dump_ctx(url);
        while ((t = buffer_read_line(&input))) {
                val = strstr(t, ": ");
                if (!val)
                        continue;
                /* Step over the separator; val now points at the value. */
                val += 2;

                /* strlen(key) + 1 */
                switch (val - t - 1) {
                case sizeof("SVN-fs-dump-format-version"):
                        if (constcmp(t, "SVN-fs-dump-format-version"))
                                continue;
                        dump_ctx.version = atoi(val);
                        if (dump_ctx.version > 3)
                                die("expected svn dump format version <= 3, found %"PRIu32,
                                    dump_ctx.version);
                        break;
                case sizeof("UUID"):
                        if (constcmp(t, "UUID"))
                                continue;
                        strbuf_reset(&dump_ctx.uuid);
                        strbuf_addstr(&dump_ctx.uuid, val);
                        break;
                case sizeof("Revision-number"):
                        if (constcmp(t, "Revision-number"))
                                continue;
                        /* Finish whatever was pending before starting a new revision. */
                        if (active_ctx == NODE_CTX)
                                handle_node();
                        if (active_ctx == REV_CTX)
                                begin_revision();
                        if (active_ctx != DUMP_CTX)
                                end_revision();
                        active_ctx = REV_CTX;
                        reset_rev_ctx(atoi(val));
                        break;
                case sizeof("Node-path"):
                        /* "Node-path" and "Node-kind" have the same length. */
                        if (prefixcmp(t, "Node-"))
                                continue;
                        if (!constcmp(t + strlen("Node-"), "path")) {
                                if (active_ctx == NODE_CTX)
                                        handle_node();
                                /* The first node of a revision opens its commit. */
                                if (active_ctx == REV_CTX)
                                        begin_revision();
                                active_ctx = NODE_CTX;
                                reset_node_ctx(val);
                                break;
                        }
                        if (constcmp(t + strlen("Node-"), "kind"))
                                continue;
                        if (!strcmp(val, "dir"))
                                node_ctx.type = REPO_MODE_DIR;
                        else if (!strcmp(val, "file"))
                                node_ctx.type = REPO_MODE_BLB;
                        else
                                fprintf(stderr, "Unknown node-kind: %s\n", val);
                        break;
                case sizeof("Node-action"):
                        if (constcmp(t, "Node-action"))
                                continue;
                        if (!strcmp(val, "delete")) {
                                node_ctx.action = NODEACT_DELETE;
                        } else if (!strcmp(val, "add")) {
                                node_ctx.action = NODEACT_ADD;
                        } else if (!strcmp(val, "change")) {
                                node_ctx.action = NODEACT_CHANGE;
                        } else if (!strcmp(val, "replace")) {
                                node_ctx.action = NODEACT_REPLACE;
                        } else {
                                fprintf(stderr, "Unknown node-action: %s\n", val);
                                node_ctx.action = NODEACT_UNKNOWN;
                        }
                        break;
                case sizeof("Node-copyfrom-path"):
                        if (constcmp(t, "Node-copyfrom-path"))
                                continue;
                        pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val);
                        break;
                case sizeof("Node-copyfrom-rev"):
                        if (constcmp(t, "Node-copyfrom-rev"))
                                continue;
                        node_ctx.srcRev = atoi(val);
                        break;
                case sizeof("Text-content-length"):
                        /* "Prop-content-length" has the same length. */
                        if (!constcmp(t, "Text-content-length")) {
                                node_ctx.textLength = atoi(val);
                                break;
                        }
                        if (constcmp(t, "Prop-content-length"))
                                continue;
                        node_ctx.propLength = atoi(val);
                        break;
                case sizeof("Text-delta"):
                        /* "Prop-delta" has the same length. */
                        if (!constcmp(t, "Text-delta")) {
                                node_ctx.text_delta = !strcmp(val, "true");
                                break;
                        }
                        if (constcmp(t, "Prop-delta"))
                                continue;
                        node_ctx.prop_delta = !strcmp(val, "true");
                        break;
                case sizeof("Content-length"):
                        if (constcmp(t, "Content-length"))
                                continue;
                        len = atoi(val);
                        /* This header must be followed by a blank line. */
                        t = buffer_read_line(&input);
                        if (!t)
                                die_short_read();
                        if (*t)
                                die("invalid dump: expected blank line after content length header");
                        if (active_ctx == REV_CTX) {
                                read_props();
                        } else if (active_ctx == NODE_CTX) {
                                handle_node();
                                active_ctx = INTERNODE_CTX;
                        } else {
                                /* No revision or node is open: skip the payload. */
                                fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len);
                                if (buffer_skip_bytes(&input, len) != len)
                                        die_short_read();
                        }
                }
        }
        if (buffer_ferror(&input))
                die_short_read();
        /* End of input: flush the pending node and close the last revision. */
        if (active_ctx == NODE_CTX)
                handle_node();
        if (active_ctx == REV_CTX)
                begin_revision();
        if (active_ctx != DUMP_CTX)
                end_revision();
}
 453
 454int svndump_init(const char *filename)
 455{
 456        if (buffer_init(&input, filename))
 457                return error("cannot open %s: %s", filename, strerror(errno));
 458        fast_export_init(REPORT_FILENO);
 459        strbuf_init(&dump_ctx.uuid, 4096);
 460        strbuf_init(&dump_ctx.url, 4096);
 461        strbuf_init(&rev_ctx.log, 4096);
 462        strbuf_init(&rev_ctx.author, 4096);
 463        reset_dump_ctx(NULL);
 464        reset_rev_ctx(0);
 465        reset_node_ctx(NULL);
 466        return 0;
 467}
 468
 469void svndump_deinit(void)
 470{
 471        fast_export_deinit();
 472        reset_dump_ctx(NULL);
 473        reset_rev_ctx(0);
 474        reset_node_ctx(NULL);
 475        strbuf_release(&rev_ctx.log);
 476        if (buffer_deinit(&input))
 477                fprintf(stderr, "Input error\n");
 478        if (ferror(stdout))
 479                fprintf(stderr, "Output error\n");
 480}
 481
 482void svndump_reset(void)
 483{
 484        fast_export_reset();
 485        buffer_reset(&input);
 486        strbuf_release(&dump_ctx.uuid);
 487        strbuf_release(&dump_ctx.url);
 488        strbuf_release(&rev_ctx.log);
 489        strbuf_release(&rev_ctx.author);
 490}