list-objects-filter.con commit list-objects-filter: implement filter tree:0 (bc5975d)
   1#include "cache.h"
   2#include "dir.h"
   3#include "tag.h"
   4#include "commit.h"
   5#include "tree.h"
   6#include "blob.h"
   7#include "diff.h"
   8#include "tree-walk.h"
   9#include "revision.h"
  10#include "list-objects.h"
  11#include "list-objects-filter.h"
  12#include "list-objects-filter-options.h"
  13#include "oidset.h"
  14#include "object-store.h"
  15
  16/* Remember to update object flag allocation in object.h */
  17/*
  18 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
  19 * that have been shown, but should be revisited if they appear
  20 * in the traversal (until we mark it SEEN).  This is a way to
  21 * let us silently de-dup calls to show() in the caller.  This
  22 * is subtly different from the "revision.h:SHOWN" and the
  23 * "sha1-name.c:ONELINE_SEEN" bits.  And also different from
  24 * the non-de-dup usage in pack-bitmap.c
  25 */
  26#define FILTER_SHOWN_BUT_REVISIT (1<<21)
  27
  28/*
  29 * A filter for list-objects to omit ALL blobs from the traversal.
  30 * And to OPTIONALLY collect a list of the omitted OIDs.
  31 */
  32struct filter_blobs_none_data {
  33        struct oidset *omits;
  34};
  35
  36static enum list_objects_filter_result filter_blobs_none(
  37        enum list_objects_filter_situation filter_situation,
  38        struct object *obj,
  39        const char *pathname,
  40        const char *filename,
  41        void *filter_data_)
  42{
  43        struct filter_blobs_none_data *filter_data = filter_data_;
  44
  45        switch (filter_situation) {
  46        default:
  47                BUG("unknown filter_situation: %d", filter_situation);
  48
  49        case LOFS_BEGIN_TREE:
  50                assert(obj->type == OBJ_TREE);
  51                /* always include all tree objects */
  52                return LOFR_MARK_SEEN | LOFR_DO_SHOW;
  53
  54        case LOFS_END_TREE:
  55                assert(obj->type == OBJ_TREE);
  56                return LOFR_ZERO;
  57
  58        case LOFS_BLOB:
  59                assert(obj->type == OBJ_BLOB);
  60                assert((obj->flags & SEEN) == 0);
  61
  62                if (filter_data->omits)
  63                        oidset_insert(filter_data->omits, &obj->oid);
  64                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
  65        }
  66}
  67
  68static void *filter_blobs_none__init(
  69        struct oidset *omitted,
  70        struct list_objects_filter_options *filter_options,
  71        filter_object_fn *filter_fn,
  72        filter_free_fn *filter_free_fn)
  73{
  74        struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
  75        d->omits = omitted;
  76
  77        *filter_fn = filter_blobs_none;
  78        *filter_free_fn = free;
  79        return d;
  80}
  81
  82/*
  83 * A filter for list-objects to omit ALL trees and blobs from the traversal.
  84 * Can OPTIONALLY collect a list of the omitted OIDs.
  85 */
  86struct filter_trees_none_data {
  87        struct oidset *omits;
  88};
  89
  90static enum list_objects_filter_result filter_trees_none(
  91        enum list_objects_filter_situation filter_situation,
  92        struct object *obj,
  93        const char *pathname,
  94        const char *filename,
  95        void *filter_data_)
  96{
  97        struct filter_trees_none_data *filter_data = filter_data_;
  98
  99        switch (filter_situation) {
 100        default:
 101                BUG("unknown filter_situation: %d", filter_situation);
 102
 103        case LOFS_BEGIN_TREE:
 104        case LOFS_BLOB:
 105                if (filter_data->omits)
 106                        oidset_insert(filter_data->omits, &obj->oid);
 107                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 108
 109        case LOFS_END_TREE:
 110                assert(obj->type == OBJ_TREE);
 111                return LOFR_ZERO;
 112
 113        }
 114}
 115
 116static void* filter_trees_none__init(
 117        struct oidset *omitted,
 118        struct list_objects_filter_options *filter_options,
 119        filter_object_fn *filter_fn,
 120        filter_free_fn *filter_free_fn)
 121{
 122        struct filter_trees_none_data *d = xcalloc(1, sizeof(*d));
 123        d->omits = omitted;
 124
 125        *filter_fn = filter_trees_none;
 126        *filter_free_fn = free;
 127        return d;
 128}
 129
 130/*
 131 * A filter for list-objects to omit large blobs.
 132 * And to OPTIONALLY collect a list of the omitted OIDs.
 133 */
 134struct filter_blobs_limit_data {
 135        struct oidset *omits;
 136        unsigned long max_bytes;
 137};
 138
 139static enum list_objects_filter_result filter_blobs_limit(
 140        enum list_objects_filter_situation filter_situation,
 141        struct object *obj,
 142        const char *pathname,
 143        const char *filename,
 144        void *filter_data_)
 145{
 146        struct filter_blobs_limit_data *filter_data = filter_data_;
 147        unsigned long object_length;
 148        enum object_type t;
 149
 150        switch (filter_situation) {
 151        default:
 152                BUG("unknown filter_situation: %d", filter_situation);
 153
 154        case LOFS_BEGIN_TREE:
 155                assert(obj->type == OBJ_TREE);
 156                /* always include all tree objects */
 157                return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 158
 159        case LOFS_END_TREE:
 160                assert(obj->type == OBJ_TREE);
 161                return LOFR_ZERO;
 162
 163        case LOFS_BLOB:
 164                assert(obj->type == OBJ_BLOB);
 165                assert((obj->flags & SEEN) == 0);
 166
 167                t = oid_object_info(the_repository, &obj->oid, &object_length);
 168                if (t != OBJ_BLOB) { /* probably OBJ_NONE */
 169                        /*
 170                         * We DO NOT have the blob locally, so we cannot
 171                         * apply the size filter criteria.  Be conservative
 172                         * and force show it (and let the caller deal with
 173                         * the ambiguity).
 174                         */
 175                        goto include_it;
 176                }
 177
 178                if (object_length < filter_data->max_bytes)
 179                        goto include_it;
 180
 181                if (filter_data->omits)
 182                        oidset_insert(filter_data->omits, &obj->oid);
 183                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 184        }
 185
 186include_it:
 187        if (filter_data->omits)
 188                oidset_remove(filter_data->omits, &obj->oid);
 189        return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 190}
 191
 192static void *filter_blobs_limit__init(
 193        struct oidset *omitted,
 194        struct list_objects_filter_options *filter_options,
 195        filter_object_fn *filter_fn,
 196        filter_free_fn *filter_free_fn)
 197{
 198        struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
 199        d->omits = omitted;
 200        d->max_bytes = filter_options->blob_limit_value;
 201
 202        *filter_fn = filter_blobs_limit;
 203        *filter_free_fn = free;
 204        return d;
 205}
 206
 207/*
 208 * A filter driven by a sparse-checkout specification to only
 209 * include blobs that a sparse checkout would populate.
 210 *
 211 * The sparse-checkout spec can be loaded from a blob with the
 212 * given OID or from a local pathname.  We allow an OID because
 213 * the repo may be bare or we may be doing the filtering on the
 214 * server.
 215 */
 216struct frame {
 217        /*
 218         * defval is the usual default include/exclude value that
 219         * should be inherited as we recurse into directories based
 220         * upon pattern matching of the directory itself or of a
 221         * containing directory.
 222         */
 223        int defval;
 224
 225        /*
 226         * 1 if the directory (recursively) contains any provisionally
 227         * omitted objects.
 228         *
 229         * 0 if everything (recursively) contained in this directory
 230         * has been explicitly included (SHOWN) in the result and
 231         * the directory may be short-cut later in the traversal.
 232         */
 233        unsigned child_prov_omit : 1;
 234};
 235
 236struct filter_sparse_data {
 237        struct oidset *omits;
 238        struct exclude_list el;
 239
 240        size_t nr, alloc;
 241        struct frame *array_frame;
 242};
 243
 244static enum list_objects_filter_result filter_sparse(
 245        enum list_objects_filter_situation filter_situation,
 246        struct object *obj,
 247        const char *pathname,
 248        const char *filename,
 249        void *filter_data_)
 250{
 251        struct filter_sparse_data *filter_data = filter_data_;
 252        int val, dtype;
 253        struct frame *frame;
 254
 255        switch (filter_situation) {
 256        default:
 257                BUG("unknown filter_situation: %d", filter_situation);
 258
 259        case LOFS_BEGIN_TREE:
 260                assert(obj->type == OBJ_TREE);
 261                dtype = DT_DIR;
 262                val = is_excluded_from_list(pathname, strlen(pathname),
 263                                            filename, &dtype, &filter_data->el,
 264                                            &the_index);
 265                if (val < 0)
 266                        val = filter_data->array_frame[filter_data->nr].defval;
 267
 268                ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,
 269                           filter_data->alloc);
 270                filter_data->nr++;
 271                filter_data->array_frame[filter_data->nr].defval = val;
 272                filter_data->array_frame[filter_data->nr].child_prov_omit = 0;
 273
 274                /*
 275                 * A directory with this tree OID may appear in multiple
 276                 * places in the tree. (Think of a directory move or copy,
 277                 * with no other changes, so the OID is the same, but the
 278                 * full pathnames of objects within this directory are new
 279                 * and may match is_excluded() patterns differently.)
 280                 * So we cannot mark this directory as SEEN (yet), since
 281                 * that will prevent process_tree() from revisiting this
 282                 * tree object with other pathname prefixes.
 283                 *
 284                 * Only _DO_SHOW the tree object the first time we visit
 285                 * this tree object.
 286                 *
 287                 * We always show all tree objects.  A future optimization
 288                 * may want to attempt to narrow this.
 289                 */
 290                if (obj->flags & FILTER_SHOWN_BUT_REVISIT)
 291                        return LOFR_ZERO;
 292                obj->flags |= FILTER_SHOWN_BUT_REVISIT;
 293                return LOFR_DO_SHOW;
 294
 295        case LOFS_END_TREE:
 296                assert(obj->type == OBJ_TREE);
 297                assert(filter_data->nr > 0);
 298
 299                frame = &filter_data->array_frame[filter_data->nr];
 300                filter_data->nr--;
 301
 302                /*
 303                 * Tell our parent directory if any of our children were
 304                 * provisionally omitted.
 305                 */
 306                filter_data->array_frame[filter_data->nr].child_prov_omit |=
 307                        frame->child_prov_omit;
 308
 309                /*
 310                 * If there are NO provisionally omitted child objects (ALL child
 311                 * objects in this folder were INCLUDED), then we can mark the
 312                 * folder as SEEN (so we will not have to revisit it again).
 313                 */
 314                if (!frame->child_prov_omit)
 315                        return LOFR_MARK_SEEN;
 316                return LOFR_ZERO;
 317
 318        case LOFS_BLOB:
 319                assert(obj->type == OBJ_BLOB);
 320                assert((obj->flags & SEEN) == 0);
 321
 322                frame = &filter_data->array_frame[filter_data->nr];
 323
 324                dtype = DT_REG;
 325                val = is_excluded_from_list(pathname, strlen(pathname),
 326                                            filename, &dtype, &filter_data->el,
 327                                            &the_index);
 328                if (val < 0)
 329                        val = frame->defval;
 330                if (val > 0) {
 331                        if (filter_data->omits)
 332                                oidset_remove(filter_data->omits, &obj->oid);
 333                        return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 334                }
 335
 336                /*
 337                 * Provisionally omit it.  We've already established that
 338                 * this pathname is not in the sparse-checkout specification
 339                 * with the CURRENT pathname, so we *WANT* to omit this blob.
 340                 *
 341                 * However, a pathname elsewhere in the tree may also
 342                 * reference this same blob, so we cannot reject it yet.
 343                 * Leave the LOFR_ bits unset so that if the blob appears
 344                 * again in the traversal, we will be asked again.
 345                 */
 346                if (filter_data->omits)
 347                        oidset_insert(filter_data->omits, &obj->oid);
 348
 349                /*
 350                 * Remember that at least 1 blob in this tree was
 351                 * provisionally omitted.  This prevents us from short
 352                 * cutting the tree in future iterations.
 353                 */
 354                frame->child_prov_omit = 1;
 355                return LOFR_ZERO;
 356        }
 357}
 358
 359
 360static void filter_sparse_free(void *filter_data)
 361{
 362        struct filter_sparse_data *d = filter_data;
 363        /* TODO free contents of 'd' */
 364        free(d);
 365}
 366
 367static void *filter_sparse_oid__init(
 368        struct oidset *omitted,
 369        struct list_objects_filter_options *filter_options,
 370        filter_object_fn *filter_fn,
 371        filter_free_fn *filter_free_fn)
 372{
 373        struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 374        d->omits = omitted;
 375        if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
 376                                           NULL, 0, &d->el) < 0)
 377                die("could not load filter specification");
 378
 379        ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 380        d->array_frame[d->nr].defval = 0; /* default to include */
 381        d->array_frame[d->nr].child_prov_omit = 0;
 382
 383        *filter_fn = filter_sparse;
 384        *filter_free_fn = filter_sparse_free;
 385        return d;
 386}
 387
 388static void *filter_sparse_path__init(
 389        struct oidset *omitted,
 390        struct list_objects_filter_options *filter_options,
 391        filter_object_fn *filter_fn,
 392        filter_free_fn *filter_free_fn)
 393{
 394        struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 395        d->omits = omitted;
 396        if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
 397                                           NULL, 0, &d->el, NULL) < 0)
 398                die("could not load filter specification");
 399
 400        ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 401        d->array_frame[d->nr].defval = 0; /* default to include */
 402        d->array_frame[d->nr].child_prov_omit = 0;
 403
 404        *filter_fn = filter_sparse;
 405        *filter_free_fn = filter_sparse_free;
 406        return d;
 407}
 408
 409typedef void *(*filter_init_fn)(
 410        struct oidset *omitted,
 411        struct list_objects_filter_options *filter_options,
 412        filter_object_fn *filter_fn,
 413        filter_free_fn *filter_free_fn);
 414
 415/*
 416 * Must match "enum list_objects_filter_choice".
 417 */
 418static filter_init_fn s_filters[] = {
 419        NULL,
 420        filter_blobs_none__init,
 421        filter_blobs_limit__init,
 422        filter_trees_none__init,
 423        filter_sparse_oid__init,
 424        filter_sparse_path__init,
 425};
 426
 427void *list_objects_filter__init(
 428        struct oidset *omitted,
 429        struct list_objects_filter_options *filter_options,
 430        filter_object_fn *filter_fn,
 431        filter_free_fn *filter_free_fn)
 432{
 433        filter_init_fn init_fn;
 434
 435        assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
 436
 437        if (filter_options->choice >= LOFC__COUNT)
 438                BUG("invalid list-objects filter choice: %d",
 439                    filter_options->choice);
 440
 441        init_fn = s_filters[filter_options->choice];
 442        if (init_fn)
 443                return init_fn(omitted, filter_options,
 444                               filter_fn, filter_free_fn);
 445        *filter_fn = NULL;
 446        *filter_free_fn = NULL;
 447        return NULL;
 448}