/* list-objects-filter.c, as of commit "list-objects-filter: use BUG rather than die" (696aa73) */
   1#include "cache.h"
   2#include "dir.h"
   3#include "tag.h"
   4#include "commit.h"
   5#include "tree.h"
   6#include "blob.h"
   7#include "diff.h"
   8#include "tree-walk.h"
   9#include "revision.h"
  10#include "list-objects.h"
  11#include "list-objects-filter.h"
  12#include "list-objects-filter-options.h"
  13#include "oidset.h"
  14#include "object-store.h"
  15
  16/* Remember to update object flag allocation in object.h */
  17/*
  18 * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
  19 * that have been shown, but should be revisited if they appear
  20 * in the traversal (until we mark it SEEN).  This is a way to
  21 * let us silently de-dup calls to show() in the caller.  This
  22 * is subtly different from the "revision.h:SHOWN" and the
  23 * "sha1-name.c:ONELINE_SEEN" bits.  And also different from
  24 * the non-de-dup usage in pack-bitmap.c
  25 */
  26#define FILTER_SHOWN_BUT_REVISIT (1<<21)
  27
  28/*
  29 * A filter for list-objects to omit ALL blobs from the traversal.
  30 * And to OPTIONALLY collect a list of the omitted OIDs.
  31 */
  32struct filter_blobs_none_data {
  33        struct oidset *omits;
  34};
  35
  36static enum list_objects_filter_result filter_blobs_none(
  37        enum list_objects_filter_situation filter_situation,
  38        struct object *obj,
  39        const char *pathname,
  40        const char *filename,
  41        void *filter_data_)
  42{
  43        struct filter_blobs_none_data *filter_data = filter_data_;
  44
  45        switch (filter_situation) {
  46        default:
  47                BUG("unknown filter_situation: %d", filter_situation);
  48
  49        case LOFS_BEGIN_TREE:
  50                assert(obj->type == OBJ_TREE);
  51                /* always include all tree objects */
  52                return LOFR_MARK_SEEN | LOFR_DO_SHOW;
  53
  54        case LOFS_END_TREE:
  55                assert(obj->type == OBJ_TREE);
  56                return LOFR_ZERO;
  57
  58        case LOFS_BLOB:
  59                assert(obj->type == OBJ_BLOB);
  60                assert((obj->flags & SEEN) == 0);
  61
  62                if (filter_data->omits)
  63                        oidset_insert(filter_data->omits, &obj->oid);
  64                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
  65        }
  66}
  67
  68static void *filter_blobs_none__init(
  69        struct oidset *omitted,
  70        struct list_objects_filter_options *filter_options,
  71        filter_object_fn *filter_fn,
  72        filter_free_fn *filter_free_fn)
  73{
  74        struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
  75        d->omits = omitted;
  76
  77        *filter_fn = filter_blobs_none;
  78        *filter_free_fn = free;
  79        return d;
  80}
  81
  82/*
  83 * A filter for list-objects to omit large blobs.
  84 * And to OPTIONALLY collect a list of the omitted OIDs.
  85 */
  86struct filter_blobs_limit_data {
  87        struct oidset *omits;
  88        unsigned long max_bytes;
  89};
  90
  91static enum list_objects_filter_result filter_blobs_limit(
  92        enum list_objects_filter_situation filter_situation,
  93        struct object *obj,
  94        const char *pathname,
  95        const char *filename,
  96        void *filter_data_)
  97{
  98        struct filter_blobs_limit_data *filter_data = filter_data_;
  99        unsigned long object_length;
 100        enum object_type t;
 101
 102        switch (filter_situation) {
 103        default:
 104                BUG("unknown filter_situation: %d", filter_situation);
 105
 106        case LOFS_BEGIN_TREE:
 107                assert(obj->type == OBJ_TREE);
 108                /* always include all tree objects */
 109                return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 110
 111        case LOFS_END_TREE:
 112                assert(obj->type == OBJ_TREE);
 113                return LOFR_ZERO;
 114
 115        case LOFS_BLOB:
 116                assert(obj->type == OBJ_BLOB);
 117                assert((obj->flags & SEEN) == 0);
 118
 119                t = oid_object_info(the_repository, &obj->oid, &object_length);
 120                if (t != OBJ_BLOB) { /* probably OBJ_NONE */
 121                        /*
 122                         * We DO NOT have the blob locally, so we cannot
 123                         * apply the size filter criteria.  Be conservative
 124                         * and force show it (and let the caller deal with
 125                         * the ambiguity).
 126                         */
 127                        goto include_it;
 128                }
 129
 130                if (object_length < filter_data->max_bytes)
 131                        goto include_it;
 132
 133                if (filter_data->omits)
 134                        oidset_insert(filter_data->omits, &obj->oid);
 135                return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
 136        }
 137
 138include_it:
 139        if (filter_data->omits)
 140                oidset_remove(filter_data->omits, &obj->oid);
 141        return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 142}
 143
 144static void *filter_blobs_limit__init(
 145        struct oidset *omitted,
 146        struct list_objects_filter_options *filter_options,
 147        filter_object_fn *filter_fn,
 148        filter_free_fn *filter_free_fn)
 149{
 150        struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
 151        d->omits = omitted;
 152        d->max_bytes = filter_options->blob_limit_value;
 153
 154        *filter_fn = filter_blobs_limit;
 155        *filter_free_fn = free;
 156        return d;
 157}
 158
 159/*
 160 * A filter driven by a sparse-checkout specification to only
 161 * include blobs that a sparse checkout would populate.
 162 *
 163 * The sparse-checkout spec can be loaded from a blob with the
 164 * given OID or from a local pathname.  We allow an OID because
 165 * the repo may be bare or we may be doing the filtering on the
 166 * server.
 167 */
 168struct frame {
 169        /*
 170         * defval is the usual default include/exclude value that
 171         * should be inherited as we recurse into directories based
 172         * upon pattern matching of the directory itself or of a
 173         * containing directory.
 174         */
 175        int defval;
 176
 177        /*
 178         * 1 if the directory (recursively) contains any provisionally
 179         * omitted objects.
 180         *
 181         * 0 if everything (recursively) contained in this directory
 182         * has been explicitly included (SHOWN) in the result and
 183         * the directory may be short-cut later in the traversal.
 184         */
 185        unsigned child_prov_omit : 1;
 186};
 187
 188struct filter_sparse_data {
 189        struct oidset *omits;
 190        struct exclude_list el;
 191
 192        size_t nr, alloc;
 193        struct frame *array_frame;
 194};
 195
 196static enum list_objects_filter_result filter_sparse(
 197        enum list_objects_filter_situation filter_situation,
 198        struct object *obj,
 199        const char *pathname,
 200        const char *filename,
 201        void *filter_data_)
 202{
 203        struct filter_sparse_data *filter_data = filter_data_;
 204        int val, dtype;
 205        struct frame *frame;
 206
 207        switch (filter_situation) {
 208        default:
 209                BUG("unknown filter_situation: %d", filter_situation);
 210
 211        case LOFS_BEGIN_TREE:
 212                assert(obj->type == OBJ_TREE);
 213                dtype = DT_DIR;
 214                val = is_excluded_from_list(pathname, strlen(pathname),
 215                                            filename, &dtype, &filter_data->el,
 216                                            &the_index);
 217                if (val < 0)
 218                        val = filter_data->array_frame[filter_data->nr].defval;
 219
 220                ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,
 221                           filter_data->alloc);
 222                filter_data->nr++;
 223                filter_data->array_frame[filter_data->nr].defval = val;
 224                filter_data->array_frame[filter_data->nr].child_prov_omit = 0;
 225
 226                /*
 227                 * A directory with this tree OID may appear in multiple
 228                 * places in the tree. (Think of a directory move or copy,
 229                 * with no other changes, so the OID is the same, but the
 230                 * full pathnames of objects within this directory are new
 231                 * and may match is_excluded() patterns differently.)
 232                 * So we cannot mark this directory as SEEN (yet), since
 233                 * that will prevent process_tree() from revisiting this
 234                 * tree object with other pathname prefixes.
 235                 *
 236                 * Only _DO_SHOW the tree object the first time we visit
 237                 * this tree object.
 238                 *
 239                 * We always show all tree objects.  A future optimization
 240                 * may want to attempt to narrow this.
 241                 */
 242                if (obj->flags & FILTER_SHOWN_BUT_REVISIT)
 243                        return LOFR_ZERO;
 244                obj->flags |= FILTER_SHOWN_BUT_REVISIT;
 245                return LOFR_DO_SHOW;
 246
 247        case LOFS_END_TREE:
 248                assert(obj->type == OBJ_TREE);
 249                assert(filter_data->nr > 0);
 250
 251                frame = &filter_data->array_frame[filter_data->nr];
 252                filter_data->nr--;
 253
 254                /*
 255                 * Tell our parent directory if any of our children were
 256                 * provisionally omitted.
 257                 */
 258                filter_data->array_frame[filter_data->nr].child_prov_omit |=
 259                        frame->child_prov_omit;
 260
 261                /*
 262                 * If there are NO provisionally omitted child objects (ALL child
 263                 * objects in this folder were INCLUDED), then we can mark the
 264                 * folder as SEEN (so we will not have to revisit it again).
 265                 */
 266                if (!frame->child_prov_omit)
 267                        return LOFR_MARK_SEEN;
 268                return LOFR_ZERO;
 269
 270        case LOFS_BLOB:
 271                assert(obj->type == OBJ_BLOB);
 272                assert((obj->flags & SEEN) == 0);
 273
 274                frame = &filter_data->array_frame[filter_data->nr];
 275
 276                dtype = DT_REG;
 277                val = is_excluded_from_list(pathname, strlen(pathname),
 278                                            filename, &dtype, &filter_data->el,
 279                                            &the_index);
 280                if (val < 0)
 281                        val = frame->defval;
 282                if (val > 0) {
 283                        if (filter_data->omits)
 284                                oidset_remove(filter_data->omits, &obj->oid);
 285                        return LOFR_MARK_SEEN | LOFR_DO_SHOW;
 286                }
 287
 288                /*
 289                 * Provisionally omit it.  We've already established that
 290                 * this pathname is not in the sparse-checkout specification
 291                 * with the CURRENT pathname, so we *WANT* to omit this blob.
 292                 *
 293                 * However, a pathname elsewhere in the tree may also
 294                 * reference this same blob, so we cannot reject it yet.
 295                 * Leave the LOFR_ bits unset so that if the blob appears
 296                 * again in the traversal, we will be asked again.
 297                 */
 298                if (filter_data->omits)
 299                        oidset_insert(filter_data->omits, &obj->oid);
 300
 301                /*
 302                 * Remember that at least 1 blob in this tree was
 303                 * provisionally omitted.  This prevents us from short
 304                 * cutting the tree in future iterations.
 305                 */
 306                frame->child_prov_omit = 1;
 307                return LOFR_ZERO;
 308        }
 309}
 310
 311
 312static void filter_sparse_free(void *filter_data)
 313{
 314        struct filter_sparse_data *d = filter_data;
 315        /* TODO free contents of 'd' */
 316        free(d);
 317}
 318
 319static void *filter_sparse_oid__init(
 320        struct oidset *omitted,
 321        struct list_objects_filter_options *filter_options,
 322        filter_object_fn *filter_fn,
 323        filter_free_fn *filter_free_fn)
 324{
 325        struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 326        d->omits = omitted;
 327        if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
 328                                           NULL, 0, &d->el) < 0)
 329                die("could not load filter specification");
 330
 331        ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 332        d->array_frame[d->nr].defval = 0; /* default to include */
 333        d->array_frame[d->nr].child_prov_omit = 0;
 334
 335        *filter_fn = filter_sparse;
 336        *filter_free_fn = filter_sparse_free;
 337        return d;
 338}
 339
 340static void *filter_sparse_path__init(
 341        struct oidset *omitted,
 342        struct list_objects_filter_options *filter_options,
 343        filter_object_fn *filter_fn,
 344        filter_free_fn *filter_free_fn)
 345{
 346        struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
 347        d->omits = omitted;
 348        if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
 349                                           NULL, 0, &d->el, NULL) < 0)
 350                die("could not load filter specification");
 351
 352        ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
 353        d->array_frame[d->nr].defval = 0; /* default to include */
 354        d->array_frame[d->nr].child_prov_omit = 0;
 355
 356        *filter_fn = filter_sparse;
 357        *filter_free_fn = filter_sparse_free;
 358        return d;
 359}
 360
 361typedef void *(*filter_init_fn)(
 362        struct oidset *omitted,
 363        struct list_objects_filter_options *filter_options,
 364        filter_object_fn *filter_fn,
 365        filter_free_fn *filter_free_fn);
 366
 367/*
 368 * Must match "enum list_objects_filter_choice".
 369 */
 370static filter_init_fn s_filters[] = {
 371        NULL,
 372        filter_blobs_none__init,
 373        filter_blobs_limit__init,
 374        filter_sparse_oid__init,
 375        filter_sparse_path__init,
 376};
 377
 378void *list_objects_filter__init(
 379        struct oidset *omitted,
 380        struct list_objects_filter_options *filter_options,
 381        filter_object_fn *filter_fn,
 382        filter_free_fn *filter_free_fn)
 383{
 384        filter_init_fn init_fn;
 385
 386        assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
 387
 388        if (filter_options->choice >= LOFC__COUNT)
 389                BUG("invalid list-objects filter choice: %d",
 390                    filter_options->choice);
 391
 392        init_fn = s_filters[filter_options->choice];
 393        if (init_fn)
 394                return init_fn(omitted, filter_options,
 395                               filter_fn, filter_free_fn);
 396        *filter_fn = NULL;
 397        *filter_free_fn = NULL;
 398        return NULL;
 399}