grep.con commit contrib/git-jump/git-jump: jump to exact location (240cf2a)
   1#include "cache.h"
   2#include "config.h"
   3#include "grep.h"
   4#include "userdiff.h"
   5#include "xdiff-interface.h"
   6#include "diff.h"
   7#include "diffcore.h"
   8#include "commit.h"
   9#include "quote.h"
  10
/*
 * Forward declarations of file-local helpers whose definitions are
 * not visible in this part of the file.
 */
   11static int grep_source_load(struct grep_source *gs);
   12static int grep_source_is_binary(struct grep_source *gs);
   13
/*
 * Template of default option values: filled in by init_grep_defaults()
 * and grep_config(), and copied into each instance by grep_init().
 */
   14static struct grep_opt grep_defaults;
  15
  16static void std_output(struct grep_opt *opt, const void *buf, size_t size)
  17{
  18        fwrite(buf, size, 1, stdout);
  19}
  20
  21static void color_set(char *dst, const char *color_bytes)
  22{
  23        xsnprintf(dst, COLOR_MAXLEN, "%s", color_bytes);
  24}
  25
  26/*
  27 * Initialize the grep_defaults template with hardcoded defaults.
  28 * We could let the compiler do this, but without C99 initializers
  29 * the code gets unwieldy and unreadable, so...
  30 */
  31void init_grep_defaults(void)
  32{
  33        struct grep_opt *opt = &grep_defaults;
  34        static int run_once;
  35
  36        if (run_once)
  37                return;
  38        run_once++;
  39
  40        memset(opt, 0, sizeof(*opt));
  41        opt->relative = 1;
  42        opt->pathname = 1;
  43        opt->max_depth = -1;
  44        opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
  45        color_set(opt->color_context, "");
  46        color_set(opt->color_filename, "");
  47        color_set(opt->color_function, "");
  48        color_set(opt->color_lineno, "");
  49        color_set(opt->color_columnno, "");
  50        color_set(opt->color_match_context, GIT_COLOR_BOLD_RED);
  51        color_set(opt->color_match_selected, GIT_COLOR_BOLD_RED);
  52        color_set(opt->color_selected, "");
  53        color_set(opt->color_sep, GIT_COLOR_CYAN);
  54        opt->color = -1;
  55        opt->output = std_output;
  56}
  57
  58static int parse_pattern_type_arg(const char *opt, const char *arg)
  59{
  60        if (!strcmp(arg, "default"))
  61                return GREP_PATTERN_TYPE_UNSPECIFIED;
  62        else if (!strcmp(arg, "basic"))
  63                return GREP_PATTERN_TYPE_BRE;
  64        else if (!strcmp(arg, "extended"))
  65                return GREP_PATTERN_TYPE_ERE;
  66        else if (!strcmp(arg, "fixed"))
  67                return GREP_PATTERN_TYPE_FIXED;
  68        else if (!strcmp(arg, "perl"))
  69                return GREP_PATTERN_TYPE_PCRE;
  70        die("bad %s argument: %s", opt, arg);
  71}
  72
  73/*
  74 * Read the configuration file once and store it in
  75 * the grep_defaults template.
  76 */
  77int grep_config(const char *var, const char *value, void *cb)
  78{
  79        struct grep_opt *opt = &grep_defaults;
  80        char *color = NULL;
  81
  82        if (userdiff_config(var, value) < 0)
  83                return -1;
  84
  85        if (!strcmp(var, "grep.extendedregexp")) {
  86                opt->extended_regexp_option = git_config_bool(var, value);
  87                return 0;
  88        }
  89
  90        if (!strcmp(var, "grep.patterntype")) {
  91                opt->pattern_type_option = parse_pattern_type_arg(var, value);
  92                return 0;
  93        }
  94
  95        if (!strcmp(var, "grep.linenumber")) {
  96                opt->linenum = git_config_bool(var, value);
  97                return 0;
  98        }
  99        if (!strcmp(var, "grep.column")) {
 100                opt->columnnum = git_config_bool(var, value);
 101                return 0;
 102        }
 103
 104        if (!strcmp(var, "grep.fullname")) {
 105                opt->relative = !git_config_bool(var, value);
 106                return 0;
 107        }
 108
 109        if (!strcmp(var, "color.grep"))
 110                opt->color = git_config_colorbool(var, value);
 111        else if (!strcmp(var, "color.grep.context"))
 112                color = opt->color_context;
 113        else if (!strcmp(var, "color.grep.filename"))
 114                color = opt->color_filename;
 115        else if (!strcmp(var, "color.grep.function"))
 116                color = opt->color_function;
 117        else if (!strcmp(var, "color.grep.linenumber"))
 118                color = opt->color_lineno;
 119        else if (!strcmp(var, "color.grep.column"))
 120                color = opt->color_columnno;
 121        else if (!strcmp(var, "color.grep.matchcontext"))
 122                color = opt->color_match_context;
 123        else if (!strcmp(var, "color.grep.matchselected"))
 124                color = opt->color_match_selected;
 125        else if (!strcmp(var, "color.grep.selected"))
 126                color = opt->color_selected;
 127        else if (!strcmp(var, "color.grep.separator"))
 128                color = opt->color_sep;
 129        else if (!strcmp(var, "color.grep.match")) {
 130                int rc = 0;
 131                if (!value)
 132                        return config_error_nonbool(var);
 133                rc |= color_parse(value, opt->color_match_context);
 134                rc |= color_parse(value, opt->color_match_selected);
 135                return rc;
 136        }
 137
 138        if (color) {
 139                if (!value)
 140                        return config_error_nonbool(var);
 141                return color_parse(value, color);
 142        }
 143        return 0;
 144}
 145
 146/*
 147 * Initialize one instance of grep_opt and copy the
 148 * default values from the template we read the configuration
 149 * information in an earlier call to git_config(grep_config).
 150 */
 151void grep_init(struct grep_opt *opt, const char *prefix)
 152{
 153        struct grep_opt *def = &grep_defaults;
 154
 155        memset(opt, 0, sizeof(*opt));
 156        opt->prefix = prefix;
 157        opt->prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
 158        opt->pattern_tail = &opt->pattern_list;
 159        opt->header_tail = &opt->header_list;
 160
 161        opt->color = def->color;
 162        opt->extended_regexp_option = def->extended_regexp_option;
 163        opt->pattern_type_option = def->pattern_type_option;
 164        opt->linenum = def->linenum;
 165        opt->columnnum = def->columnnum;
 166        opt->max_depth = def->max_depth;
 167        opt->pathname = def->pathname;
 168        opt->relative = def->relative;
 169        opt->output = def->output;
 170
 171        color_set(opt->color_context, def->color_context);
 172        color_set(opt->color_filename, def->color_filename);
 173        color_set(opt->color_function, def->color_function);
 174        color_set(opt->color_lineno, def->color_lineno);
 175        color_set(opt->color_columnno, def->color_columnno);
 176        color_set(opt->color_match_context, def->color_match_context);
 177        color_set(opt->color_match_selected, def->color_match_selected);
 178        color_set(opt->color_selected, def->color_selected);
 179        color_set(opt->color_sep, def->color_sep);
 180}
 181
 182static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 183{
 184        /*
 185         * When committing to the pattern type by setting the relevant
 186         * fields in grep_opt it's generally not necessary to zero out
 187         * the fields we're not choosing, since they won't have been
 188         * set by anything. The extended_regexp_option field is the
 189         * only exception to this.
 190         *
 191         * This is because in the process of parsing grep.patternType
 192         * & grep.extendedRegexp we set opt->pattern_type_option and
 193         * opt->extended_regexp_option, respectively. We then
 194         * internally use opt->extended_regexp_option to see if we're
 195         * compiling an ERE. It must be unset if that's not actually
 196         * the case.
 197         */
 198        if (pattern_type != GREP_PATTERN_TYPE_ERE &&
 199            opt->extended_regexp_option)
 200                opt->extended_regexp_option = 0;
 201
 202        switch (pattern_type) {
 203        case GREP_PATTERN_TYPE_UNSPECIFIED:
 204                /* fall through */
 205
 206        case GREP_PATTERN_TYPE_BRE:
 207                break;
 208
 209        case GREP_PATTERN_TYPE_ERE:
 210                opt->extended_regexp_option = 1;
 211                break;
 212
 213        case GREP_PATTERN_TYPE_FIXED:
 214                opt->fixed = 1;
 215                break;
 216
 217        case GREP_PATTERN_TYPE_PCRE:
 218#ifdef USE_LIBPCRE2
 219                opt->pcre2 = 1;
 220#else
 221                /*
 222                 * It's important that pcre1 always be assigned to
 223                 * even when there's no USE_LIBPCRE* defined. We still
 224                 * call the PCRE stub function, it just dies with
 225                 * "cannot use Perl-compatible regexes[...]".
 226                 */
 227                opt->pcre1 = 1;
 228#endif
 229                break;
 230        }
 231}
 232
 233void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 234{
 235        if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED)
 236                grep_set_pattern_type_option(pattern_type, opt);
 237        else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED)
 238                grep_set_pattern_type_option(opt->pattern_type_option, opt);
 239        else if (opt->extended_regexp_option)
 240                /*
 241                 * This branch *must* happen after setting from the
 242                 * opt->pattern_type_option above, we don't want
 243                 * grep.extendedRegexp to override grep.patternType!
 244                 */
 245                grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt);
 246}
 247
 248static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
 249                                        const char *origin, int no,
 250                                        enum grep_pat_token t,
 251                                        enum grep_header_field field)
 252{
 253        struct grep_pat *p = xcalloc(1, sizeof(*p));
 254        p->pattern = xmemdupz(pat, patlen);
 255        p->patternlen = patlen;
 256        p->origin = origin;
 257        p->no = no;
 258        p->token = t;
 259        p->field = field;
 260        return p;
 261}
 262
 263static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p)
 264{
 265        **tail = p;
 266        *tail = &p->next;
 267        p->next = NULL;
 268
 269        switch (p->token) {
 270        case GREP_PATTERN: /* atom */
 271        case GREP_PATTERN_HEAD:
 272        case GREP_PATTERN_BODY:
 273                for (;;) {
 274                        struct grep_pat *new_pat;
 275                        size_t len = 0;
 276                        char *cp = p->pattern + p->patternlen, *nl = NULL;
 277                        while (++len <= p->patternlen) {
 278                                if (*(--cp) == '\n') {
 279                                        nl = cp;
 280                                        break;
 281                                }
 282                        }
 283                        if (!nl)
 284                                break;
 285                        new_pat = create_grep_pat(nl + 1, len - 1, p->origin,
 286                                                  p->no, p->token, p->field);
 287                        new_pat->next = p->next;
 288                        if (!p->next)
 289                                *tail = &new_pat->next;
 290                        p->next = new_pat;
 291                        *nl = '\0';
 292                        p->patternlen -= len;
 293                }
 294                break;
 295        default:
 296                break;
 297        }
 298}
 299
 300void append_header_grep_pattern(struct grep_opt *opt,
 301                                enum grep_header_field field, const char *pat)
 302{
 303        struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0,
 304                                             GREP_PATTERN_HEAD, field);
 305        if (field == GREP_HEADER_REFLOG)
 306                opt->use_reflog_filter = 1;
 307        do_append_grep_pat(&opt->header_tail, p);
 308}
 309
 310void append_grep_pattern(struct grep_opt *opt, const char *pat,
 311                         const char *origin, int no, enum grep_pat_token t)
 312{
 313        append_grep_pat(opt, pat, strlen(pat), origin, no, t);
 314}
 315
 316void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
 317                     const char *origin, int no, enum grep_pat_token t)
 318{
 319        struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
 320        do_append_grep_pat(&opt->pattern_tail, p);
 321}
 322
 323struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
 324{
 325        struct grep_pat *pat;
 326        struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
 327        *ret = *opt;
 328
 329        ret->pattern_list = NULL;
 330        ret->pattern_tail = &ret->pattern_list;
 331
 332        for(pat = opt->pattern_list; pat != NULL; pat = pat->next)
 333        {
 334                if(pat->token == GREP_PATTERN_HEAD)
 335                        append_header_grep_pattern(ret, pat->field,
 336                                                   pat->pattern);
 337                else
 338                        append_grep_pat(ret, pat->pattern, pat->patternlen,
 339                                        pat->origin, pat->no, pat->token);
 340        }
 341
 342        return ret;
 343}
 344
 345static NORETURN void compile_regexp_failed(const struct grep_pat *p,
 346                const char *error)
 347{
 348        char where[1024];
 349
 350        if (p->no)
 351                xsnprintf(where, sizeof(where), "In '%s' at %d, ", p->origin, p->no);
 352        else if (p->origin)
 353                xsnprintf(where, sizeof(where), "%s, ", p->origin);
 354        else
 355                where[0] = 0;
 356
 357        die("%s'%s': %s", where, p->pattern, error);
 358}
 359
 360static int is_fixed(const char *s, size_t len)
 361{
 362        size_t i;
 363
 364        for (i = 0; i < len; i++) {
 365                if (is_regex_special(s[i]))
 366                        return 0;
 367        }
 368
 369        return 1;
 370}
 371
 372static int has_null(const char *s, size_t len)
 373{
 374        /*
 375         * regcomp cannot accept patterns with NULs so when using it
 376         * we consider any pattern containing a NUL fixed.
 377         */
 378        if (memchr(s, 0, len))
 379                return 1;
 380
 381        return 0;
 382}
 383
 384#ifdef USE_LIBPCRE1
 385static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 386{
 387        const char *error;
 388        int erroffset;
 389        int options = PCRE_MULTILINE;
 390
 391        if (opt->ignore_case) {
 392                if (has_non_ascii(p->pattern))
 393                        p->pcre1_tables = pcre_maketables();
 394                options |= PCRE_CASELESS;
 395        }
 396        if (is_utf8_locale() && has_non_ascii(p->pattern))
 397                options |= PCRE_UTF8;
 398
 399        p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
 400                                      p->pcre1_tables);
 401        if (!p->pcre1_regexp)
 402                compile_regexp_failed(p, error);
 403
 404        p->pcre1_extra_info = pcre_study(p->pcre1_regexp, GIT_PCRE_STUDY_JIT_COMPILE, &error);
 405        if (!p->pcre1_extra_info && error)
 406                die("%s", error);
 407
 408#ifdef GIT_PCRE1_USE_JIT
 409        pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
 410        if (p->pcre1_jit_on == 1) {
 411                p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
 412                if (!p->pcre1_jit_stack)
 413                        die("Couldn't allocate PCRE JIT stack");
 414                pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack);
 415        } else if (p->pcre1_jit_on != 0) {
 416                BUG("The pcre1_jit_on variable should be 0 or 1, not %d",
 417                    p->pcre1_jit_on);
 418        }
 419#endif
 420}
 421
 422static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
 423                regmatch_t *match, int eflags)
 424{
 425        int ovector[30], ret, flags = 0;
 426
 427        if (eflags & REG_NOTBOL)
 428                flags |= PCRE_NOTBOL;
 429
 430#ifdef GIT_PCRE1_USE_JIT
 431        if (p->pcre1_jit_on) {
 432                ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
 433                                    eol - line, 0, flags, ovector,
 434                                    ARRAY_SIZE(ovector), p->pcre1_jit_stack);
 435        } else
 436#endif
 437        {
 438                ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
 439                                eol - line, 0, flags, ovector,
 440                                ARRAY_SIZE(ovector));
 441        }
 442
 443        if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
 444                die("pcre_exec failed with error code %d", ret);
 445        if (ret > 0) {
 446                ret = 0;
 447                match->rm_so = ovector[0];
 448                match->rm_eo = ovector[1];
 449        }
 450
 451        return ret;
 452}
 453
 454static void free_pcre1_regexp(struct grep_pat *p)
 455{
 456        pcre_free(p->pcre1_regexp);
 457#ifdef GIT_PCRE1_USE_JIT
 458        if (p->pcre1_jit_on) {
 459                pcre_free_study(p->pcre1_extra_info);
 460                pcre_jit_stack_free(p->pcre1_jit_stack);
 461        } else
 462#endif
 463        {
 464                pcre_free(p->pcre1_extra_info);
 465        }
 466        pcre_free((void *)p->pcre1_tables);
 467}
 468#else /* !USE_LIBPCRE1 */
 469static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 470{
 471        die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
 472}
 473
 474static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
 475                regmatch_t *match, int eflags)
 476{
 477        return 1;
 478}
 479
 480static void free_pcre1_regexp(struct grep_pat *p)
 481{
 482}
 483#endif /* !USE_LIBPCRE1 */
 484
 485#ifdef USE_LIBPCRE2
 486static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
 487{
 488        int error;
 489        PCRE2_UCHAR errbuf[256];
 490        PCRE2_SIZE erroffset;
 491        int options = PCRE2_MULTILINE;
 492        const uint8_t *character_tables = NULL;
 493        int jitret;
 494        int patinforet;
 495        size_t jitsizearg;
 496
 497        assert(opt->pcre2);
 498
 499        p->pcre2_compile_context = NULL;
 500
 501        if (opt->ignore_case) {
 502                if (has_non_ascii(p->pattern)) {
 503                        character_tables = pcre2_maketables(NULL);
 504                        p->pcre2_compile_context = pcre2_compile_context_create(NULL);
 505                        pcre2_set_character_tables(p->pcre2_compile_context, character_tables);
 506                }
 507                options |= PCRE2_CASELESS;
 508        }
 509        if (is_utf8_locale() && has_non_ascii(p->pattern))
 510                options |= PCRE2_UTF;
 511
 512        p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
 513                                         p->patternlen, options, &error, &erroffset,
 514                                         p->pcre2_compile_context);
 515
 516        if (p->pcre2_pattern) {
 517                p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL);
 518                if (!p->pcre2_match_data)
 519                        die("Couldn't allocate PCRE2 match data");
 520        } else {
 521                pcre2_get_error_message(error, errbuf, sizeof(errbuf));
 522                compile_regexp_failed(p, (const char *)&errbuf);
 523        }
 524
 525        pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
 526        if (p->pcre2_jit_on == 1) {
 527                jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
 528                if (jitret)
 529                        die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret);
 530
 531                /*
 532                 * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
 533                 * tells us whether the library itself supports JIT,
 534                 * but to see whether we're going to be actually using
 535                 * JIT we need to extract PCRE2_INFO_JITSIZE from the
 536                 * pattern *after* we do pcre2_jit_compile() above.
 537                 *
 538                 * This is because if the pattern contains the
 539                 * (*NO_JIT) verb (see pcre2syntax(3))
 540                 * pcre2_jit_compile() will exit early with 0. If we
 541                 * then proceed to call pcre2_jit_match() further down
 542                 * the line instead of pcre2_match() we'll either
 543                 * segfault (pre PCRE 10.31) or run into a fatal error
 544                 * (post PCRE2 10.31)
 545                 */
 546                patinforet = pcre2_pattern_info(p->pcre2_pattern, PCRE2_INFO_JITSIZE, &jitsizearg);
 547                if (patinforet)
 548                        BUG("pcre2_pattern_info() failed: %d", patinforet);
 549                if (jitsizearg == 0) {
 550                        p->pcre2_jit_on = 0;
 551                        return;
 552                }
 553
 554                p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL);
 555                if (!p->pcre2_jit_stack)
 556                        die("Couldn't allocate PCRE2 JIT stack");
 557                p->pcre2_match_context = pcre2_match_context_create(NULL);
 558                if (!p->pcre2_match_context)
 559                        die("Couldn't allocate PCRE2 match context");
 560                pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack);
 561        } else if (p->pcre2_jit_on != 0) {
 562                BUG("The pcre2_jit_on variable should be 0 or 1, not %d",
 563                    p->pcre1_jit_on);
 564        }
 565}
 566
 567static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
 568                regmatch_t *match, int eflags)
 569{
 570        int ret, flags = 0;
 571        PCRE2_SIZE *ovector;
 572        PCRE2_UCHAR errbuf[256];
 573
 574        if (eflags & REG_NOTBOL)
 575                flags |= PCRE2_NOTBOL;
 576
 577        if (p->pcre2_jit_on)
 578                ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line,
 579                                      eol - line, 0, flags, p->pcre2_match_data,
 580                                      NULL);
 581        else
 582                ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line,
 583                                  eol - line, 0, flags, p->pcre2_match_data,
 584                                  NULL);
 585
 586        if (ret < 0 && ret != PCRE2_ERROR_NOMATCH) {
 587                pcre2_get_error_message(ret, errbuf, sizeof(errbuf));
 588                die("%s failed with error code %d: %s",
 589                    (p->pcre2_jit_on ? "pcre2_jit_match" : "pcre2_match"), ret,
 590                    errbuf);
 591        }
 592        if (ret > 0) {
 593                ovector = pcre2_get_ovector_pointer(p->pcre2_match_data);
 594                ret = 0;
 595                match->rm_so = (int)ovector[0];
 596                match->rm_eo = (int)ovector[1];
 597        }
 598
 599        return ret;
 600}
 601
 602static void free_pcre2_pattern(struct grep_pat *p)
 603{
 604        pcre2_compile_context_free(p->pcre2_compile_context);
 605        pcre2_code_free(p->pcre2_pattern);
 606        pcre2_match_data_free(p->pcre2_match_data);
 607        pcre2_jit_stack_free(p->pcre2_jit_stack);
 608        pcre2_match_context_free(p->pcre2_match_context);
 609}
 610#else /* !USE_LIBPCRE2 */
 611static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
 612{
 613        /*
 614         * Unreachable until USE_LIBPCRE2 becomes synonymous with
 615         * USE_LIBPCRE. See the sibling comment in
 616         * grep_set_pattern_type_option().
 617         */
 618        die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
 619}
 620
 621static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
 622                regmatch_t *match, int eflags)
 623{
 624        return 1;
 625}
 626
 627static void free_pcre2_pattern(struct grep_pat *p)
 628{
 629}
 630#endif /* !USE_LIBPCRE2 */
 631
 632static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
 633{
 634        struct strbuf sb = STRBUF_INIT;
 635        int err;
 636        int regflags = 0;
 637
 638        basic_regex_quote_buf(&sb, p->pattern);
 639        if (opt->ignore_case)
 640                regflags |= REG_ICASE;
 641        err = regcomp(&p->regexp, sb.buf, regflags);
 642        if (opt->debug)
 643                fprintf(stderr, "fixed %s\n", sb.buf);
 644        strbuf_release(&sb);
 645        if (err) {
 646                char errbuf[1024];
 647                regerror(err, &p->regexp, errbuf, sizeof(errbuf));
 648                compile_regexp_failed(p, errbuf);
 649        }
 650}
 651
 652static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 653{
 654        int ascii_only;
 655        int err;
 656        int regflags = REG_NEWLINE;
 657
 658        p->word_regexp = opt->word_regexp;
 659        p->ignore_case = opt->ignore_case;
 660        ascii_only     = !has_non_ascii(p->pattern);
 661
 662        /*
 663         * Even when -F (fixed) asks us to do a non-regexp search, we
 664         * may not be able to correctly case-fold when -i
 665         * (ignore-case) is asked (in which case, we'll synthesize a
 666         * regexp to match the pattern that matches regexp special
 667         * characters literally, while ignoring case differences).  On
 668         * the other hand, even without -F, if the pattern does not
 669         * have any regexp special characters and there is no need for
 670         * case-folding search, we can internally turn it into a
 671         * simple string match using kws.  p->fixed tells us if we
 672         * want to use kws.
 673         */
 674        if (opt->fixed ||
 675            has_null(p->pattern, p->patternlen) ||
 676            is_fixed(p->pattern, p->patternlen))
 677                p->fixed = !p->ignore_case || ascii_only;
 678
 679        if (p->fixed) {
 680                p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL);
 681                kwsincr(p->kws, p->pattern, p->patternlen);
 682                kwsprep(p->kws);
 683                return;
 684        } else if (opt->fixed) {
 685                /*
 686                 * We come here when the pattern has the non-ascii
 687                 * characters we cannot case-fold, and asked to
 688                 * ignore-case.
 689                 */
 690                compile_fixed_regexp(p, opt);
 691                return;
 692        }
 693
 694        if (opt->pcre2) {
 695                compile_pcre2_pattern(p, opt);
 696                return;
 697        }
 698
 699        if (opt->pcre1) {
 700                compile_pcre1_regexp(p, opt);
 701                return;
 702        }
 703
 704        if (p->ignore_case)
 705                regflags |= REG_ICASE;
 706        if (opt->extended_regexp_option)
 707                regflags |= REG_EXTENDED;
 708        err = regcomp(&p->regexp, p->pattern, regflags);
 709        if (err) {
 710                char errbuf[1024];
 711                regerror(err, &p->regexp, errbuf, 1024);
 712                compile_regexp_failed(p, errbuf);
 713        }
 714}
 715
 716static struct grep_expr *compile_pattern_or(struct grep_pat **);
 717static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
 718{
 719        struct grep_pat *p;
 720        struct grep_expr *x;
 721
 722        p = *list;
 723        if (!p)
 724                return NULL;
 725        switch (p->token) {
 726        case GREP_PATTERN: /* atom */
 727        case GREP_PATTERN_HEAD:
 728        case GREP_PATTERN_BODY:
 729                x = xcalloc(1, sizeof (struct grep_expr));
 730                x->node = GREP_NODE_ATOM;
 731                x->u.atom = p;
 732                *list = p->next;
 733                return x;
 734        case GREP_OPEN_PAREN:
 735                *list = p->next;
 736                x = compile_pattern_or(list);
 737                if (!*list || (*list)->token != GREP_CLOSE_PAREN)
 738                        die("unmatched parenthesis");
 739                *list = (*list)->next;
 740                return x;
 741        default:
 742                return NULL;
 743        }
 744}
 745
 746static struct grep_expr *compile_pattern_not(struct grep_pat **list)
 747{
 748        struct grep_pat *p;
 749        struct grep_expr *x;
 750
 751        p = *list;
 752        if (!p)
 753                return NULL;
 754        switch (p->token) {
 755        case GREP_NOT:
 756                if (!p->next)
 757                        die("--not not followed by pattern expression");
 758                *list = p->next;
 759                x = xcalloc(1, sizeof (struct grep_expr));
 760                x->node = GREP_NODE_NOT;
 761                x->u.unary = compile_pattern_not(list);
 762                if (!x->u.unary)
 763                        die("--not followed by non pattern expression");
 764                return x;
 765        default:
 766                return compile_pattern_atom(list);
 767        }
 768}
 769
 770static struct grep_expr *compile_pattern_and(struct grep_pat **list)
 771{
 772        struct grep_pat *p;
 773        struct grep_expr *x, *y, *z;
 774
 775        x = compile_pattern_not(list);
 776        p = *list;
 777        if (p && p->token == GREP_AND) {
 778                if (!p->next)
 779                        die("--and not followed by pattern expression");
 780                *list = p->next;
 781                y = compile_pattern_and(list);
 782                if (!y)
 783                        die("--and not followed by pattern expression");
 784                z = xcalloc(1, sizeof (struct grep_expr));
 785                z->node = GREP_NODE_AND;
 786                z->u.binary.left = x;
 787                z->u.binary.right = y;
 788                return z;
 789        }
 790        return x;
 791}
 792
 793static struct grep_expr *compile_pattern_or(struct grep_pat **list)
 794{
 795        struct grep_pat *p;
 796        struct grep_expr *x, *y, *z;
 797
 798        x = compile_pattern_and(list);
 799        p = *list;
 800        if (x && p && p->token != GREP_CLOSE_PAREN) {
 801                y = compile_pattern_or(list);
 802                if (!y)
 803                        die("not a pattern expression %s", p->pattern);
 804                z = xcalloc(1, sizeof (struct grep_expr));
 805                z->node = GREP_NODE_OR;
 806                z->u.binary.left = x;
 807                z->u.binary.right = y;
 808                return z;
 809        }
 810        return x;
 811}
 812
 813static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
 814{
 815        return compile_pattern_or(list);
 816}
 817
 818static void indent(int in)
 819{
 820        while (in-- > 0)
 821                fputc(' ', stderr);
 822}
 823
 824static void dump_grep_pat(struct grep_pat *p)
 825{
 826        switch (p->token) {
 827        case GREP_AND: fprintf(stderr, "*and*"); break;
 828        case GREP_OPEN_PAREN: fprintf(stderr, "*(*"); break;
 829        case GREP_CLOSE_PAREN: fprintf(stderr, "*)*"); break;
 830        case GREP_NOT: fprintf(stderr, "*not*"); break;
 831        case GREP_OR: fprintf(stderr, "*or*"); break;
 832
 833        case GREP_PATTERN: fprintf(stderr, "pattern"); break;
 834        case GREP_PATTERN_HEAD: fprintf(stderr, "pattern_head"); break;
 835        case GREP_PATTERN_BODY: fprintf(stderr, "pattern_body"); break;
 836        }
 837
 838        switch (p->token) {
 839        default: break;
 840        case GREP_PATTERN_HEAD:
 841                fprintf(stderr, "<head %d>", p->field); break;
 842        case GREP_PATTERN_BODY:
 843                fprintf(stderr, "<body>"); break;
 844        }
 845        switch (p->token) {
 846        default: break;
 847        case GREP_PATTERN_HEAD:
 848        case GREP_PATTERN_BODY:
 849        case GREP_PATTERN:
 850                fprintf(stderr, "%.*s", (int)p->patternlen, p->pattern);
 851                break;
 852        }
 853        fputc('\n', stderr);
 854}
 855
 856static void dump_grep_expression_1(struct grep_expr *x, int in)
 857{
 858        indent(in);
 859        switch (x->node) {
 860        case GREP_NODE_TRUE:
 861                fprintf(stderr, "true\n");
 862                break;
 863        case GREP_NODE_ATOM:
 864                dump_grep_pat(x->u.atom);
 865                break;
 866        case GREP_NODE_NOT:
 867                fprintf(stderr, "(not\n");
 868                dump_grep_expression_1(x->u.unary, in+1);
 869                indent(in);
 870                fprintf(stderr, ")\n");
 871                break;
 872        case GREP_NODE_AND:
 873                fprintf(stderr, "(and\n");
 874                dump_grep_expression_1(x->u.binary.left, in+1);
 875                dump_grep_expression_1(x->u.binary.right, in+1);
 876                indent(in);
 877                fprintf(stderr, ")\n");
 878                break;
 879        case GREP_NODE_OR:
 880                fprintf(stderr, "(or\n");
 881                dump_grep_expression_1(x->u.binary.left, in+1);
 882                dump_grep_expression_1(x->u.binary.right, in+1);
 883                indent(in);
 884                fprintf(stderr, ")\n");
 885                break;
 886        }
 887}
 888
 889static void dump_grep_expression(struct grep_opt *opt)
 890{
 891        struct grep_expr *x = opt->pattern_expression;
 892
 893        if (opt->all_match)
 894                fprintf(stderr, "[all-match]\n");
 895        dump_grep_expression_1(x, 0);
 896        fflush(NULL);
 897}
 898
 899static struct grep_expr *grep_true_expr(void)
 900{
 901        struct grep_expr *z = xcalloc(1, sizeof(*z));
 902        z->node = GREP_NODE_TRUE;
 903        return z;
 904}
 905
 906static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
 907{
 908        struct grep_expr *z = xcalloc(1, sizeof(*z));
 909        z->node = GREP_NODE_OR;
 910        z->u.binary.left = left;
 911        z->u.binary.right = right;
 912        return z;
 913}
 914
 915static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
 916{
 917        struct grep_pat *p;
 918        struct grep_expr *header_expr;
 919        struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
 920        enum grep_header_field fld;
 921
 922        if (!opt->header_list)
 923                return NULL;
 924
 925        for (p = opt->header_list; p; p = p->next) {
 926                if (p->token != GREP_PATTERN_HEAD)
 927                        BUG("a non-header pattern in grep header list.");
 928                if (p->field < GREP_HEADER_FIELD_MIN ||
 929                    GREP_HEADER_FIELD_MAX <= p->field)
 930                        BUG("unknown header field %d", p->field);
 931                compile_regexp(p, opt);
 932        }
 933
 934        for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
 935                header_group[fld] = NULL;
 936
 937        for (p = opt->header_list; p; p = p->next) {
 938                struct grep_expr *h;
 939                struct grep_pat *pp = p;
 940
 941                h = compile_pattern_atom(&pp);
 942                if (!h || pp != p->next)
 943                        BUG("malformed header expr");
 944                if (!header_group[p->field]) {
 945                        header_group[p->field] = h;
 946                        continue;
 947                }
 948                header_group[p->field] = grep_or_expr(h, header_group[p->field]);
 949        }
 950
 951        header_expr = NULL;
 952
 953        for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
 954                if (!header_group[fld])
 955                        continue;
 956                if (!header_expr)
 957                        header_expr = grep_true_expr();
 958                header_expr = grep_or_expr(header_group[fld], header_expr);
 959        }
 960        return header_expr;
 961}
 962
 963static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y)
 964{
 965        struct grep_expr *z = x;
 966
 967        while (x) {
 968                assert(x->node == GREP_NODE_OR);
 969                if (x->u.binary.right &&
 970                    x->u.binary.right->node == GREP_NODE_TRUE) {
 971                        x->u.binary.right = y;
 972                        break;
 973                }
 974                x = x->u.binary.right;
 975        }
 976        return z;
 977}
 978
 979static void compile_grep_patterns_real(struct grep_opt *opt)
 980{
 981        struct grep_pat *p;
 982        struct grep_expr *header_expr = prep_header_patterns(opt);
 983
 984        for (p = opt->pattern_list; p; p = p->next) {
 985                switch (p->token) {
 986                case GREP_PATTERN: /* atom */
 987                case GREP_PATTERN_HEAD:
 988                case GREP_PATTERN_BODY:
 989                        compile_regexp(p, opt);
 990                        break;
 991                default:
 992                        opt->extended = 1;
 993                        break;
 994                }
 995        }
 996
 997        if (opt->all_match || header_expr)
 998                opt->extended = 1;
 999        else if (!opt->extended && !opt->debug)
1000                return;
1001
1002        p = opt->pattern_list;
1003        if (p)
1004                opt->pattern_expression = compile_pattern_expr(&p);
1005        if (p)
1006                die("incomplete pattern expression: %s", p->pattern);
1007
1008        if (!header_expr)
1009                return;
1010
1011        if (!opt->pattern_expression)
1012                opt->pattern_expression = header_expr;
1013        else if (opt->all_match)
1014                opt->pattern_expression = grep_splice_or(header_expr,
1015                                                         opt->pattern_expression);
1016        else
1017                opt->pattern_expression = grep_or_expr(opt->pattern_expression,
1018                                                       header_expr);
1019        opt->all_match = 1;
1020}
1021
1022void compile_grep_patterns(struct grep_opt *opt)
1023{
1024        compile_grep_patterns_real(opt);
1025        if (opt->debug)
1026                dump_grep_expression(opt);
1027}
1028
1029static void free_pattern_expr(struct grep_expr *x)
1030{
1031        switch (x->node) {
1032        case GREP_NODE_TRUE:
1033        case GREP_NODE_ATOM:
1034                break;
1035        case GREP_NODE_NOT:
1036                free_pattern_expr(x->u.unary);
1037                break;
1038        case GREP_NODE_AND:
1039        case GREP_NODE_OR:
1040                free_pattern_expr(x->u.binary.left);
1041                free_pattern_expr(x->u.binary.right);
1042                break;
1043        }
1044        free(x);
1045}
1046
1047void free_grep_patterns(struct grep_opt *opt)
1048{
1049        struct grep_pat *p, *n;
1050
1051        for (p = opt->pattern_list; p; p = n) {
1052                n = p->next;
1053                switch (p->token) {
1054                case GREP_PATTERN: /* atom */
1055                case GREP_PATTERN_HEAD:
1056                case GREP_PATTERN_BODY:
1057                        if (p->kws)
1058                                kwsfree(p->kws);
1059                        else if (p->pcre1_regexp)
1060                                free_pcre1_regexp(p);
1061                        else if (p->pcre2_pattern)
1062                                free_pcre2_pattern(p);
1063                        else
1064                                regfree(&p->regexp);
1065                        free(p->pattern);
1066                        break;
1067                default:
1068                        break;
1069                }
1070                free(p);
1071        }
1072
1073        if (!opt->extended)
1074                return;
1075        free_pattern_expr(opt->pattern_expression);
1076}
1077
/*
 * Advance `cp` to the next '\n' or until `*left` bytes have been
 * consumed, whichever comes first.  `*left` is decremented by the
 * number of bytes skipped.  Returns the new position.
 */
static char *end_of_line(char *cp, unsigned long *left)
{
	unsigned long remaining;

	for (remaining = *left; remaining; remaining--, cp++)
		if (*cp == '\n')
			break;
	*left = remaining;
	return cp;
}
1088
/* Return 1 when `ch` is alphanumeric or an underscore, 0 otherwise. */
static int word_char(char ch)
{
	if (ch == '_')
		return 1;
	return isalnum(ch) ? 1 : 0;
}
1093
1094static void output_color(struct grep_opt *opt, const void *data, size_t size,
1095                         const char *color)
1096{
1097        if (want_color(opt->color) && color && color[0]) {
1098                opt->output(opt, color, strlen(color));
1099                opt->output(opt, data, size);
1100                opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
1101        } else
1102                opt->output(opt, data, size);
1103}
1104
1105static void output_sep(struct grep_opt *opt, char sign)
1106{
1107        if (opt->null_following_name)
1108                opt->output(opt, "\0", 1);
1109        else
1110                output_color(opt, &sign, 1, opt->color_sep);
1111}
1112
1113static void show_name(struct grep_opt *opt, const char *name)
1114{
1115        output_color(opt, name, strlen(name), opt->color_filename);
1116        opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
1117}
1118
1119static int fixmatch(struct grep_pat *p, char *line, char *eol,
1120                    regmatch_t *match)
1121{
1122        struct kwsmatch kwsm;
1123        size_t offset = kwsexec(p->kws, line, eol - line, &kwsm);
1124        if (offset == -1) {
1125                match->rm_so = match->rm_eo = -1;
1126                return REG_NOMATCH;
1127        } else {
1128                match->rm_so = offset;
1129                match->rm_eo = match->rm_so + kwsm.size[0];
1130                return 0;
1131        }
1132}
1133
1134static int patmatch(struct grep_pat *p, char *line, char *eol,
1135                    regmatch_t *match, int eflags)
1136{
1137        int hit;
1138
1139        if (p->fixed)
1140                hit = !fixmatch(p, line, eol, match);
1141        else if (p->pcre1_regexp)
1142                hit = !pcre1match(p, line, eol, match, eflags);
1143        else if (p->pcre2_pattern)
1144                hit = !pcre2match(p, line, eol, match, eflags);
1145        else
1146                hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
1147                                   eflags);
1148
1149        return hit;
1150}
1151
/*
 * Cut the line [bol, *eol_p) right after its last '>' (a '>' sitting
 * at `bol` itself is not considered).  On success the byte following
 * the '>' is overwritten with NUL, *eol_p is moved to that byte and the
 * overwritten character is returned so the caller can restore it.
 * Returns 0 and changes nothing when no such '>' exists.
 */
static int strip_timestamp(char *bol, char **eol_p)
{
	char *scan;

	for (scan = *eol_p - 1; bol < scan; scan--) {
		int saved;

		if (*scan != '>')
			continue;
		scan++;
		*eol_p = scan;
		saved = *scan;
		*scan = '\0';
		return saved;
	}
	return 0;
}
1167
/*
 * Line prefixes of the header fields that can be searched, together
 * with their lengths.  match_one_pattern() indexes this table with the
 * `field` member of a header pattern.
 */
#define HEADER_FIELD_ENTRY(prefix) { (prefix), sizeof(prefix) - 1 }
static struct {
	const char *field;
	size_t len;
} header_field[] = {
	HEADER_FIELD_ENTRY("author "),
	HEADER_FIELD_ENTRY("committer "),
	HEADER_FIELD_ENTRY("reflog "),
};
#undef HEADER_FIELD_ENTRY
1176
1177static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
1178                             enum grep_context ctx,
1179                             regmatch_t *pmatch, int eflags)
1180{
1181        int hit = 0;
1182        int saved_ch = 0;
1183        const char *start = bol;
1184
1185        if ((p->token != GREP_PATTERN) &&
1186            ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
1187                return 0;
1188
1189        if (p->token == GREP_PATTERN_HEAD) {
1190                const char *field;
1191                size_t len;
1192                assert(p->field < ARRAY_SIZE(header_field));
1193                field = header_field[p->field].field;
1194                len = header_field[p->field].len;
1195                if (strncmp(bol, field, len))
1196                        return 0;
1197                bol += len;
1198                switch (p->field) {
1199                case GREP_HEADER_AUTHOR:
1200                case GREP_HEADER_COMMITTER:
1201                        saved_ch = strip_timestamp(bol, &eol);
1202                        break;
1203                default:
1204                        break;
1205                }
1206        }
1207
1208 again:
1209        hit = patmatch(p, bol, eol, pmatch, eflags);
1210
1211        if (hit && p->word_regexp) {
1212                if ((pmatch[0].rm_so < 0) ||
1213                    (eol - bol) < pmatch[0].rm_so ||
1214                    (pmatch[0].rm_eo < 0) ||
1215                    (eol - bol) < pmatch[0].rm_eo)
1216                        die("regexp returned nonsense");
1217
1218                /* Match beginning must be either beginning of the
1219                 * line, or at word boundary (i.e. the last char must
1220                 * not be a word char).  Similarly, match end must be
1221                 * either end of the line, or at word boundary
1222                 * (i.e. the next char must not be a word char).
1223                 */
1224                if ( ((pmatch[0].rm_so == 0) ||
1225                      !word_char(bol[pmatch[0].rm_so-1])) &&
1226                     ((pmatch[0].rm_eo == (eol-bol)) ||
1227                      !word_char(bol[pmatch[0].rm_eo])) )
1228                        ;
1229                else
1230                        hit = 0;
1231
1232                /* Words consist of at least one character. */
1233                if (pmatch->rm_so == pmatch->rm_eo)
1234                        hit = 0;
1235
1236                if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
1237                        /* There could be more than one match on the
1238                         * line, and the first match might not be
1239                         * strict word match.  But later ones could be!
1240                         * Forward to the next possible start, i.e. the
1241                         * next position following a non-word char.
1242                         */
1243                        bol = pmatch[0].rm_so + bol + 1;
1244                        while (word_char(bol[-1]) && bol < eol)
1245                                bol++;
1246                        eflags |= REG_NOTBOL;
1247                        if (bol < eol)
1248                                goto again;
1249                }
1250        }
1251        if (p->token == GREP_PATTERN_HEAD && saved_ch)
1252                *eol = saved_ch;
1253        if (hit) {
1254                pmatch[0].rm_so += bol - start;
1255                pmatch[0].rm_eo += bol - start;
1256        }
1257        return hit;
1258}
1259
1260static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x, char *bol,
1261                           char *eol, enum grep_context ctx, ssize_t *col,
1262                           ssize_t *icol, int collect_hits)
1263{
1264        int h = 0;
1265
1266        if (!x)
1267                die("Not a valid grep expression");
1268        switch (x->node) {
1269        case GREP_NODE_TRUE:
1270                h = 1;
1271                break;
1272        case GREP_NODE_ATOM:
1273                {
1274                        regmatch_t tmp;
1275                        h = match_one_pattern(x->u.atom, bol, eol, ctx,
1276                                              &tmp, 0);
1277                        if (h && (*col < 0 || tmp.rm_so < *col))
1278                                *col = tmp.rm_so;
1279                }
1280                break;
1281        case GREP_NODE_NOT:
1282                /*
1283                 * Upon visiting a GREP_NODE_NOT, col and icol become swapped.
1284                 */
1285                h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col,
1286                                     0);
1287                break;
1288        case GREP_NODE_AND:
1289                h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
1290                                    icol, 0);
1291                if (h || opt->columnnum) {
1292                        /*
1293                         * Don't short-circuit AND when given --column, since a
1294                         * NOT earlier in the tree may turn this into an OR. In
1295                         * this case, see the below comment.
1296                         */
1297                        h &= match_expr_eval(opt, x->u.binary.right, bol, eol,
1298                                             ctx, col, icol, 0);
1299                }
1300                break;
1301        case GREP_NODE_OR:
1302                if (!(collect_hits || opt->columnnum)) {
1303                        /*
1304                         * Don't short-circuit OR when given --column (or
1305                         * collecting hits) to ensure we don't skip a later
1306                         * child that would produce an earlier match.
1307                         */
1308                        return (match_expr_eval(opt, x->u.binary.left, bol, eol,
1309                                                ctx, col, icol, 0) ||
1310                                match_expr_eval(opt, x->u.binary.right, bol,
1311                                                eol, ctx, col, icol, 0));
1312                }
1313                h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
1314                                    icol, 0);
1315                if (collect_hits)
1316                        x->u.binary.left->hit |= h;
1317                h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col,
1318                                     icol, collect_hits);
1319                break;
1320        default:
1321                die("Unexpected node type (internal error) %d", x->node);
1322        }
1323        if (collect_hits)
1324                x->hit |= h;
1325        return h;
1326}
1327
1328static int match_expr(struct grep_opt *opt, char *bol, char *eol,
1329                      enum grep_context ctx, ssize_t *col,
1330                      ssize_t *icol, int collect_hits)
1331{
1332        struct grep_expr *x = opt->pattern_expression;
1333        return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits);
1334}
1335
1336static int match_line(struct grep_opt *opt, char *bol, char *eol,
1337                      ssize_t *col, ssize_t *icol,
1338                      enum grep_context ctx, int collect_hits)
1339{
1340        struct grep_pat *p;
1341        int hit = 0;
1342
1343        if (opt->extended)
1344                return match_expr(opt, bol, eol, ctx, col, icol,
1345                                  collect_hits);
1346
1347        /* we do not call with collect_hits without being extended */
1348        for (p = opt->pattern_list; p; p = p->next) {
1349                regmatch_t tmp;
1350                if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) {
1351                        hit |= 1;
1352                        if (!opt->columnnum) {
1353                                /*
1354                                 * Without --column, any single match on a line
1355                                 * is enough to know that it needs to be
1356                                 * printed. With --column, scan _all_ patterns
1357                                 * to find the earliest.
1358                                 */
1359                                break;
1360                        }
1361                        if (*col < 0 || tmp.rm_so < *col)
1362                                *col = tmp.rm_so;
1363                }
1364        }
1365        return hit;
1366}
1367
1368static int match_next_pattern(struct grep_pat *p, char *bol, char *eol,
1369                              enum grep_context ctx,
1370                              regmatch_t *pmatch, int eflags)
1371{
1372        regmatch_t match;
1373
1374        if (!match_one_pattern(p, bol, eol, ctx, &match, eflags))
1375                return 0;
1376        if (match.rm_so < 0 || match.rm_eo < 0)
1377                return 0;
1378        if (pmatch->rm_so >= 0 && pmatch->rm_eo >= 0) {
1379                if (match.rm_so > pmatch->rm_so)
1380                        return 1;
1381                if (match.rm_so == pmatch->rm_so && match.rm_eo < pmatch->rm_eo)
1382                        return 1;
1383        }
1384        pmatch->rm_so = match.rm_so;
1385        pmatch->rm_eo = match.rm_eo;
1386        return 1;
1387}
1388
1389static int next_match(struct grep_opt *opt, char *bol, char *eol,
1390                      enum grep_context ctx, regmatch_t *pmatch, int eflags)
1391{
1392        struct grep_pat *p;
1393        int hit = 0;
1394
1395        pmatch->rm_so = pmatch->rm_eo = -1;
1396        if (bol < eol) {
1397                for (p = opt->pattern_list; p; p = p->next) {
1398                        switch (p->token) {
1399                        case GREP_PATTERN: /* atom */
1400                        case GREP_PATTERN_HEAD:
1401                        case GREP_PATTERN_BODY:
1402                                hit |= match_next_pattern(p, bol, eol, ctx,
1403                                                          pmatch, eflags);
1404                                break;
1405                        default:
1406                                break;
1407                        }
1408                }
1409        }
1410        return hit;
1411}
1412
1413static void show_line(struct grep_opt *opt, char *bol, char *eol,
1414                      const char *name, unsigned lno, ssize_t cno, char sign)
1415{
1416        int rest = eol - bol;
1417        const char *match_color, *line_color = NULL;
1418
1419        if (opt->file_break && opt->last_shown == 0) {
1420                if (opt->show_hunk_mark)
1421                        opt->output(opt, "\n", 1);
1422        } else if (opt->pre_context || opt->post_context || opt->funcbody) {
1423                if (opt->last_shown == 0) {
1424                        if (opt->show_hunk_mark) {
1425                                output_color(opt, "--", 2, opt->color_sep);
1426                                opt->output(opt, "\n", 1);
1427                        }
1428                } else if (lno > opt->last_shown + 1) {
1429                        output_color(opt, "--", 2, opt->color_sep);
1430                        opt->output(opt, "\n", 1);
1431                }
1432        }
1433        if (opt->heading && opt->last_shown == 0) {
1434                output_color(opt, name, strlen(name), opt->color_filename);
1435                opt->output(opt, "\n", 1);
1436        }
1437        opt->last_shown = lno;
1438
1439        if (!opt->heading && opt->pathname) {
1440                output_color(opt, name, strlen(name), opt->color_filename);
1441                output_sep(opt, sign);
1442        }
1443        if (opt->linenum) {
1444                char buf[32];
1445                xsnprintf(buf, sizeof(buf), "%d", lno);
1446                output_color(opt, buf, strlen(buf), opt->color_lineno);
1447                output_sep(opt, sign);
1448        }
1449        /*
1450         * Treat 'cno' as the 1-indexed offset from the start of a non-context
1451         * line to its first match. Otherwise, 'cno' is 0 indicating that we are
1452         * being called with a context line.
1453         */
1454        if (opt->columnnum && cno) {
1455                char buf[32];
1456                xsnprintf(buf, sizeof(buf), "%"PRIuMAX, (uintmax_t)cno);
1457                output_color(opt, buf, strlen(buf), opt->color_columnno);
1458                output_sep(opt, sign);
1459        }
1460        if (opt->color) {
1461                regmatch_t match;
1462                enum grep_context ctx = GREP_CONTEXT_BODY;
1463                int ch = *eol;
1464                int eflags = 0;
1465
1466                if (sign == ':')
1467                        match_color = opt->color_match_selected;
1468                else
1469                        match_color = opt->color_match_context;
1470                if (sign == ':')
1471                        line_color = opt->color_selected;
1472                else if (sign == '-')
1473                        line_color = opt->color_context;
1474                else if (sign == '=')
1475                        line_color = opt->color_function;
1476                *eol = '\0';
1477                while (next_match(opt, bol, eol, ctx, &match, eflags)) {
1478                        if (match.rm_so == match.rm_eo)
1479                                break;
1480
1481                        output_color(opt, bol, match.rm_so, line_color);
1482                        output_color(opt, bol + match.rm_so,
1483                                     match.rm_eo - match.rm_so, match_color);
1484                        bol += match.rm_eo;
1485                        rest -= match.rm_eo;
1486                        eflags = REG_NOTBOL;
1487                }
1488                *eol = ch;
1489        }
1490        output_color(opt, bol, rest, line_color);
1491        opt->output(opt, "\n", 1);
1492}
1493
#ifndef NO_PTHREADS
/*
 * When non-zero, grep_attr_lock() and grep_attr_unlock() really take
 * and release grep_attr_mutex; when zero they do nothing.
 */
int grep_use_locks;

/*
 * This lock protects access to the gitattributes machinery, which is
 * not thread-safe.
 */
pthread_mutex_t grep_attr_mutex;

static inline void grep_attr_lock(void)
{
	if (!grep_use_locks)
		return;
	pthread_mutex_lock(&grep_attr_mutex);
}

static inline void grep_attr_unlock(void)
{
	if (!grep_use_locks)
		return;
	pthread_mutex_unlock(&grep_attr_mutex);
}

/*
 * Same as grep_attr_mutex, but protecting the thread-unsafe object db access.
 */
pthread_mutex_t grep_read_mutex;

#else
/* Without pthreads the attribute lock helpers expand to nothing. */
#define grep_attr_lock()
#define grep_attr_unlock()
#endif
1524
1525static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol)
1526{
1527        xdemitconf_t *xecfg = opt->priv;
1528        if (xecfg && !xecfg->find_func) {
1529                grep_source_load_driver(gs);
1530                if (gs->driver->funcname.pattern) {
1531                        const struct userdiff_funcname *pe = &gs->driver->funcname;
1532                        xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
1533                } else {
1534                        xecfg = opt->priv = NULL;
1535                }
1536        }
1537
1538        if (xecfg) {
1539                char buf[1];
1540                return xecfg->find_func(bol, eol - bol, buf, 1,
1541                                        xecfg->find_func_priv) >= 0;
1542        }
1543
1544        if (bol == eol)
1545                return 0;
1546        if (isalpha(*bol) || *bol == '_' || *bol == '$')
1547                return 1;
1548        return 0;
1549}
1550
1551static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
1552                               char *bol, unsigned lno)
1553{
1554        while (bol > gs->buf) {
1555                char *eol = --bol;
1556
1557                while (bol > gs->buf && bol[-1] != '\n')
1558                        bol--;
1559                lno--;
1560
1561                if (lno <= opt->last_shown)
1562                        break;
1563
1564                if (match_funcname(opt, gs, bol, eol)) {
1565                        show_line(opt, bol, eol, gs->name, lno, 0, '=');
1566                        break;
1567                }
1568        }
1569}
1570
1571static int is_empty_line(const char *bol, const char *eol);
1572
1573static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
1574                             char *bol, char *end, unsigned lno)
1575{
1576        unsigned cur = lno, from = 1, funcname_lno = 0, orig_from;
1577        int funcname_needed = !!opt->funcname, comment_needed = 0;
1578
1579        if (opt->pre_context < lno)
1580                from = lno - opt->pre_context;
1581        if (from <= opt->last_shown)
1582                from = opt->last_shown + 1;
1583        orig_from = from;
1584        if (opt->funcbody) {
1585                if (match_funcname(opt, gs, bol, end))
1586                        comment_needed = 1;
1587                else
1588                        funcname_needed = 1;
1589                from = opt->last_shown + 1;
1590        }
1591
1592        /* Rewind. */
1593        while (bol > gs->buf && cur > from) {
1594                char *next_bol = bol;
1595                char *eol = --bol;
1596
1597                while (bol > gs->buf && bol[-1] != '\n')
1598                        bol--;
1599                cur--;
1600                if (comment_needed && (is_empty_line(bol, eol) ||
1601                                       match_funcname(opt, gs, bol, eol))) {
1602                        comment_needed = 0;
1603                        from = orig_from;
1604                        if (cur < from) {
1605                                cur++;
1606                                bol = next_bol;
1607                                break;
1608                        }
1609                }
1610                if (funcname_needed && match_funcname(opt, gs, bol, eol)) {
1611                        funcname_lno = cur;
1612                        funcname_needed = 0;
1613                        if (opt->funcbody)
1614                                comment_needed = 1;
1615                        else
1616                                from = orig_from;
1617                }
1618        }
1619
1620        /* We need to look even further back to find a function signature. */
1621        if (opt->funcname && funcname_needed)
1622                show_funcname_line(opt, gs, bol, cur);
1623
1624        /* Back forward. */
1625        while (cur < lno) {
1626                char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
1627
1628                while (*eol != '\n')
1629                        eol++;
1630                show_line(opt, bol, eol, gs->name, cur, 0, sign);
1631                bol = eol + 1;
1632                cur++;
1633        }
1634}
1635
1636static int should_lookahead(struct grep_opt *opt)
1637{
1638        struct grep_pat *p;
1639
1640        if (opt->extended)
1641                return 0; /* punt for too complex stuff */
1642        if (opt->invert)
1643                return 0;
1644        for (p = opt->pattern_list; p; p = p->next) {
1645                if (p->token != GREP_PATTERN)
1646                        return 0; /* punt for "header only" and stuff */
1647        }
1648        return 1;
1649}
1650
1651static int look_ahead(struct grep_opt *opt,
1652                      unsigned long *left_p,
1653                      unsigned *lno_p,
1654                      char **bol_p)
1655{
1656        unsigned lno = *lno_p;
1657        char *bol = *bol_p;
1658        struct grep_pat *p;
1659        char *sp, *last_bol;
1660        regoff_t earliest = -1;
1661
1662        for (p = opt->pattern_list; p; p = p->next) {
1663                int hit;
1664                regmatch_t m;
1665
1666                hit = patmatch(p, bol, bol + *left_p, &m, 0);
1667                if (!hit || m.rm_so < 0 || m.rm_eo < 0)
1668                        continue;
1669                if (earliest < 0 || m.rm_so < earliest)
1670                        earliest = m.rm_so;
1671        }
1672
1673        if (earliest < 0) {
1674                *bol_p = bol + *left_p;
1675                *left_p = 0;
1676                return 1;
1677        }
1678        for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
1679                ; /* find the beginning of the line */
1680        last_bol = sp;
1681
1682        for (sp = bol; sp < last_bol; sp++) {
1683                if (*sp == '\n')
1684                        lno++;
1685        }
1686        *left_p -= last_bol - bol;
1687        *bol_p = last_bol;
1688        *lno_p = lno;
1689        return 0;
1690}
1691
1692static int fill_textconv_grep(struct userdiff_driver *driver,
1693                              struct grep_source *gs)
1694{
1695        struct diff_filespec *df;
1696        char *buf;
1697        size_t size;
1698
1699        if (!driver || !driver->textconv)
1700                return grep_source_load(gs);
1701
1702        /*
1703         * The textconv interface is intimately tied to diff_filespecs, so we
1704         * have to pretend to be one. If we could unify the grep_source
1705         * and diff_filespec structs, this mess could just go away.
1706         */
1707        df = alloc_filespec(gs->path);
1708        switch (gs->type) {
1709        case GREP_SOURCE_OID:
1710                fill_filespec(df, gs->identifier, 1, 0100644);
1711                break;
1712        case GREP_SOURCE_FILE:
1713                fill_filespec(df, &null_oid, 0, 0100644);
1714                break;
1715        default:
1716                BUG("attempt to textconv something without a path?");
1717        }
1718
1719        /*
1720         * fill_textconv is not remotely thread-safe; it may load objects
1721         * behind the scenes, and it modifies the global diff tempfile
1722         * structure.
1723         */
1724        grep_read_lock();
1725        size = fill_textconv(driver, df, &buf);
1726        grep_read_unlock();
1727        free_filespec(df);
1728
1729        /*
1730         * The normal fill_textconv usage by the diff machinery would just keep
1731         * the textconv'd buf separate from the diff_filespec. But much of the
1732         * grep code passes around a grep_source and assumes that its "buf"
1733         * pointer is the beginning of the thing we are searching. So let's
1734         * install our textconv'd version into the grep_source, taking care not
1735         * to leak any existing buffer.
1736         */
1737        grep_source_clear_data(gs);
1738        gs->buf = buf;
1739        gs->size = size;
1740
1741        return 0;
1742}
1743
/*
 * Return 1 if every byte in [bol, eol) is whitespace according to
 * isspace() (which includes the case bol == eol), 0 otherwise.
 */
static int is_empty_line(const char *bol, const char *eol)
{
        const char *cp = bol;

        for (; cp < eol; cp++)
                if (!isspace(*cp))
                        break;
        return cp == eol;
}
1750
/*
 * Search one grep_source line by line and report through opt->output.
 *
 * opt          - search options.  This function writes opt->output (when
 *                unset), opt->show_hunk_mark, opt->last_shown and opt->priv.
 * gs           - the source to search; its buffer is obtained through
 *                fill_textconv_grep().
 * collect_hits - when non-zero, every line is handed to match_line() with
 *                this flag and nothing is reported; the result is then 0.
 *
 * Returns 0 when the source is skipped (binary under GREP_BINARY_NOMATCH,
 * or the data could not be loaded), on the collect_hits pass, and when the
 * source does not count as a match for the active mode.  Returns 1 on the
 * first hit in status_only, name_only and binary-match modes, after
 * printing the name in unmatch_name_only mode when no line hit, after
 * printing a non-zero count in count mode, and otherwise whenever at least
 * one hit line was shown.
 */
1751static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
1752{
1753        char *bol;
1754        char *peek_bol = NULL;
1755        unsigned long left;
1756        unsigned lno = 1;
1757        unsigned last_hit = 0;
1758        int binary_match_only = 0;
1759        unsigned count = 0;
1760        int try_lookahead = 0;
1761        int show_function = 0;
1762        struct userdiff_driver *textconv = NULL;
1763        enum grep_context ctx = GREP_CONTEXT_HEAD;
1764        xdemitconf_t xecfg;
1765
            /* Fall back to plain stdout output when no callback was set. */
1766        if (!opt->output)
1767                opt->output = std_output;
1768
1769        if (opt->pre_context || opt->post_context || opt->file_break ||
1770            opt->funcbody) {
1771                /* Show hunk marks, except for the first file. */
1772                if (opt->last_shown)
1773                        opt->show_hunk_mark = 1;
1774                /*
1775                 * If we're using threads then we can't easily identify
1776                 * the first file.  Always put hunk marks in that case
1777                 * and skip the very first one later in work_done().
1778                 */
1779                if (opt->output != std_output)
1780                        opt->show_hunk_mark = 1;
1781        }
            /* Start this source with no line recorded as shown yet. */
1782        opt->last_shown = 0;
1783
1784        if (opt->allow_textconv) {
1785                grep_source_load_driver(gs);
1786                /*
1787                 * We might set up the shared textconv cache data here, which
1788                 * is not thread-safe.
1789                 */
1790                grep_attr_lock();
1791                textconv = userdiff_get_textconv(gs->driver);
1792                grep_attr_unlock();
1793        }
1794
1795        /*
1796         * We know the result of a textconv is text, so we only have to care
1797         * about binary handling if we are not using it.
1798         */
1799        if (!textconv) {
1800                switch (opt->binary) {
1801                case GREP_BINARY_DEFAULT:
1802                        if (grep_source_is_binary(gs))
1803                                binary_match_only = 1;
1804                        break;
1805                case GREP_BINARY_NOMATCH:
1806                        if (grep_source_is_binary(gs))
1807                                return 0; /* Assume unmatch */
1808                        break;
1809                case GREP_BINARY_TEXT:
1810                        break;
1811                default:
1812                        BUG("unknown binary handling mode");
1813                }
1814        }
1815
            /*
             * Publish a zeroed xdemitconf_t through opt->priv for the
             * duration of the scan; it is released and opt->priv reset to
             * NULL near the end of this function.
             * NOTE(review): the early "return" statements below skip that
             * reset and leave opt->priv pointing at this stack variable --
             * confirm nothing reads opt->priv between calls.
             */
1816        memset(&xecfg, 0, sizeof(xecfg));
1817        opt->priv = &xecfg;
1818
1819        try_lookahead = should_lookahead(opt);
1820
            /* A source whose data cannot be loaded is reported as no match. */
1821        if (fill_textconv_grep(textconv, gs) < 0)
1822                return 0;
1823
1824        bol = gs->buf;
1825        left = gs->size;
1826        while (left) {
1827                char *eol, ch;
1828                int hit;
1829                ssize_t cno;
1830                ssize_t col = -1, icol = -1;
1831
1832                /*
1833                 * look_ahead() skips quickly to the line that possibly
1834                 * has the next hit; don't call it if we need to do
1835                 * something more than just skipping the current line
1836                 * in response to an unmatch for the current line.  E.g.
1837                 * inside a post-context window, we will show the current
1838                 * line as a context around the previous hit when it
1839                 * doesn't hit.
1840                 */
1841                if (try_lookahead
1842                    && !(last_hit
1843                         && (show_function ||
1844                             lno <= last_hit + opt->post_context))
1845                    && look_ahead(opt, &left, &lno, &bol))
1846                        break;
                    /*
                     * Temporarily overwrite the byte at eol with NUL while
                     * match_line() runs; the saved byte is put back below.
                     */
1847                eol = end_of_line(bol, &left);
1848                ch = *eol;
1849                *eol = 0;
1850
                    /* The first empty line switches the context to BODY. */
1851                if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
1852                        ctx = GREP_CONTEXT_BODY;
1853
1854                hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits);
1855                *eol = ch;
1856
                    /* On the collect_hits pass nothing is reported. */
1857                if (collect_hits)
1858                        goto next_line;
1859
1860                /* "grep -v -e foo -e bla" should list lines
1861                 * that do not have either, so inversion should
1862                 * be done outside.
1863                 */
1864                if (opt->invert)
1865                        hit = !hit;
                    /* In unmatch_name_only mode a single hit rules the source out. */
1866                if (opt->unmatch_name_only) {
1867                        if (hit)
1868                                return 0;
1869                        goto next_line;
1870                }
1871                if (hit) {
1872                        count++;
                            /* These modes are decided by the first hit. */
1873                        if (opt->status_only)
1874                                return 1;
1875                        if (opt->name_only) {
1876                                show_name(opt, gs->name);
1877                                return 1;
1878                        }
                            /* Count mode only tallies; output happens after the loop. */
1879                        if (opt->count)
1880                                goto next_line;
1881                        if (binary_match_only) {
1882                                opt->output(opt, "Binary file ", 12);
1883                                output_color(opt, gs->name, strlen(gs->name),
1884                                             opt->color_filename);
1885                                opt->output(opt, " matches\n", 9);
1886                                return 1;
1887                        }
1888                        /* Hit at this line.  If we haven't shown the
1889                         * pre-context lines, we would need to show them.
1890                         */
1891                        if (opt->pre_context || opt->funcbody)
1892                                show_pre_context(opt, gs, bol, eol, lno);
1893                        else if (opt->funcname)
1894                                show_funcname_line(opt, gs, bol, lno);
1895                        cno = opt->invert ? icol : col;
1896                        if (cno < 0) {
1897                                /*
1898                                 * A negative cno indicates that there was no
1899                                 * match on the line. We are thus inverted and
1900                                 * being asked to show all lines that _don't_
1901                                 * match a given expression. Therefore, set cno
1902                                 * to 0 to suggest the whole line matches.
1903                                 */
1904                                cno = 0;
1905                        }
                            /* show_line() is given cno + 1 -- presumably a 1-based column. */
1906                        show_line(opt, bol, eol, gs->name, lno, cno + 1, ':');
1907                        last_hit = lno;
1908                        if (opt->funcbody)
1909                                show_function = 1;
1910                        goto next_line;
1911                }
                    /*
                     * Not a hit.  While a function body is being shown,
                     * check whether the next function starts here, which
                     * ends the body.  peek_bol remembers how far a previous
                     * peek already looked so that stretch is not rescanned.
                     */
1912                if (show_function && (!peek_bol || peek_bol < bol)) {
1913                        unsigned long peek_left = left;
1914                        char *peek_eol = eol;
1915
1916                        /*
1917                         * Trailing empty lines are not interesting.
1918                         * Peek past them to see if they belong to the
1919                         * body of the current function.
1920                         */
1921                        peek_bol = bol;
1922                        while (is_empty_line(peek_bol, peek_eol)) {
1923                                peek_bol = peek_eol + 1;
1924                                peek_eol = end_of_line(peek_bol, &peek_left);
1925                        }
1926
1927                        if (match_funcname(opt, gs, peek_bol, peek_eol))
1928                                show_function = 0;
1929                }
1930                if (show_function ||
1931                    (last_hit && lno <= last_hit + opt->post_context)) {
1932                        /* If the last hit is within the post context,
1933                         * we need to show this line.
1934                         */
1935                        show_line(opt, bol, eol, gs->name, lno, col + 1, '-');
1936                }
1937
1938        next_line:
                    /*
                     * Step past the byte at eol (presumably the newline that
                     * end_of_line() stopped at) and account for it in "left";
                     * stop when nothing remains.
                     */
1939                bol = eol + 1;
1940                if (!left)
1941                        break;
1942                left--;
1943                lno++;
1944        }
1945
1946        if (collect_hits)
1947                return 0;
1948
            /*
             * Reaching this point in status_only mode means the loop did
             * not return on a hit, so the answer is 1 only when the caller
             * asked for sources without a match.
             */
1949        if (opt->status_only)
1950                return opt->unmatch_name_only;
1951        if (opt->unmatch_name_only) {
1952                /* We did not see any hit, so we want to show this */
1953                show_name(opt, gs->name);
1954                return 1;
1955        }
1956
            /* Release what was attached to xecfg and unpublish it. */
1957        xdiff_clear_find_func(&xecfg);
1958        opt->priv = NULL;
1959
1960        /* NEEDSWORK:
1961         * The real "grep -c foo *.c" gives many "bar.c:0" lines,
1962         * which feels mostly useless but sometimes useful.  Maybe
1963         * make it another option?  For now suppress them.
1964         */
1965        if (opt->count && count) {
1966                char buf[32];
1967                if (opt->pathname) {
1968                        output_color(opt, gs->name, strlen(gs->name),
1969                                     opt->color_filename);
1970                        output_sep(opt, ':');
1971                }
1972                xsnprintf(buf, sizeof(buf), "%u\n", count);
1973                opt->output(opt, buf, strlen(buf));
1974                return 1;
1975        }
            /* last_hit is the line number of the last hit shown, 0 if none. */
1976        return !!last_hit;
1977}
1978
1979static void clr_hit_marker(struct grep_expr *x)
1980{
1981        /* All-hit markers are meaningful only at the very top level
1982         * OR node.
1983         */
1984        while (1) {
1985                x->hit = 0;
1986                if (x->node != GREP_NODE_OR)
1987                        return;
1988                x->u.binary.left->hit = 0;
1989                x = x->u.binary.right;
1990        }
1991}
1992
1993static int chk_hit_marker(struct grep_expr *x)
1994{
1995        /* Top level nodes have hit markers.  See if they all are hits */
1996        while (1) {
1997                if (x->node != GREP_NODE_OR)
1998                        return x->hit;
1999                if (!x->u.binary.left->hit)
2000                        return 0;
2001                x = x->u.binary.right;
2002        }
2003}
2004
2005int grep_source(struct grep_opt *opt, struct grep_source *gs)
2006{
2007        /*
2008         * we do not have to do the two-pass grep when we do not check
2009         * buffer-wide "all-match".
2010         */
2011        if (!opt->all_match)
2012                return grep_source_1(opt, gs, 0);
2013
2014        /* Otherwise the toplevel "or" terms hit a bit differently.
2015         * We first clear hit markers from them.
2016         */
2017        clr_hit_marker(opt->pattern_expression);
2018        grep_source_1(opt, gs, 1);
2019
2020        if (!chk_hit_marker(opt->pattern_expression))
2021                return 0;
2022
2023        return grep_source_1(opt, gs, 0);
2024}
2025
2026int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
2027{
2028        struct grep_source gs;
2029        int r;
2030
2031        grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL);
2032        gs.buf = buf;
2033        gs.size = size;
2034
2035        r = grep_source(opt, &gs);
2036
2037        grep_source_clear(&gs);
2038        return r;
2039}
2040
2041void grep_source_init(struct grep_source *gs, enum grep_source_type type,
2042                      const char *name, const char *path,
2043                      const void *identifier)
2044{
2045        gs->type = type;
2046        gs->name = xstrdup_or_null(name);
2047        gs->path = xstrdup_or_null(path);
2048        gs->buf = NULL;
2049        gs->size = 0;
2050        gs->driver = NULL;
2051
2052        switch (type) {
2053        case GREP_SOURCE_FILE:
2054                gs->identifier = xstrdup(identifier);
2055                break;
2056        case GREP_SOURCE_OID:
2057                gs->identifier = oiddup(identifier);
2058                break;
2059        case GREP_SOURCE_BUF:
2060                gs->identifier = NULL;
2061                break;
2062        }
2063}
2064
2065void grep_source_clear(struct grep_source *gs)
2066{
2067        FREE_AND_NULL(gs->name);
2068        FREE_AND_NULL(gs->path);
2069        FREE_AND_NULL(gs->identifier);
2070        grep_source_clear_data(gs);
2071}
2072
2073void grep_source_clear_data(struct grep_source *gs)
2074{
2075        switch (gs->type) {
2076        case GREP_SOURCE_FILE:
2077        case GREP_SOURCE_OID:
2078                FREE_AND_NULL(gs->buf);
2079                gs->size = 0;
2080                break;
2081        case GREP_SOURCE_BUF:
2082                /* leave user-provided buf intact */
2083                break;
2084        }
2085}
2086
2087static int grep_source_load_oid(struct grep_source *gs)
2088{
2089        enum object_type type;
2090
2091        grep_read_lock();
2092        gs->buf = read_object_file(gs->identifier, &type, &gs->size);
2093        grep_read_unlock();
2094
2095        if (!gs->buf)
2096                return error(_("'%s': unable to read %s"),
2097                             gs->name,
2098                             oid_to_hex(gs->identifier));
2099        return 0;
2100}
2101
2102static int grep_source_load_file(struct grep_source *gs)
2103{
2104        const char *filename = gs->identifier;
2105        struct stat st;
2106        char *data;
2107        size_t size;
2108        int i;
2109
2110        if (lstat(filename, &st) < 0) {
2111        err_ret:
2112                if (errno != ENOENT)
2113                        error_errno(_("failed to stat '%s'"), filename);
2114                return -1;
2115        }
2116        if (!S_ISREG(st.st_mode))
2117                return -1;
2118        size = xsize_t(st.st_size);
2119        i = open(filename, O_RDONLY);
2120        if (i < 0)
2121                goto err_ret;
2122        data = xmallocz(size);
2123        if (st.st_size != read_in_full(i, data, size)) {
2124                error_errno(_("'%s': short read"), filename);
2125                close(i);
2126                free(data);
2127                return -1;
2128        }
2129        close(i);
2130
2131        gs->buf = data;
2132        gs->size = size;
2133        return 0;
2134}
2135
2136static int grep_source_load(struct grep_source *gs)
2137{
2138        if (gs->buf)
2139                return 0;
2140
2141        switch (gs->type) {
2142        case GREP_SOURCE_FILE:
2143                return grep_source_load_file(gs);
2144        case GREP_SOURCE_OID:
2145                return grep_source_load_oid(gs);
2146        case GREP_SOURCE_BUF:
2147                return gs->buf ? 0 : -1;
2148        }
2149        BUG("invalid grep_source type to load");
2150}
2151
2152void grep_source_load_driver(struct grep_source *gs)
2153{
2154        if (gs->driver)
2155                return;
2156
2157        grep_attr_lock();
2158        if (gs->path)
2159                gs->driver = userdiff_find_by_path(gs->path);
2160        if (!gs->driver)
2161                gs->driver = userdiff_find_by_name("default");
2162        grep_attr_unlock();
2163}
2164
2165static int grep_source_is_binary(struct grep_source *gs)
2166{
2167        grep_source_load_driver(gs);
2168        if (gs->driver->binary != -1)
2169                return gs->driver->binary;
2170
2171        if (!grep_source_load(gs))
2172                return buffer_is_binary(gs->buf, gs->size);
2173
2174        return 0;
2175}