pretty: two phase conversion for non utf-8 commits
[gitweb.git] / pretty.c
index d3a82d22d398ae7234f8917f4d8b0770ba4d0c47..59472750ada226df15ac90092723b044f4eece6f 100644 (file)
--- a/pretty.c
+++ b/pretty.c
@@ -594,6 +594,7 @@ static char *replace_encoding_header(char *buf, const char *encoding)
 }
 
 char *logmsg_reencode(const struct commit *commit,
+                     char **commit_encoding,
                      const char *output_encoding)
 {
        static const char *utf8 = "UTF-8";
@@ -615,9 +616,15 @@ char *logmsg_reencode(const struct commit *commit,
                            sha1_to_hex(commit->object.sha1), typename(type));
        }
 
-       if (!output_encoding || !*output_encoding)
+       if (!output_encoding || !*output_encoding) {
+               if (commit_encoding)
+                       *commit_encoding =
+                               get_header(commit, msg, "encoding");
                return msg;
+       }
        encoding = get_header(commit, msg, "encoding");
+       if (commit_encoding)
+               *commit_encoding = encoding;
        use_encoding = encoding ? encoding : utf8;
        if (same_encoding(use_encoding, output_encoding)) {
                /*
@@ -658,7 +665,8 @@ char *logmsg_reencode(const struct commit *commit,
        if (out)
                out = replace_encoding_header(out, output_encoding);
 
-       free(encoding);
+       if (!commit_encoding)
+               free(encoding);
        /*
         * If the re-encoding failed, out might be NULL here; in that
         * case we just return the commit message verbatim.
@@ -768,12 +776,12 @@ struct format_commit_context {
        unsigned commit_message_parsed:1;
        struct signature_check signature_check;
        char *message;
+       char *commit_encoding;
        size_t width, indent1, indent2;
 
        /* These offsets are relative to the start of the commit message. */
        struct chunk author;
        struct chunk committer;
-       struct chunk encoding;
        size_t message_off;
        size_t subject_off;
        size_t body_off;
@@ -820,9 +828,6 @@ static void parse_commit_header(struct format_commit_context *context)
                } else if (!prefixcmp(msg + i, "committer ")) {
                        context->committer.off = i + 10;
                        context->committer.len = eol - i - 10;
-               } else if (!prefixcmp(msg + i, "encoding ")) {
-                       context->encoding.off = i + 9;
-                       context->encoding.len = eol - i - 9;
                }
                i = eol;
        }
@@ -903,23 +908,6 @@ static void parse_commit_message(struct format_commit_context *c)
        c->commit_message_parsed = 1;
 }
 
-static void format_decoration(struct strbuf *sb, const struct commit *commit)
-{
-       struct name_decoration *d;
-       const char *prefix = " (";
-
-       load_ref_decorations(DECORATE_SHORT_REFS);
-       d = lookup_decoration(&name_decoration, &commit->object);
-       while (d) {
-               strbuf_addstr(sb, prefix);
-               prefix = ", ";
-               strbuf_addstr(sb, d->name);
-               d = d->next;
-       }
-       if (prefix[0] == ',')
-               strbuf_addch(sb, ')');
-}
-
 static void strbuf_wrap(struct strbuf *sb, size_t pos,
                        size_t width, size_t indent1, size_t indent2)
 {
@@ -966,7 +954,8 @@ static int format_reflog_person(struct strbuf *sb,
        return format_person_part(sb, part, ident, strlen(ident), dmode);
 }
 
-static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
+static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
+                               const char *placeholder,
                                void *context)
 {
        struct format_commit_context *c = context;
@@ -1098,7 +1087,8 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
                strbuf_addstr(sb, get_revision_mark(NULL, commit));
                return 1;
        case 'd':
-               format_decoration(sb, commit);
+               load_ref_decorations(DECORATE_SHORT_REFS);
+               format_decorations(sb, commit, 0);
                return 1;
        case 'g':               /* reflog info */
                switch(placeholder[1]) {
@@ -1177,7 +1167,8 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
                                   msg + c->committer.off, c->committer.len,
                                   c->pretty_ctx->date_mode);
        case 'e':       /* encoding */
-               strbuf_add(sb, msg + c->encoding.off, c->encoding.len);
+               if (c->commit_encoding)
+                       strbuf_addstr(sb, c->commit_encoding);
                return 1;
        case 'B':       /* raw body */
                /* message_off is always left at the initial newline */
@@ -1203,7 +1194,8 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
        return 0;       /* unknown placeholder */
 }
 
-static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
+static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
+                                const char *placeholder,
                                 void *context)
 {
        int consumed;
@@ -1283,16 +1275,37 @@ void format_commit_message(const struct commit *commit,
 {
        struct format_commit_context context;
        const char *output_enc = pretty_ctx->output_encoding;
+       const char *utf8 = "UTF-8";
 
        memset(&context, 0, sizeof(context));
        context.commit = commit;
        context.pretty_ctx = pretty_ctx;
        context.wrap_start = sb->len;
-       context.message = logmsg_reencode(commit, output_enc);
+       context.message = logmsg_reencode(commit,
+                                         &context.commit_encoding,
+                                         output_enc);
 
        strbuf_expand(sb, format, format_commit_item, &context);
        rewrap_message_tail(sb, &context, 0, 0, 0);
 
+       if (output_enc) {
+               if (same_encoding(utf8, output_enc))
+                       output_enc = NULL;
+       } else {
+               if (context.commit_encoding &&
+                   !same_encoding(context.commit_encoding, utf8))
+                       output_enc = context.commit_encoding;
+       }
+
+       if (output_enc) {
+               int outsz;
+               char *out = reencode_string_len(sb->buf, sb->len,
+                                               output_enc, utf8, &outsz);
+               if (out)
+                       strbuf_attach(sb, out, outsz, outsz + 1);
+       }
+
+       free(context.commit_encoding);
        logmsg_free(context.message, commit);
        free(context.signature_check.gpg_output);
        free(context.signature_check.signer);
@@ -1451,7 +1464,7 @@ void pretty_print_commit(const struct pretty_print_context *pp,
        }
 
        encoding = get_log_output_encoding();
-       msg = reencoded = logmsg_reencode(commit, encoding);
+       msg = reencoded = logmsg_reencode(commit, NULL, encoding);
 
        if (pp->fmt == CMIT_FMT_ONELINE || pp->fmt == CMIT_FMT_EMAIL)
                indent = 0;