1#include"cache.h" 2#include"attr.h" 3 4/* 5 * convert.c - convert a file when checking it out and checking it in. 6 * 7 * This should use the pathname to decide on whether it wants to do some 8 * more interesting conversions (automatic gzip/unzip, general format 9 * conversions etc etc), but by default it just does automatic CRLF<->LF 10 * translation when the "auto_crlf" option is set. 11 */ 12 13struct text_stat { 14/* CR, LF and CRLF counts */ 15unsigned cr, lf, crlf; 16 17/* These are just approximations! */ 18unsigned printable, nonprintable; 19}; 20 21static voidgather_stats(const char*buf,unsigned long size,struct text_stat *stats) 22{ 23unsigned long i; 24 25memset(stats,0,sizeof(*stats)); 26 27for(i =0; i < size; i++) { 28unsigned char c = buf[i]; 29if(c =='\r') { 30 stats->cr++; 31if(i+1< size && buf[i+1] =='\n') 32 stats->crlf++; 33continue; 34} 35if(c =='\n') { 36 stats->lf++; 37continue; 38} 39if(c ==127) 40/* DEL */ 41 stats->nonprintable++; 42else if(c <32) { 43switch(c) { 44/* BS, HT, ESC and FF */ 45case'\b':case'\t':case'\033':case'\014': 46 stats->printable++; 47break; 48default: 49 stats->nonprintable++; 50} 51} 52else 53 stats->printable++; 54} 55} 56 57/* 58 * The same heuristics as diff.c::mmfile_is_binary() 59 */ 60static intis_binary(unsigned long size,struct text_stat *stats) 61{ 62 63if((stats->printable >>7) < stats->nonprintable) 64return1; 65/* 66 * Other heuristics? Average line length might be relevant, 67 * as might LF vs CR vs CRLF counts.. 68 * 69 * NOTE! It might be normal to have a low ratio of CRLF to LF 70 * (somebody starts with a LF-only file and edits it with an editor 71 * that adds CRLF only to lines that are added..). But do we 72 * want to support CR-only? Probably not. 73 */ 74return0; 75} 76 77static intautocrlf_to_git(const char*path,char**bufp,unsigned long*sizep) 78{ 79char*buffer, *nbuf; 80unsigned long size, nsize; 81struct text_stat stats; 82 83if(!auto_crlf) 84return0; 85 86 size = *sizep; 87if(!size) 88return0; 89 buffer = *bufp; 90 91gather_stats(buffer, size, &stats); 92 93/* No CR? Nothing to convert, regardless. */ 94if(!stats.cr) 95return0; 96 97/* 98 * We're currently not going to even try to convert stuff 99 * that has bare CR characters. Does anybody do that crazy 100 * stuff? 101 */ 102if(stats.cr != stats.crlf) 103return0; 104 105/* 106 * And add some heuristics for binary vs text, of course... 107 */ 108if(is_binary(size, &stats)) 109return0; 110 111/* 112 * Ok, allocate a new buffer, fill it in, and return true 113 * to let the caller know that we switched buffers on it. 114 */ 115 nsize = size - stats.crlf; 116 nbuf =xmalloc(nsize); 117*bufp = nbuf; 118*sizep = nsize; 119do{ 120unsigned char c = *buffer++; 121if(c !='\r') 122*nbuf++ = c; 123}while(--size); 124 125return1; 126} 127 128static intautocrlf_to_working_tree(const char*path,char**bufp,unsigned long*sizep) 129{ 130char*buffer, *nbuf; 131unsigned long size, nsize; 132struct text_stat stats; 133unsigned char last; 134 135/* 136 * FIXME! Other pluggable conversions should go here, 137 * based on filename patterns. Right now we just do the 138 * stupid auto-CRLF one. 139 */ 140if(auto_crlf <=0) 141return0; 142 143 size = *sizep; 144if(!size) 145return0; 146 buffer = *bufp; 147 148gather_stats(buffer, size, &stats); 149 150/* No LF? Nothing to convert, regardless. */ 151if(!stats.lf) 152return0; 153 154/* Was it already in CRLF format? */ 155if(stats.lf == stats.crlf) 156return0; 157 158/* If we have any bare CR characters, we're not going to touch it */ 159if(stats.cr != stats.crlf) 160return0; 161 162if(is_binary(size, &stats)) 163return0; 164 165/* 166 * Ok, allocate a new buffer, fill it in, and return true 167 * to let the caller know that we switched buffers on it. 168 */ 169 nsize = size + stats.lf - stats.crlf; 170 nbuf =xmalloc(nsize); 171*bufp = nbuf; 172*sizep = nsize; 173 last =0; 174do{ 175unsigned char c = *buffer++; 176if(c =='\n'&& last !='\r') 177*nbuf++ ='\r'; 178*nbuf++ = c; 179 last = c; 180}while(--size); 181 182return1; 183} 184 185static voidsetup_crlf_check(struct git_attr_check *check) 186{ 187static struct git_attr *attr_crlf; 188 189if(!attr_crlf) 190 attr_crlf =git_attr("crlf",4); 191 check->attr = attr_crlf; 192} 193 194static intgit_path_is_binary(const char*path) 195{ 196struct git_attr_check attr_crlf_check; 197 198setup_crlf_check(&attr_crlf_check); 199 200/* 201 * If crlf is not mentioned, default to autocrlf; 202 * disable autocrlf only when crlf attribute is explicitly 203 * unset. 204 */ 205return(!git_checkattr(path,1, &attr_crlf_check) && 206(0== attr_crlf_check.isset)); 207} 208 209intconvert_to_git(const char*path,char**bufp,unsigned long*sizep) 210{ 211if(git_path_is_binary(path)) 212return0; 213returnautocrlf_to_git(path, bufp, sizep); 214} 215 216intconvert_to_working_tree(const char*path,char**bufp,unsigned long*sizep) 217{ 218if(git_path_is_binary(path)) 219return0; 220returnautocrlf_to_working_tree(path, bufp, sizep); 221}