/* * Reads a unidiff-format diff in stdin. Writes it to stdout, but with * underscore-and-backspace highlighting added to differing lines to * show where they differ. Tabs get expanded to spaces assuming * 8-column tabstops, but this happens very late, after decisions * about which characters to highlight are made but before the * highlighting is actually generated, so the spaces are highlighted * exactly when the corresponding tab should be. * * If stdout is a tty, forks ul(1) to convert highlighting into * something terminal-appropriate (but see below for -DNO_UL). * * Hunk portions do not necessarily have old and new lines matching up * 1-1. At its simplest, this is true of any portion for which the * numbers of old and new lines aren't equal, as in * * -old line 1 * -old line 2 * +new line 1 * +new line 1.5 * +new line 2 * * (in which "new line 1.5" is unmatched), but it can also occur even * when the numbers are equal, as in * * -old line 1 * -this line is one of two very similar lines, one each old and new * -old line 3 * -old line 4 * +new line 1 * +new line 2 * +this line is one of two very similar lines, one each new and old * +new line 4 * * The minimal-change setup in this last case is to have old line 3 * and new line 2 unpaired, with the long lines matching up. Matching * up is decided based on how many edits are needed to turn the old * into the new. Unmatched lines do not have any line content * highlighted, but the line type character - the first character of * the diff-output line - gets highlighted. In this last example, * highlighting occurs as indicated here with ^ markers: * * -old line 1 * ^^^ * -this line is one of two very similar lines, one each old and new * ^^^ ^^^ * -old line 3 * ^ * -old line 4 * ^^^ * +new line 1 * ^^^ * +new line 2 * ^ * +this line is one of two very similar lines, one each new and old * ^^^ ^^^ * +new line 4 * ^^^ * * The algorithms used are relatively expensive. 
Using this on diffs * containing large rewrites is likely to be slow; it is designed for * use on diffs containing scattered small edits. * * Lines not understood are copied to the output unchanged, in an * attempt to make this tolerant of commentary lines (such as the * "index" lines printed by git diff). * * Build with -DNO_UL to suppress the code to fork ul if isatty(1). * This may be useful for basic use on less-unixy systems. * * This file is in the public domain. */ /* NOTE(review): the header names of the include directives below appear to be missing; restore them from a pristine copy. */ #include #include #include #ifndef NO_UL #include #include #include #include #include #endif /* Program name used as the prefix of every error message; presumably supplied by the C runtime - TODO confirm. */ extern const char *__progname; /* Growable array of heap-allocated strings: v is the pointer vector, n the number of strings stored, a the number of slots allocated. */ typedef struct strarr STRARR; struct strarr { char **v; int n; int a; } ; /* Parse state: inhunk is nonzero inside a hunk; hlo and hln are the old and new line counts the hunk header says are still to come; inbatch is nonzero while a run of - and + lines is being collected. */ static int inhunk; static int hlo; static int hln; static int inbatch; /* old and new hold the current batch lines without their leading - or + character; oldtrail and newtrail hold backslash lines that followed each side; trail points at the array the next backslash line is saved to, or is nil when such lines are just echoed. */ static STRARR old; static STRARR new; static STRARR oldtrail; static STRARR newtrail; static STRARR *trail; #ifndef NO_UL /* pid of the forked ul child; start_ul leaves it at -1 when no child was started. */ static pid_t ul_kid; #endif /* Put sa into the empty state: no vector, no slots, no strings. */ static void sa_init(STRARR *sa) { sa->v = 0; sa->a = 0; sa->n = 0; } /* Free every string held in sa and reset the count to zero; the pointer vector is kept for reuse. */ static void sa_clear(STRARR *sa) { while (sa->n > 0) { sa->n --; free(sa->v[sa->n]); } } /* Append a NUL-terminated copy of the first l bytes of s to sa, growing the vector to n+8 slots when it is full. NOTE(review): the malloc and realloc results are used unchecked. */ static void sa_save(STRARR *sa, const char *s, int l) { char *c; c = malloc(l+1); bcopy(s,c,l); c[l] = '\0'; if (sa->n >= sa->a) sa->v = realloc(sa->v,(sa->a=sa->n+8)*sizeof(*sa->v)); sa->v[sa->n++] = c; } /* Return the number of single-character insertions and deletions that minc finds for turning old (olen chars) into new (nlen chars). When odiff is non-nil, odiff and ndiff are also filled in, one byte per character: 1 for a deleted or inserted character, 0 for a character that is kept. cv is a memo table over (o,n) backed by cvdata, with -1 meaning not yet computed. NOTE(review): cv and cvdata are variable-length arrays, so an olen or nlen of zero gives them zero length - confirm callers never pass that. */ static int chgcost(const char *old, int olen, char *odiff, const char *new, int nlen, char *ndiff) { int *cv[olen]; int cvdata[olen*nlen]; int i; /* Nested helper: cost for the first o chars of old versus the first n chars of new. With setdiff nonzero the memo is bypassed so that the marks for the chosen path get written. */ int minc(int o, int n, int setdiff) { if (o < 1) { if (n < 1) { return(0); } else { if (setdiff) memset(ndiff,1,n); return(n); } } else { if (n < 1) { if (setdiff) memset(odiff,1,o); return(o); } else { int v; int v2; v = cv[o-1][n-1]; if ((v >= 0) && !setdiff) return(v); if (old[o-1] == new[n-1]) { v = minc(o-1,n-1,setdiff); if (setdiff) { odiff[o-1] = 0; ndiff[n-1] = 0; } } else { v = minc(o-1,n,0) + 1; v2 = minc(o,n-1,0) + 1; if (v2 < v) { v = v2; if (setdiff) { minc(o,n-1,1); ndiff[n-1] = 1; } } else { if (setdiff) { minc(o-1,n,1); odiff[o-1] = 1; } } } cv[o-1][n-1] = v; return(v); } 
} } /* Point each cv row at its slice of cvdata, then mark every cell as not yet computed. */ for (i=olen-1;i>=0;i--) cv[i] = &cvdata[i*nlen]; for (i=(olen*nlen)-1;i>=0;i--) cvdata[i] = -1; return(minc(olen,nlen,odiff?1:0)); } /* Print every line saved in sa with its first character highlighted as character, backspace, underscore. NOTE(review): the loop condition below looks damaged (presumably it compared i against sa->n) - restore from a pristine copy. */ static void dump_trail(STRARR *sa) { int i; for (i=0;in;i++) printf("%c\b_%s\n",sa->v[i][0],sa->v[i]+1); } /* Emit the batch collected in old and new. What is visible shows search filling the smc cost table and smk choice table and dump replaying smk: 1 prints an unmatched new line, 2 an unmatched old line, 3 apparently a matched pair. NOTE(review): everything after the line-comment marker just below is lexically part of that comment on this physical line, and several spans (local declarations, the start of search, the body of dump_line) appear to be missing - restore from a pristine copy before building. */ static void endbatch(void) { // printf("batch: nold = %d, nnew = %d\n",old.n,new.n); // for (i=0;i= 0) return(v); if (on < 1) { if (nn < 1) { abort(); } else { v = nlen[nn-1] + search(0,nn-1); k = 1; } } else { if (nn < 1) { v = olen[on-1] + search(on-1,0); k = 2; } else { v = chg(on-1,nn-1) + search(on-1,nn-1); k = 3; if (v > olen[on-1]) { v2 = olen[on-1] + search(on-1,nn); if (v2 < v) { v = v2; k = 2; } } if (v > nlen[nn-1]) { v2 = nlen[nn-1] + search(on,nn-1); if (v2 < v) { v = v2; k = 1; } } } } smc[on][nn] = v; smk[on][nn] = k; return(v); } void dump(int o, int n) { if ((o < 0) || (n < 0)) abort(); switch (smk[o][n]) { case 0: return; break; case 1: dump(o,n-1); printf("+\b_%s\n",new.v[n-1]); if (n == new.n) dump_trail(&newtrail); break; case 2: dump(o-1,n); printf("-\b_%s\n",old.v[o-1]); if (o == old.n) dump_trail(&oldtrail); break; case 3: { char odiff[olen[o-1]]; char ndiff[nlen[n-1]]; void dump_line(char marker, const char *s, int l, const char *d) { int i; int c; putchar(marker); c = 1; for (i=0;i=0;i--) cm[i] = &cmdata[i*new.n]; for (i=(old.n*new.n)-1;i>=0;i--) cmdata[i] = -1; for (i=old.n;i>=0;i--) { smc[i] = &smcdata[i*(new.n+1)]; smk[i] = &smkdata[i*(new.n+1)]; } for (i=((old.n+1)*(new.n+1))-1;i>=0;i--) { smcdata[i] = -1; smkdata[i] = 0; } smc[0][0] = 0; smk[0][0] = 0; for (i=old.n-1;i>=0;i--) { omatch[i] = -1; olen[i] = strlen(old.v[i]); } for (i=new.n-1;i>=0;i--) { nmatch[i] = -1; nlen[i] = strlen(new.v[i]); } search(old.n,new.n); #if 0 { int i; int j; printf(" n\\o "); for (i=0;i<=old.n;i++) printf(" %8d",i); printf("\n"); for (i=0;i<=new.n;i++) { char *s; printf(" %2d ",i); for (j=0;j<=old.n;j++) { if ((i < new.n) && (j < old.n)) { asprintf(&s,"%d/%d/%d",cm[j][i],smc[j][i],smk[j][i]); } else { 
asprintf(&s,"%d/%d",smc[j][i],smk[j][i]); } printf(" %8s",s); free(s); } printf("\n"); } } #endif /* Print the batch; clearing trail makes later backslash lines fall through to plain echo. */ dump(old.n,new.n); trail = 0; } } /* Handle one input line: b holds l bytes plus a terminating NUL, newline already removed. Inside a hunk, space, + and - lines are counted against hlo and hln; + and - lines are saved into the current batch, while a context line ends any open batch and is echoed. A backslash line is saved to the current trail when there is one. Any other line ends the hunk (it is an error if the header promised more lines), is echoed, and is parsed as a hunk header when it begins with @. */ static void gotline(const char *b, int l) { int iso; int isn; int isb; /* Nested helper: copy the line to stdout unchanged. */ void echo(void) { fwrite(b,1,l,stdout); putchar('\n'); } switch (b[0]) { case ' ': iso = 1; isn = 1; isb = 0; /* The if (0) blocks exist only to host the + and - case labels, so each line type sets its own flags and then shares the code below. */ if (0) { case '+': iso = 0; isn = 1; isb = 1; } if (0) { case '-': iso = 1; isn = 0; isb = 1; } if (inhunk) { if (iso) { if (--hlo < 0) { fprintf(stderr,"%s: hunk has more old lines than claimed\n",__progname); exit(1); } } if (isn) { if (--hln < 0) { fprintf(stderr,"%s: hunk has more new lines than claimed\n",__progname); exit(1); } } if (isb) { if (! inbatch) { sa_clear(&old); sa_clear(&new); sa_clear(&oldtrail); sa_clear(&newtrail); inbatch = 1; } if (iso) sa_save(&old,b+1,l-1); if (isn) sa_save(&new,b+1,l-1); if ((iso && isn) || !(iso || isn)) abort(); trail = iso ? &oldtrail : &newtrail; } else { if (inbatch) endbatch(); inbatch = 0; echo(); } } else { echo(); } break; case '\\': if (trail) { sa_save(trail,b,l); break; } /* fall through */ default: if (inhunk) { if (hlo || hln) { fprintf(stderr,"%s: hunk ends with lines left (-%d +%d)\n",__progname,hlo,hln); exit(1); } if (inbatch) endbatch(); inhunk = 0; } echo(); if (b[0] == '@') { int nv; inhunk = 1; inbatch = 0; nv = -1; /* Read the remaining line counts from the header; nv stays -1 unless the whole pattern, including the closing @@, matched. NOTE(review): the three short forms only match a literal start line of 1, so a header that omits a count at any other start line is rejected as unparseable - confirm this is intended. */ if ( (sscanf(b,"@@ -%*d,%d +%*d,%d @@%n",&hlo,&hln,&nv) == 2) && (nv > 0) ) { } else if ( (sscanf(b,"@@ -1 +%*d,%d @@%n",&hln,&nv) == 1) && (nv > 0) ) { hlo = 1; } else if ( (sscanf(b,"@@ -%*d,%d +1 @@%n",&hlo,&nv) == 1) && (nv > 0) ) { hln = 1; } else if ( (sscanf(b,"@@ -1 +1 @@%n",&nv) == 0) && (nv > 0) ) { hlo = 1; hln = 1; } else { fprintf(stderr,"%s: can't parse @ line\n",__progname); exit(1); } } break; } } #ifdef NO_UL /* NO_UL build: both hooks do nothing. */ static void start_ul(void) { } static void end_ul(void) { } #else /* When stdout is a tty, fork ul with its stdin fed from a pipe and make our stdout the write end of that pipe. The socketpair reports exec failure: the child marks its end with fcntl F_SETFD 1 (presumably close-on-exec - TODO confirm) and writes errno if execlp returns, so the parent reads zero bytes on success and an int on failure. */ static void start_ul(void) { int dp[2]; int xp[2]; pid_t kid; int e; int n; ul_kid = -1; if (! 
isatty(1)) return; /* dp carries our output to ul; xp only carries the exec status. */ if (pipe(&dp[0]) < 0) { fprintf(stderr,"%s: pipe; %s\n",__progname,strerror(errno)); exit(1); } if (socketpair(AF_LOCAL,SOCK_STREAM,0,&xp[0]) < 0) { fprintf(stderr,"%s: AF_LOCAL socketpair; %s\n",__progname,strerror(errno)); exit(1); } /* Flush stdio before forking. */ fflush(0); kid = fork(); if (kid < 0) { fprintf(stderr,"%s: fork; %s\n",__progname,strerror(errno)); exit(1); } /* Parent. */ if (kid > 0) { close(xp[1]); n = recv(xp[0],&e,sizeof(e),MSG_WAITALL); if (n == 0) { close(xp[0]); close(dp[0]); if (dp[1] != 1) { dup2(dp[1],1); close(dp[1]); } ul_kid = kid; return; } if (n == sizeof(e)) { fprintf(stderr,"%s: can't exec ul: %s\n",__progname,strerror(e)); exit(1); } fprintf(stderr,"%s: exec protocol error (got %d not %d)\n",__progname,n,(int)sizeof(e)); exit(1); } /* Child: read from the pipe, then exec ul. */ if (dp[0] != 0) { dup2(dp[0],0); close(dp[0]); } close(dp[1]); close(xp[0]); fcntl(xp[1],F_SETFD,1); execlp("ul","ul",(char *)0); e = errno; write(xp[1],&e,sizeof(e)); exit(1); } /* If a ul child was started, flush stdio, close stdout so the pipe reaches EOF, and wait for the child. */ static void end_ul(void) { if (ul_kid > 0) { fflush(0); close(1); wait4(ul_kid,0,0,0); } } #endif int main(void); /* Read stdin one character at a time, collecting each line in b (l bytes used, a bytes allocated) and handing it to gotline without its newline; at EOF any open batch is emitted and ul is reaped. NOTE(review): the labelled while and break syntax and the nested functions used in this file are not standard C; they are presumably compiler extensions - confirm the intended compiler. */ int main(void) { char *b; int l; int a; int c; /* Nested helper: append ch to b, growing it to l+8 bytes when full. NOTE(review): the realloc result is used unchecked. */ void savec(int ch) { if (l >= a) b = realloc(b,a=l+8); b[l++] = ch; } start_ul(); sa_init(&old); sa_init(&new); sa_init(&oldtrail); sa_init(&newtrail); inhunk = 0; b = 0; a = 0; l = 0; while <"reading"> (1) { c = getchar(); switch (c) { case EOF: if (l > 0) { fprintf(stderr,"%s: missing newline supplied at EOF\n",__progname); savec('\0'); gotline(b,l-1); } break <"reading">; break; case '\n': savec('\0'); gotline(b,l-1); l = 0; break; default: savec(c); break; } } if (inhunk && inbatch) endbatch(); end_ul(); exit(0); }