/*
 * andsharp -d
 *
 * -d specifies decoding.  Someday, other options may be specifiable.
 *
 * Decoding copies standard input to standard output, replacing character
 * references: the named forms &lt; &gt; &amp; &quot; &nbsp; and the
 * numeric forms &#NNN; (decimal) and &#xHH; (hex), for codes 0..255.
 * Problems are reported on standard error, tagged with the input line
 * number; decoding then carries on.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

extern const char *__progname;

/* Longest character name (the text between '&' and ';') that is buffered. */
#define MAXCHNAME 64

/* Largest character code that can be written as a single output byte. */
#define MAXCCODE 255

/* Value parked in the accumulator once a numeric code has gone out of
   range; any value above MAXCCODE that cannot overflow an int when one
   more digit is appended would do. */
#define CCODE_OVERFLOW 10000

/* Current input line number, used only in diagnostics. */
static int iline;

/*
 * Write the character named by the l bytes at n (not NUL-terminated).
 * An unknown name produces a diagnostic on stderr and no output.
 */
static void printch(const unsigned char *n, int l)
{
    static const struct {
        const char *name;
        int len;
        int ch;
    } names[] = {
        { "lt", 2, '<' },
        { "gt", 2, '>' },
        { "amp", 3, '&' },
        { "quot", 4, '"' },
        /* This theoretically should be a no-break space (0xA0), but the
           way &nbsp; is actually used, ' ' is a better approximation. */
        { "nbsp", 4, ' ' },
    };
    size_t i;

    for (i = 0; i < sizeof names / sizeof names[0]; i++) {
        if (l == names[i].len && memcmp(n, names[i].name, (size_t)l) == 0) {
            putchar(names[i].ch);
            return;
        }
    }
    fprintf(stderr,"%s: line %d: unknown character name %.*s\n",__progname,iline,l,(const char *)n);
}

/*
 * Return the value of hex digit c (either case), or -1 if c is not one.
 */
static int hexval(int c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    return -1;
}

/*
 * Append one digit (in the given base) to the code accumulated so far and
 * return the new accumulator.  Once the code exceeds MAXCCODE it is pinned
 * to CCODE_OVERFLOW.
 */
static int add_digit(int ccode, int digit, int base)
{
    ccode = (ccode * base) + digit;
    if (ccode > MAXCCODE) {
        /* Complain only on the digit that first crosses the limit: then
           the previous value was <= MAXCCODE, so the new one is below
           (MAXCCODE+1)*base.  Later digits start from CCODE_OVERFLOW and
           land far above that bound, so they stay silent. */
        if (ccode <= ((MAXCCODE + 1) * base) - 1) {
            fprintf(stderr,"%s: line %d: out-of-range code\n",__progname,iline);
        }
        ccode = CCODE_OVERFLOW;
    }
    return ccode;
}

/*
 * Finish a numeric reference at character c.  If c is not the ';'
 * terminator, report it and push it back so it is reprocessed as ordinary
 * input.  The accumulated code is written if it is in range.
 *
 * Returns the value the caller should treat as "the character just
 * consumed": c itself for ';', or 0 when c was pushed back, so that a
 * pushed-back newline is counted only once (when it is re-read).
 */
static int end_code(int c, int ccode)
{
    if (c != ';') {
        fprintf(stderr,"%s: line %d: invalid character ",__progname,iline);
        if (((c >= 32) && (c <= 126)) || (c >= 160)) {
            fprintf(stderr,"`%c'",c);
        } else {
            fprintf(stderr,"0x%02x",c);
        }
        fprintf(stderr," in code, inserting terminator\n");
        ungetc(c,stdin);
        c = 0;
    }
    if (ccode <= MAXCCODE) {
        putchar(ccode);
    }
    return c;
}

/*
 * Copy stdin to stdout, decoding character references as it goes.
 */
static void decode(void)
{
    enum {
        S_IDLE = 1,     /* copying ordinary text */
        S_AND,          /* just saw '&' */
        S_SHARP,        /* in "&#", collecting decimal digits */
        S_HEX,          /* in "&#x", collecting hex digits */
        S_CHNAM         /* in "&name", collecting the name */
    } state = S_IDLE;   /* must start idle; the machine reads it at once */
    unsigned char chname[MAXCHNAME];
    int chnl = 0;
    int ccode = 0;
    int digit;
    int c;

    iline = 1;
    while ((c = getchar()) != EOF) {
        switch (state) {
        case S_IDLE:
            if (c == '&') {
                state = S_AND;
            } else {
                putchar(c);
            }
            break;
        case S_AND:
            if (c == '#') {
                state = S_SHARP;
                ccode = 0;
            } else {
                state = S_CHNAM;
                chname[0] = (unsigned char)c;
                chnl = 1;
            }
            break;
        case S_SHARP:
            if ((c >= '0') && (c <= '9')) {
                ccode = add_digit(ccode,c - '0',10);
            } else if ((c == 'x') && (ccode == 0)) {
                /* Slightly wrong; this permits &#0x.. to be taken as hex.
                   Not troublesome enough to worry about. */
                state = S_HEX;
            } else {
                c = end_code(c,ccode);
                state = S_IDLE;
            }
            break;
        case S_HEX:
            digit = hexval(c);
            if (digit >= 0) {
                ccode = add_digit(ccode,digit,16);
            } else {
                c = end_code(c,ccode);
                state = S_IDLE;
            }
            break;
        case S_CHNAM:
            if (c == ';') {
                printch(&chname[0],chnl);
                state = S_IDLE;
            } else if (chnl < MAXCHNAME) {
                chname[chnl++] = (unsigned char)c;
            } else {
                /* The buffered name and this character are discarded. */
                fprintf(stderr,"%s: line %d: character name too long: %.*s\n",__progname,iline,MAXCHNAME,(const char *)&chname[0]);
                state = S_IDLE;
            }
            break;
        default:
            abort();
            break;
        }
        if (c == '\n') {
            iline++;
        }
    }

    /* Input ended; deal with whatever reference was still in progress. */
    switch (state) {
    case S_IDLE:
        break;
    case S_AND:
        putchar('&');
        break;
    case S_SHARP:
    case S_HEX:
        /* Re-emit the unterminated reference, always in decimal form. */
        printf("&#%d",ccode);
        break;
    case S_CHNAM:
        /* An unterminated name is decoded as if it had been terminated. */
        printch(&chname[0],chnl);
        break;
    default:
        break;
    }
}

/*
 * Entry point: the only accepted invocation is "andsharp -d".
 */
int main(int ac, char **av)
{
    if ((ac != 2) || (strcmp(av[1],"-d") != 0)) {
        fprintf(stderr,"Usage: %s -d\n",__progname);
        exit(1);
    }
    decode();
    exit(0);
}