/* * CIDR block calculator. * * Takes a set of dotted-quad IP addresses, ranges, or CIDR blocks, on * stdin; at EOF, prints a minimal set of CIDR ranges to stdout. * * Input consists of a stream of dotted-quads, pairs of dotted-quads * separated by a dash, or dotted-quads with /number widths after * them, any of which may have a ! prefix. Dash-separated ranges * refer to all addresses between the two, inclusive; an address with * a /number after it refers to a CIDR-style block. It is not an * error for an address to be specified in the input more than once. * Whitespace may appear anywhere except within a dotted-quad or CIDR * width. A ! prefix is a negator: it removes the negated address or * addresses from the set being built. Characters other than digits, * dots, dashes, slashes, and whitespace are errors. If a number in a * dotted-quad is greater than 255, or a CIDR width is greater than * 32, or other syntax errors occur (such as too many dots without * whitespace, dash, or slash) a complaint is printed and the * dotted-quad, range, or block in which it appears is skipped. * * Default output consists of zero or more lines, each a * dotted-quad/width CIDR net-with-mask, including all and only the * addresses in the input. It will be a minimal set, in that no two * blocks in the output can be collapsed without resorting to * noncontiguous netmasks. No attempt is made to use negation syntax * such as the ! accepted on input; I suspect producing optimal output * under those conditions is NP-hard, and certainly is harder than I * want to get into. * * When run with -bind, output consists of a string of lines for use in * an IP-based DNSBL run by BIND. The argument to BIND is taken as a * CNAME target, and the output consists of lines like * *.44.10 IN CNAME CnameTarget * $GENERATE 128-191 *.$.46.10 CNAME CnameTarget * (the above sample corresponds to 10.44.0.0/16 and 10.46.128.0/18). * The CNAME target string may be anything; if it does not conform to * the syntax, the resulting output will not be valid BIND input. * When run this way, adjacent CIDR blocks are collapsed into ranges * when possible; for example, 10.48.0.0/19 plus 10.48.32.0/20 will * turn into a single $GENERATE using 0-47. * * When run with -list, output consists of a stream of /32s (without * any CIDR width markers). Large netblocks produce lots of output in * this mode. :) * * When run with -!, runs as above except that, effectively, the set of * addresses is complemented after reading all input and before * printing. * * Compile-time options: * * -DNO_PROGNAME * Provide __progname, for systems that don't have it. If * it shows up undefined at link time, try compiling with * this turned on. * * This file is in the public domain. */ #include #include #include extern const char *__progname; #ifdef NO_PROGNAME const char *__progname; int main(int, char **); int main_(int, char **); int main(int ac, char **av) { __progname = av[0]; return(main_()); } #define main main_ #endif /* * The CNAME target for -bind mode. This is a nil pointer otherwise, * and also serves as the flag for -bind mode. */ static const char *bind_target = 0; /* * The flag for -list mode. */ static int list_32s = 0; /* * The flag for -! mode. */ static int printneg = 0; /* * True to generate debugging output. */ static int debug = 0; /* * State for BIND block collapsing. */ static unsigned long int bind_l; static unsigned long int bind_h; static int bind_have; /* * We store the input as a binary tree. Conceptually, the tree is a * fully-populated depth-32 binary tree, with each leaf marked as * either present or absent in the input. Of course, that's a totally * impractical representation. What we actually do is to store that * tree, but whenever a subtree has all its leaves absent, the pointer * that would normally point to it is replaced with a NONE pointer; if * a subtree has all its leaves present, an ALL pointer. (Leaf * pointers are always either NONE or ALL, according as the leaf in * question is absent or present.) * * This could probably be stored more efficiently by allowing a single * C structure to represent multiple levels in the tree when there's * only one non-NONE path down through those levels, but the * additional code complexity isn't worth it. * * Since we don't have to store "up" pointers, we don't, and a node * consists of nothing but two child pointers. We use an array[2] * rather than two separate struct elements because at one place it's * convenient to use a computed index, which would otherwise need to * be a ? : expression. * * The reason for choosing this data structure is that it makes * extracting CIDR netblocks - our primary output - trivial. All we * have to do is collapse every node with two ALL children into an ALL * node itself; when this process can go no farther, an optimal CIDR * set consists of the address/mask values corresponding to the ALL * nodes. (We actually do the collapsing as we build the tree, rather * than deferring it until everything's done.) * * We could use a nil pointer for NONE or ALL, but don't, if only for * error checking. */ typedef struct node NODE; struct node { NODE *sub[2]; } ; /* The root of the tree. */ static NODE *root; /* * The NONE and ALL pointers. I know of no portable way of generating * distinctive NODE * values without actually allocating NODEs for * them to point to, except for the nil pointer. Fortunately, NODEs * are small enough that statically allocating two isn't a big deal. */ static NODE none_node; #define NONE (&none_node) static NODE all_node; #define ALL (&all_node) /* * Free a node and all nodes under it. Useful when we're setting ALL * at a point relatively far up in the tree (which happens if a range * or block subsumes some already-entered individual addresses). */ static void free_tree(NODE *n) { if ((n == NONE) || (n == ALL)) return; free_tree(n->sub[0]); free_tree(n->sub[1]); free(n); } /* * Add or remove an address to/from a node. Conceptually, you pass a * node to this routine. But since it may want to replace the node * with ALL or NONE, it needs to actually be passed an additional * level of pointer, NODE ** instead of NODE *. This has the * convenient property that this routine can also handle replacing * NONE or ALL nodes with real nodes. a is the address being added or * removed. bit says how far down in the tree this node is, or more * accurately how far up; 31 corresponds to the root, 0 to the last * level of internal nodes, and -1 to leaves. end is a value which * describes how large a block is being added; it is -1 to add a * single leaf (a /32), 0 to add a pair of addresses (a /31), etc. * setto is what we want this node to be set to; it should be ALL to * add addresses or NONE to remove them. other is the leaf of the * other polarity to setto (ie, NONE if setto is ALL, and vice versa). * * Algorithm: Recursive. If the node is already as we want it, * everything we want is already done, so do nothing. Otherwise, if * we've reached the level at which we want to operate (bit <= end), * free the subtree if it's not NONE/ALL and replace it with setto, * and we're done. Otherwise, we have to walk down either the 0 link * or the 1 link. If this node is presently NONE or ALL, we have to * create a real node; then we recurse down whichever branch of the * tree corresponds to the appropriate bit of a. After adding, we * check, and if both our subtrees are the same (must be ==setto), we * collapse this node. (If further collapsing is possible at the next * level up, our caller will take care of it.) */ static void set_node(NODE **np, unsigned long int a, int bit, int end, NODE *setto, NODE *other) { NODE *n; n = *np; if (n == setto) return; if (bit <= end) { if (n != other) free_tree(n); *np = setto; return; } if (n == other) { n = malloc(sizeof(NODE)); n->sub[0] = other; n->sub[1] = other; *np = n; } set_node(&n->sub[(a>>bit)&1],a,bit-1,end,setto,other); if ((n->sub[0] == setto) && (n->sub[1] == setto)) { free(n); *np = setto; } } /* * Walk the tree, calling a function for each CIDR block found, * internal version. toskip nodes are no-ops; toprint nodes are CIDR * blocks. Otherwise, we recurse, first down the 0 branch, then the 1 * branch. v is the address-so-far, maintained as part of the * recursive calls. * * The abort() is a can't-happen; it indicates that we have a node * that's not NONE or ALL at the bottom level of the tree, which is * supposed to hold only leaves. */ static void dump_walk_aux(NODE *n, unsigned long int v, int bit, void (*cb)(unsigned long int, int), NODE *toprint, NODE *toskip) { if (n == toskip) return; if (n == toprint) { (*cb)(v,31-bit); return; } if (bit < 0) abort(); dump_walk_aux(n->sub[0],v,bit-1,cb,toprint,toskip); dump_walk_aux(n->sub[1],v|(1<>24,(v>>16)&0xff,(v>>8)&0xff,v&0xff,width); } /* * /32 output. We need to be slightly careful here, in case we want to * print a /0 on a 32-bit machine. */ static void output_32(unsigned long int v, int width) { unsigned long int n; unsigned long int nmax; if (width == 0) { output_32(v,1); v |= 0x80000000UL; width = 1; } nmax = 1UL << (32-width); for (n=0;n>24,(v>>16)&0xff,(v>>8)&0xff,v&0xff); v ++; } } /* * Initialize for BIND output. */ static void init_bind(void) { bind_have = 0; } /* * Generate a line of BIND output. * * There are basically nine kinds of line possible: * * 40.30.20.10 IN CNAME %s * $GENERATE 40-50 $.30.20.10 CNAME %s * *.30.20.10 IN CNAME %s * $GENERATE 30-40 *.$.20.10 CNAME %s * *.20.10 IN CNAME %s * $GENERATE 20-30 *.$.10 CNAME %s * *.10 IN CNAME %s * $GENERATE 10-20 *.$ CNAME %s * * Our caller is responsible for ensuring that the arguments fit one of * those patterns. */ static void gen_bind_line(unsigned long int l, unsigned long int h) { int n; int gen; int tabto; if (debug) { printf("; gen_bind_line %lu.%lu.%lu.%lu - %lu.%lu.%lu.%lu\n", l>>24, (l>>16)&0xff, (l>>8)&0xff, l&0xff, h>>24, (h>>16)&0xff, (h>>8)&0xff, h&0xff ); } if (h == l) { n = printf("%lu.%lu.%lu.%lu",l&0xff,(l>>8)&0xff,(l>>16)&0xff,l>>24); gen = 0; } else if (((l & 0xff) == 0) && (h == l + 0xff)) { n = printf("*.%lu.%lu.%lu",(l>>8)&0xff,(l>>16)&0xff,l>>24); gen = 0; } else if (((l^h) & 0xffffff00) == 0) { n = printf("$GENERATE %lu-%lu $.%lu.%lu.%lu CNAME", l&0xff,h&0xff,(l>>8)&0xff,(l>>16)&0xff,l>>24); gen = 1; } else if (((l & 0xffff) == 0) && (h == l + 0xffff)) { n = printf("*.%lu.%lu",(l>>16)&0xff,l>>24); gen = 0; } else if ( ((l & 0xff) == 0) && (((l^h) & 0xffff00ff) == 0xff) ) { n = printf("$GENERATE %lu-%lu *.$.%lu.%lu CNAME", (l>>8)&0xff,(h>>8)&0xff,(l>>16)&0xff,l>>24); gen = 1; } else if (((l & 0xffffff) == 0) && (h == l + 0xffffff)) { n = printf("*.%lu",l>>24); gen = 0; } else if ( ((l & 0xffff) == 0) && (((l^h) & 0xff00ffff) == 0xffff) ) { n = printf("$GENERATE %lu-%lu *.$.%lu CNAME", (l>>16)&0xff,(h>>16)&0xff,l>>24); gen = 1; } else if ( ((l & 0xffffff) == 0) && (((l^h) & 0x00ffffff) == 0xffffff) ) { n = printf("$GENERATE %lu-%lu *.$ CNAME", l>>24,h>>24); gen = 1; } else { abort(); } tabto = gen ? 40 : 24; if (n < tabto) { while (n < tabto) { printf("\t"); n = (n + 8) & ~7; } } else { printf(" "); n ++; } printf("%s%s\n",gen?"":"IN CNAME\t",bind_target); } /* * Flush BIND output. We take bind_l and bind_h and print one or more * lines of output by calling gen_bind_line. * * The reason for "or more" is that the input range may not be * representible in a single line. Consider, for example, the range * 10.1.14.7 - 13.3.67.200, which produces seven $GENERATE lines. */ static void flush_bind(void) { unsigned long int t; if (! bind_have) return; bind_have = 0; while (1) { if (debug) { printf("; flush_bind %lu.%lu.%lu.%lu - %lu.%lu.%lu.%lu\n", bind_l>>24, (bind_l>>16)&0xff, (bind_l>>8)&0xff, bind_l&0xff, bind_h>>24, (bind_h>>16)&0xff, (bind_h>>8)&0xff, bind_h&0xff ); } if (((bind_h^bind_l) & 0xffffff00UL) == 0) { gen_bind_line(bind_l,bind_h); return; } else if (bind_l & 0x000000ffUL) { t = bind_l | 0x000000ffUL; } else if (((bind_h^bind_l) & 0xffff0000UL) == 0) { if ((bind_h & 0x000000ffUL) != 0x000000ffUL) { t = (bind_h & 0xffffff00UL) - 1; } else { gen_bind_line(bind_l,bind_h); return; } } else if (bind_l & 0x0000ffffUL) { t = bind_l | 0x0000ffffUL; } else if (((bind_h^bind_l) & 0xff000000UL) == 0) { if ((bind_h & 0x0000ffffUL) != 0x0000ffffUL) { t = (bind_h & 0xffff0000UL) - 1; } else { gen_bind_line(bind_l,bind_h); return; } } else if (bind_l & 0x00ffffffUL) { t = bind_l | 0x00ffffffUL; } else { if ((bind_h & 0x00ffffffUL) != 0x00ffffffUL) { t = (bind_h & 0xff000000UL) - 1; } else { gen_bind_line(bind_l,bind_h); return; } } gen_bind_line(bind_l,t); bind_l = t + 1; } } /* * BIND output. Notice adjacent blocks; when we get a non-adjacent * block, generate output for whatever we have. */ static void output_bind(unsigned long int v, int width) { if (width == 0) { printf("*\t\t\tIN CNAME\t%s\n",bind_target); return; } if (bind_have && (bind_h+1 == v)) { bind_h = v + (0x7fffffffUL >> (width-1)); return; } flush_bind(); bind_l = v; bind_h = v + (0x7fffffffUL >> (width-1)); bind_have = 1; } /* * Add one address. Used when the input contains an unadorned * dotted-quad. All we need do is call set_node appropriately. */ static void save_one_addr(unsigned long int a, int negated) { set_node(&root,a,31,-1,negated?NONE:ALL,negated?ALL:NONE); } /* * Add a range of addresses. This is used for the * "10.20.30.40 - 10.20.32.77" style of input. All we do is start at * the bottom of the range and loop, each time computing the largest * block that doesn't go below the bottom, shrinking it as far as * necessary to ensure it doesn't go above the top, adding it, and * moving the `bottom' value to just above the block. Lather, rinse, * repeat...until the whole range is covered. */ static void save_range(unsigned long int a1, unsigned long int a2, int negated) { int bit; unsigned long int m; unsigned long int t; if (a1 > a2) { fprintf(stderr,"%s: invalid range (ends reversed)\n",__progname); return; } while (a1 <= a2) { m = (a1 - 1) & ~a1; while (a1+m > a2) m >>= 1; for (bit=-1,t=m;t;bit++,t>>=1) ; set_node(&root,a1,31,bit,negated?NONE:ALL,negated?ALL:NONE); a1 += m+1; } } /* * Add a CIDR-style block. This matches our storage method so well * it's just a single call to set_node. The reason for the ?: * operator is that C doesn't promise that << by 32 actually shifts; * 32-bit machines often use only the low five bits of the shift * count. */ static void save_cidr(unsigned long int a, int n, int negated) { set_node(&root,n?a&0xffffffff&(0xffffffff<<(32-n)):0,31,31-n,negated?NONE:ALL,negated?ALL:NONE); } /* * Read input. Implementation is a simple state machine. * * State values for the various input syntaxes (a=10, b=11, etc): * * input 1 2 3 . 4 5 . 6 7 . 8 9 1 2 3 . 4 5 ... * state 1 1 2 2 2 3 4 4 5 6 6 7 8 8 9 9 9 2 2 2 3 4 4 ... * * input 1 2 3 . 4 5 . 6 7 . 8 9 - 1 1 . 2 2 . 3 3 . 4 4 ... * state 1 1 2 2 2 3 4 4 5 6 6 7 8 8 9 a a b b c d d e f f g h h 1 1 ... * * input 1 2 3 . 4 5 . 6 7 . 8 9 / 1 5 ... * state 1 1 2 2 2 3 4 4 5 6 6 7 8 8 9 i i j j 1 1 ... * * input ! ... ! ... * state 1 1 1 ... 9 9 1 ... (with internal flag) * * ! prefixes are handled with a flag, not represented in the state * machine, which is set when a ! is recognized. This flag is cleared * whenever an address or block is recognized. * * a holds the address being constructed (or, for states 9, i, j, the * address just constructed); n holds the number being accumulated. * a1 is used to hold the first address when a range is being read * (the second address is accumulated into a). * * If an error occurs, n is set to -1, and further errors are * suppressed; we stay this way until we begin a new dotted-quad, by * entering state 2 from state 9 or by entering state 1 upon seeing * whitespace in most other states. * * The "default: abort();" cases are can't-happen firewalls. */ static void read_input(void) { unsigned long int a1; unsigned long int a; int line; int n; int c; int state; int negated; state = 1; line = 1; n = 0; negated = 0; while (1) { c = getchar(); if (c == EOF) break; switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': switch (state) { default: abort(); break; case 1: n = c - '0'; state = 2; break; case 3: case 5: case 7: case 12: case 14: case 16: state ++; case 2: case 4: case 6: case 8: case 11: case 13: case 15: case 17: if (n < 0) break; n = (n * 10) + (c - '0'); if (n > 255) { fprintf(stderr,"%s: line %d: out-of-range number in input\n",__progname,line); n = -1; } break; case 9: if (n >= 0) { save_one_addr(a,negated); negated = 0; n = c - '0'; } state = 2; break; case 10: case 18: if (n >= 0) n = c - '0'; state ++; break; case 19: if (n < 0) break; n = (n * 10) + (c - '0'); if (n > 32) { fprintf(stderr,"%s: line %d: out-of-range width in input\n",__progname,line); n = -1; } break; } break; case '!': switch (state) { default: abort(); break; case 9: if (n >= 0) { save_one_addr(a,negated); negated = 0; } state = 1; /* fall through */ case 1: if (! negated) { negated = 1; break; } /* fall through */ case 2 ... 8: case 10 ... 19: fprintf(stderr,"%s: line %d: misplaced ! in input\n",__progname,line); break; } break; case '.': switch (state) { default: abort(); break; case 1: case 3: case 5: case 7 ... 8: case 10: case 12: case 14: case 16 ... 19: if (n >= 0) fprintf(stderr,"%s: line %d: . at an inappropriate place\n",__progname,line); n = -1; break; case 2: case 11: a = 0; case 4: case 6: case 13: case 15: if (n >= 0) { a = (a << 8) | n; n = 0; } state ++; break; case 9: if (n >= 0) { save_one_addr(a,negated); negated = 0; fprintf(stderr,"%s: line %d: . at an inappropriate place\n",__progname,line); } n = -1; break; } break; case '-': switch (state) { default: abort(); break; case 1 ... 7: case 10 ... 19: fprintf(stderr,"%s: line %d: - at an inappropriate place\n",__progname,line); n = -1; break; case 8: if (n >= 0) a1 = (a << 8) | n; state = 10; break; case 9: a1 = a; state = 10; break; } break; case '/': switch (state) { default: abort(); break; case 1 ... 7: case 10 ... 19: fprintf(stderr,"%s: line %d: / at an inappropriate place\n",__progname,line); n = -1; break; case 8: if (n >= 0) a = (a << 8) | n; state = 18; break; case 9: state = 18; break; } break; case '\n': line ++; case ' ': case '\t': case '\r': switch (state) { default: abort(); break; case 1: case 9 ... 10: case 18: break; case 2 ... 7: case 11 ... 16: if (n >= 0) fprintf(stderr,"%s: line %d: whitespace at an inappropriate place\n",__progname,line); state = 1; break; case 8: if (n >= 0) a = (a << 8) | n; state = 9; break; case 17: if (n >= 0) { save_range(a1,(a<<8)|n,negated); negated = 0; } state = 1; break; case 19: if (n >= 0) { save_cidr(a,n,negated); negated = 0; } state = 1; break; } break; default: fprintf(stderr,"%s: invalid character 0x%02x in input\n",__progname,c); n = -1; state = 2; break; } } /* Don't bother clearing negated below, since we're returning */ switch (state) { default: abort(); break; case 1: break; case 2 ... 7: case 10 ... 16: if (n >= 0) fprintf(stderr,"%s: line %d: EOF at an inappropriate place\n",__progname,line); break; case 8: if (n >= 0) save_one_addr((a<<8)|n,negated); break; case 9: if (n >= 0) save_one_addr(a,negated); break; case 17: if (n >= 0) save_range(a1,(a<<8)|n,negated); break; } } /* * After accumulating all input, dump out the output. Just check the * mode and call dump_walk with the corresponding output generation * function. But in -bind mode, don't forget to init_bind() and * flush_bind(). */ static void dump_output(NODE *toprint, NODE *toskip) { if (list_32s) { dump_walk(root,&output_32,toprint,toskip); } else if (bind_target) { init_bind(); dump_walk(root,&output_bind,toprint,toskip); flush_bind(); } else { dump_walk(root,&output_cidr,toprint,toskip); } } /* * Parse the command line. */ static void handleargs(int ac, char **av) { int skip; int errs; skip = 0; errs = 0; for (ac--,av++;ac;ac--,av++) { if (skip > 0) { skip --; continue; } if (**av != '-') { fprintf(stderr,"%s: stray argument `%s'\n",__progname,*av); errs ++; continue; } if (0) { needarg:; fprintf(stderr,"%s: %s needs a following argument\n",__progname,*av); errs ++; continue; } #define WANTARG() do { if (++skip >= ac) goto needarg; } while (0) if (!strcmp(*av,"-bind")) { WANTARG(); bind_target = av[skip]; continue; } if (!strcmp(*av,"-list")) { list_32s = 1; continue; } if (!strcmp(*av,"-!")) { printneg = 1; continue; } if (!strcmp(*av,"-debug")) { debug = 1; continue; } #undef WANTARG fprintf(stderr,"%s: unrecognized option `%s'\n",__progname,*av); errs ++; } if (errs) exit(1); } /* * By this point, main() is pretty much trivial. */ int main(int, char **); int main(int ac, char **av) { handleargs(ac,av); root = NONE; read_input(); dump_output(printneg?NONE:ALL,printneg?ALL:NONE); exit(0); }