/* GPL'd: Reads a C program and prints in alphabetical order each * group of variable names that are identical in the first 6 * characters, but different somewhat thereafter. Kludgey, be warned. */ #include #include #include #include #include "getinput.h" #define MAXNAME 20 /* maximum number of characters in a name */ #define MAXUNIQ 1000 /* maximum number of unique names per group */ #define MAXGRP 10000 /* maximum number of name groups */ #define DEFIDNT 6 /* default number of characters that must be * identical between variables */ struct name_t { char name[MAXNAME]; struct name_t *left; /* less than */ struct name_t *right; /* greater than */ }; struct group_t { char basename[MAXNAME]; struct name_t *names; struct group_t *left; /* less than */ struct group_t *right; /* greater than */ }; int base = DEFIDNT; /* the number of characters that must be in common * between words of the same group */ /* return nonzero if word isn't a C keyword, ignore comments and * string literals */ int isvarname(char *s) { static const char *keywords[32] = { "auto", "double", "int", "struct", "break", "else", "long", "switch", "case", "enum", "register", "typedef", "char", "extern", "return", "union", "const", "float", "short", "unsigned", "continue", "for", "unsigned", "void", "default", "goto", "sizeof", "volatile", "do", "if", "static", "while" }; int i; if (isalpha(*s)) for (i = 0; i < 32; i++) { if (strcmp(keywords[i], s) == 0) /* match */ return 0; } return 1; } /* return 0 if s == t, > 0 if s > t, < 0 if s < t */ int basecmp(char *s, char *t) { int i; for (i = 0; i < base; i++) if (s[i] != t[i]) return s[i] - t[i]; return 0; } struct group_t *galloc(void) { return (struct group_t *) malloc(sizeof(struct group_t)); } struct name_t *nalloc(void) { return (struct name_t *) malloc(sizeof(struct name_t)); } struct name_t *fileword(char *word, struct name_t *root) { int cond; if (root == NULL) { root = nalloc(); strcpy(root->name, word); } else if ((cond = strcmp(word, root->name)) > 0) { root->right = fileword(word, root->right); } else if (cond < 0) { root->left = fileword(word, root->left); } /* ignore repeats */ return root; } struct group_t *addword(char *word, struct group_t *root) { int cond; if (root == NULL) { root = galloc(); strcpy(root->basename, word); root->names = fileword(word, root->names); } else if ((cond = basecmp(word, root->basename)) > 0) { root->right = addword(word, root->right); } else if (cond < 0) { root->left = addword(word, root->left); } else { root->names = fileword(word, root->names); } return root; } void printnames(struct name_t *root) { if (root != NULL) { printnames(root->left); printf("%s, ", root->name); printnames(root->right); } } void printgroups(struct group_t *root) { if (root != NULL) { printgroups(root->left); printf("%s: ", root->basename); printnames(root->names); putchar('\n'); printgroups(root->right); } } int main(int argc, char *argv[]) { char word[MAXNAME]; struct group_t *groups; /* get arguments */ if (argc == 2) { base = atoi(argv[1]); if (base == 0) { printf("Invalid bullshit: %s\n", argv[1]); exit(1); } } else if (argc != 1) { printf("Too many arguments\n"); exit(1); } /* process words */ groups = NULL; while (getword(word, MAXNAME) != EOF) { if (strlen(word) >= DEFIDNT && isvarname(word)) { groups = addword(word, groups); } } /* print groups */ printgroups(groups); return 0; }