/** * Search for string in a text after first preprocess the text for all bigrams. * @author gtowell * Created: May 2021 * **/ #include #include #include #ifndef NULL #define NULL (void *)0 #endif /** A node for part of a linked list. the locations are stored in this linked list * It would be better to use a linked list wrapper struct, but ... * **/ typedef struct llInt { int loc; struct llInt *next; } llInt; /* Actually hold all of the bigram pointers */ llInt *bigrams[128][128]; /** * Return true iff needle and stackpart are identical out to the length of needle * @param needle -- the string to look for * @param stackpart -- the string to look for the deele within * @return true or false * **/ int compare(char *needle, char *stackpart) { while (*needle && *stackpart) { if (*needle != *stackpart) { return 0; } needle++; stackpart++; } return (*needle == '\0'); } /** * A riduculously complex approach. * This function return a pointer to a linked list of locations at with needle * can be found within haystack. * @param needle -- the string to be looked for * @param haystack -- the place to look * @return a linked list of locations (all of the linked list elements must be freed eventually) * **/ llInt* findNeedle(char* needle, char* haystack) { llInt *rtn = NULL; if (needle == NULL) return NULL; if ((needle+1)== NULL) return NULL; char aaa = *needle; char bbb = *(needle + 1); llInt * candidates = bigrams[aaa][bbb]; while (candidates!=NULL) { if (compare((needle+2), (haystack+candidates->loc+2))) { llInt * newLL = malloc(1 * sizeof(llInt)); newLL->loc = candidates->loc; newLL->next = rtn; rtn = newLL; } candidates = candidates->next; } return rtn; } /** * Preprocess the haystack to extract evey bigram location * This structure is actually larger than the original text * @param aa the text to be preprocessed. * **/ void preprocess(char * aa) { int loc = 0; while (*(aa + 1) != '\0') { char aaa = *aa; char bbb = *(aa + 1); llInt *newll = malloc(1 * sizeof(llInt)); newll->loc = loc; if (bigrams[aaa][bbb]!=NULL) { newll->next = bigrams[aaa][bbb]; } bigrams[aaa][bbb] = newll; aa++; loc++; } } /** * Make a random string of the given length * **/ char* makeString(long len) { char *aa = malloc((len + 1) * sizeof(char)); for (int i = 0; i < len; i++) aa[i] = 'a' + (rand() % 26); aa[len] = '\0'; return aa; } /** * Print the bigram structure * Only use this for small strings * **/ void printbigrams() { for (int i = 0; i < 128; i++) { for (int j = 0; j < 128; j++) { if (bigrams[i][j]!=NULL) { printf("\n%c%c ", i, j); llInt *ll = bigrams[i][j]; while (ll!=NULL) { printf(" %d", ll->loc); ll = ll->next; } } } } } /** * Free a linked list * **/ void freeLocs(llInt * locs) { while (locs!=NULL) { llInt *n = locs->next; free(locs); locs = n; } } int main(int argc, char const *argv[]) { srand(time(NULL)); for (int i = 0; i < 128; i++) { for (int j = 0; j < 128; j++) { bigrams[i][j] = NULL; } } char *haystack = makeString(atol(argv[2])); //printf("%s\n", haystack); preprocess(haystack); //printbigrams(); int cou = atoi(argv[3]); for (int i = 0; i < cou; i++) { char *needle = makeString(atol(argv[1])); llInt *locs = findNeedle(needle, haystack); llInt *ll = locs; //printf("\n%s\n", needle); while (ll != NULL) { printf(" %d", ll->loc); ll = ll->next; } freeLocs(locs); free(needle); } for (int i = 0; i < 128; i++) { for (int j = 0; j < 128; j++) { freeLocs(bigrams[i][j]); } } free(haystack); return 0; }