#include "parse.h"
#include "indextext.h"
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "database.h"

enum {
    INPUT_LINE_SIZE = 1024,          /* capacity of the list-line buffer */
    PACK_INTERVAL = 128,             /* parsed files between pack_database() calls */
    ADDITIONAL_TERM_DENSITY = 5000,  /* density argument used for extra terms */
    ADDITIONAL_TERM_FIRST_LINE = 1   /* firstline argument used for extra terms */
};

/*
 * Parser error callback: reports errorMessage on stderr together with the
 * current value of currentLine.
 */
void yyerror(const char* errorMessage)
{
    fprintf(stderr, "YYERROR DURING PARSING LINE %i: %s\n",
            currentLine, errorMessage);
}

/*
 * Returns non-zero when name is strictly longer than suffix and ends with
 * it, compared case-insensitively.
 */
static int ends_with_ignore_case(const char* name, size_t name_length,
                                 const char* suffix)
{
    size_t suffix_length = strlen(suffix);

    if (name_length <= suffix_length) {
        return 0;
    }
    return strcasecmp(name + (name_length - suffix_length), suffix) == 0;
}

/*
 * Decides from the file name alone whether a file should be skipped by the
 * text parser.
 *
 * Returns 1 when file_path ends (case-insensitively) in one of the listed
 * suffixes and has at least one character in front of it, 0 otherwise.
 * Each suffix is checked against its own length, so short names such as
 * "a.gz" or "a.Z" are recognised as well.
 */
int is_binary_file(const char* file_path)
{
    static const char* const binary_suffixes[] = {
        ".gif", ".xbm", ".Z", ".gz", ".tar", ".jpg"
    };
    size_t path_length = strlen(file_path);
    size_t i;

    for (i = 0; i < sizeof binary_suffixes / sizeof binary_suffixes[0]; i++) {
        if (ends_with_ignore_case(file_path, path_length, binary_suffixes[i])) {
            return 1;
        }
    }
    return 0;
}

/*
 * Reads one line from list_file and indexes the file it names.
 *
 * A line has the form "<path>[ <additional terms>]": everything up to the
 * first space is the path, the remainder (if any) is handed to
 * simplify_tag()/add_word() as extra index terms for that file.
 *
 * Files not rejected by is_binary_file() are run through yyparse(); the
 * lexer is set up with lex_init() for the first such file and with
 * lex_restart() for every later one. pack_database() is called each time
 * the count of parsed files reaches a multiple of PACK_INTERVAL.
 *
 * Returns 1 when the named file was opened and indexed, 0 when no line
 * could be read or the file could not be opened.
 *
 * NOTE(review): main() stops at the first 0, so a single unopenable file
 * ends the whole run - confirm that this is intended.
 */
int index_next_file(FILE* list_file)
{
    static int number_of_files = 0;
    static char input_line[INPUT_LINE_SIZE];
    char* additional_terms;
    const char* line_end;
    FILE* infile;
    wordlist* words;
    wordlist* nextwords;
    FileID fileID;
    int tag_length;

    if (fgets(input_line, INPUT_LINE_SIZE, list_file) == NULL) {
        return 0;
    }

    /* Strip the newline only if one is really there: the final line of the
       input may lack it, and the buffer may even hold an empty string. */
    input_line[strcspn(input_line, "\n")] = '\0';
    line_end = input_line + strlen(input_line);

    /* Split "<path> <terms>" at the first space. */
    additional_terms = strchr(input_line, ' ');
    if (additional_terms != NULL) {
        *additional_terms++ = '\0';
    }

    infile = fopen(input_line, "r");
    if (infile == NULL) {
        /* Name the file that failed; perror appends the errno text itself. */
        perror(input_line);
        return 0;
    }

    initializeindex();
    fileID = add_file(input_line);
    printf("INDEXING %s\n", input_line);

    if (!is_binary_file(input_line)) {
        if (number_of_files++ == 0) {
            lex_init(infile);
        } else {
            lex_restart(infile);
            /* pack up the database once for every PACK_INTERVAL files parsed */
            if ((number_of_files % PACK_INTERVAL) == 0) {
                pack_database();
            }
        }
        yyparse();
        if (strlen(currentTitle) > 0) {
            add_file_property(fileID, "title", currentTitle);
        }
    }

    if (additional_terms != NULL) {
        /* input_line is static, so bytes beyond line_end may still hold text
           from an earlier, longer line; never step past the current line.
           Presumably simplify_tag() terminates the tag in place and returns
           its length - verify against its definition. */
        while (additional_terms <= line_end &&
               (tag_length = simplify_tag(additional_terms)) > 0) {
            add_word(additional_terms, fileID,
                     ADDITIONAL_TERM_DENSITY, ADDITIONAL_TERM_FIRST_LINE);
            additional_terms += tag_length + 1;
        }
    }

    /* Move every word returned by orphanindex() into the database, freeing
       each list node and its string as we go. */
    words = orphanindex();
    while (words != NULL) {
        add_word(words->word, fileID, words->density, words->firstline);
        nextwords = words->next;
        free(words->word);
        free(words);
        words = nextwords;
    }

    fclose(infile);
    return 1;
}

/*
 * Entry point: opens the database named by the single command-line argument
 * for writing, indexes the files listed on stdin one line at a time until
 * index_next_file() returns 0, then closes the database.
 *
 * Returns 0 on normal completion, 1 on a usage error or when the database
 * cannot be opened.
 */
int main(int argc, char* argv[])
{
    if (argc != 2) {
        fprintf(stderr, "\tSyntax: %s <database>\n", argv[0]);
        return 1;
    }

    if (open_database(argv[1], database_write) == 0) {
        fprintf(stderr, "\tUnable to open database %s\n", argv[1]);
        return 1;
    }

    fprintf(stderr, "Reading file names from stdin...\n");
    while (index_next_file(stdin)) {
        continue;
    }

    close_database();
    printf("All files indexed. Exiting normally.\n");
    return 0;
}