diff --git a/cmake-build-debug/CMakeFiles/clion-log.txt b/cmake-build-debug/CMakeFiles/clion-log.txt index f239e1bd9a1b88a2aa7159e5bd73184452a18939..c7ba5e8f9efad023df0acf3c59e29fd413f530aa 100644 --- a/cmake-build-debug/CMakeFiles/clion-log.txt +++ b/cmake-build-debug/CMakeFiles/clion-log.txt @@ -1,4 +1 @@ -/Applications/CLion.app/Contents/bin/cmake/bin/cmake -DCMAKE_BUILD_TYPE=Debug -G "CodeBlocks - Unix Makefiles" /Users/veronicaday/Desktop/EECS398/eecs_398/project/eecs398-search --- Configuring done --- Generating done --- Build files have been written to: /Users/veronicaday/Desktop/EECS398/eecs_398/project/eecs398-search/cmake-build-debug +Toolchains are not configured Configure diff --git a/parser/Parser.h b/parser/Parser.h index c19fff2c4e47f9e1c8da325d77fa47123c5a1e55..357c5803c4d72f6fb7a08025c0d13bef90125480 100644 --- a/parser/Parser.h +++ b/parser/Parser.h @@ -22,6 +22,7 @@ class Parser public: + Parser ( ProducerConsumerQueue < string > * urlFrontierIn) { urlFrontier = urlFrontierIn; @@ -33,7 +34,7 @@ public: * @return */ // TODO need to change vector type to word data, change where struct is declared - const unordered_map< string, vector< int>> * execute ( Document* document) + const unordered_map< string, vector< Tokenizer::wordData>> * execute ( Document* document) { Tokenizer tokenizer; parse ( document->DocToString (), &tokenizer ); diff --git a/util/Tokenizer.h b/util/Tokenizer.h index bf867b5003a6d79600517b9b4b3ddd1b6c4bc3ae..c0c11ab13990a9450f64207c0f78f7fb53605acd 100644 --- a/util/Tokenizer.h +++ b/util/Tokenizer.h @@ -5,18 +5,20 @@ #include <vector> #include "stringProcessing.h" #include "Stemmer.h" +#include "../parser/Parser.h" using namespace std; -struct wordData { - int offset; - int frequency = 0; - //total num words/unique words?? -}; + class Tokenizer { public: + struct wordData { + int frequency = 0; + int offset; + }; + Tokenizer ( ) { docIndex = new unordered_map< string, vector<wordData>>;