From fe658a599e5507fac12b112b3ccb24ab2c9ae0f4 Mon Sep 17 00:00:00 2001 From: aanvi <aanvi@umich.edu> Date: Sun, 18 Mar 2018 22:56:36 -0400 Subject: [PATCH] Integrating wordData struct --- cmake-build-debug/CMakeFiles/clion-log.txt | 5 +---- parser/Parser.h | 3 ++- util/Tokenizer.h | 12 +++++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cmake-build-debug/CMakeFiles/clion-log.txt b/cmake-build-debug/CMakeFiles/clion-log.txt index f239e1b..c7ba5e8 100644 --- a/cmake-build-debug/CMakeFiles/clion-log.txt +++ b/cmake-build-debug/CMakeFiles/clion-log.txt @@ -1,4 +1 @@ -/Applications/CLion.app/Contents/bin/cmake/bin/cmake -DCMAKE_BUILD_TYPE=Debug -G "CodeBlocks - Unix Makefiles" /Users/veronicaday/Desktop/EECS398/eecs_398/project/eecs398-search --- Configuring done --- Generating done --- Build files have been written to: /Users/veronicaday/Desktop/EECS398/eecs_398/project/eecs398-search/cmake-build-debug +Toolchains are not configured Configure diff --git a/parser/Parser.h b/parser/Parser.h index c19fff2..357c580 100644 --- a/parser/Parser.h +++ b/parser/Parser.h @@ -22,6 +22,7 @@ class Parser public: + Parser ( ProducerConsumerQueue < string > * urlFrontierIn) { urlFrontier = urlFrontierIn; @@ -33,7 +34,7 @@ public: * @return */ // TODO need to change vector type to word data, change where struct is declared - const unordered_map< string, vector< int>> * execute ( Document* document) + const unordered_map< string, vector< Tokenizer::wordData>> * execute ( Document* document) { Tokenizer tokenizer; parse ( document->DocToString (), &tokenizer ); diff --git a/util/Tokenizer.h b/util/Tokenizer.h index bf867b5..c0c11ab 100644 --- a/util/Tokenizer.h +++ b/util/Tokenizer.h @@ -5,18 +5,20 @@ #include <vector> #include "stringProcessing.h" #include "Stemmer.h" +#include "../parser/Parser.h" using namespace std; -struct wordData { - int offset; - int frequency = 0; - //total num words/unique words?? -}; + class Tokenizer { public: + struct wordData { + int frequency = 0; + int offset; + }; + Tokenizer ( ) { docIndex = new unordered_map< string, vector<wordData>>; -- GitLab