Skip to content
Snippets Groups Projects
Commit fe658a59 authored by aanvi's avatar aanvi
Browse files

Integrating wordData struct

parent 8daddacb
No related branches found
No related tags found
1 merge request!2WIP:Crawler parser 2 merge into duplicate url-crawler
/Applications/CLion.app/Contents/bin/cmake/bin/cmake -DCMAKE_BUILD_TYPE=Debug -G "CodeBlocks - Unix Makefiles" /Users/veronicaday/Desktop/EECS398/eecs_398/project/eecs398-search Toolchains are not configured Configure
-- Configuring done
-- Generating done
-- Build files have been written to: /Users/veronicaday/Desktop/EECS398/eecs_398/project/eecs398-search/cmake-build-debug
...@@ -22,6 +22,7 @@ class Parser ...@@ -22,6 +22,7 @@ class Parser
public: public:
Parser ( ProducerConsumerQueue < string > * urlFrontierIn) Parser ( ProducerConsumerQueue < string > * urlFrontierIn)
{ {
urlFrontier = urlFrontierIn; urlFrontier = urlFrontierIn;
...@@ -33,7 +34,7 @@ public: ...@@ -33,7 +34,7 @@ public:
* @return * @return
*/ */
// TODO need to change vector type to word data, change where struct is declared // TODO need to change vector type to word data, change where struct is declared
const unordered_map< string, vector< int>> * execute ( Document* document) const unordered_map< string, vector< Tokenizer::wordData>> * execute ( Document* document)
{ {
Tokenizer tokenizer; Tokenizer tokenizer;
parse ( document->DocToString (), &tokenizer ); parse ( document->DocToString (), &tokenizer );
......
...@@ -5,18 +5,20 @@ ...@@ -5,18 +5,20 @@
#include <vector> #include <vector>
#include "stringProcessing.h" #include "stringProcessing.h"
#include "Stemmer.h" #include "Stemmer.h"
#include "../parser/Parser.h"
using namespace std; using namespace std;
struct wordData {
int offset;
int frequency = 0;
//total num words/unique words??
};
class Tokenizer class Tokenizer
{ {
public: public:
struct wordData {
int frequency = 0;
int offset;
};
Tokenizer ( ) Tokenizer ( )
{ {
docIndex = new unordered_map< string, vector<wordData>>; docIndex = new unordered_map< string, vector<wordData>>;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment