Newer
Older
#include "../ProducerConsumerQueue.h"
#include "../ProducerConsumerQueue.cpp"
/*
Objective: Pulls small dictionaries from the parser and merges them into the
master index.
TODO:
- Use deltas between the offsets
- Save with UTF-8 encoding
- Decide on a concrete block size - 100MB per block?
- Save document endings and other relevant metadata?
*/
// Entry points: both take no arguments and return nothing; their bodies are
// defined outside this header.
// NOTE(review): presumably run() pulls dictionaries off pointerToDictionaries
// and merges them into masterDictionary, and verbose_run() does the same with
// extra output — confirm against the implementation.
void run();
void verbose_run();
// Queue of pointers to the small string -> vector<int> dictionaries that get
// merged into the master index.
// NOTE(review): ownership of the pointed-to maps is not visible here — confirm
// who deletes them. The values are vector<int> while masterDictionary stores
// vector<size_t>; verify the conversion during the merge is intentional.
ProducerConsumerQueue<unordered_map<string, vector<int> > * > pointerToDictionaries;
// Master index: maps a string key to a vector of size_t values.
// NOTE(review): presumably term -> offsets within the current block — confirm.
unordered_map<string, vector<size_t> > masterDictionary;
// Collected DocumentEnding records (type declared elsewhere).
vector<DocumentEnding> docEndings;
// Running counters.
// NOTE(review): meanings are inferred from the names only (total indexed so
// far, words in the current block, documents in the current block) — confirm.
// No initializers are given here, so they must be set before first use.
size_t currentlyIndexed;
size_t currentBlockNumberWords;
size_t currentBlockNumberDocs;
#endif /*indexer_h*/