diff --git a/ISRWord-tests b/ISRWord-tests index 41505cee0a4e4ce14664ef97256d66e1933e5621..9bfea448e62a90dd810d260f411a5676039a6bdb 100755 Binary files a/ISRWord-tests and b/ISRWord-tests differ diff --git a/Indexer-twitter-tests b/Indexer-twitter-tests index 2414057af7581d3bd95bba9cc353255cde8ce68d..66426b893f1f10119f16dc08d865cd7d0191b3fe 100755 Binary files a/Indexer-twitter-tests and b/Indexer-twitter-tests differ diff --git a/indexer/Indexer.cpp b/indexer/Indexer.cpp index 3b4b6bc0971568a72e58ef1fd9e8c5d7665b7889..e31be2aeca4dcb1dec0d535af1930e72bb115349 100755 --- a/indexer/Indexer.cpp +++ b/indexer/Indexer.cpp @@ -43,11 +43,13 @@ void Indexer::run ( ) if(currentBlockNumberWords >= 20000) { save(); + saveWordSeek(); reset(); } } save(); + saveWordSeek(); reset(); saveChunkDictionary(); } @@ -146,12 +148,11 @@ void Indexer::save ( ) } close( file ); - currentFile++; } void Indexer::saveChunkDictionary ( ) { - MMDiskHashTable dhtChunk = MMDiskHashTable(util::GetCurrentWorkingDir() + "/indexer/output/index-master.txt", 30, 168); + MMDiskHashTable dhtChunk = MMDiskHashTable(util::GetCurrentWorkingDir() + "/indexer/output/master.txt", 30, 168); for ( auto word : chunkDictionary ) { string key = word.first; @@ -167,6 +168,22 @@ void Indexer::saveChunkDictionary ( ) } } +void Indexer::saveWordSeek() { + MMDiskHashTable wordSeek = MMDiskHashTable( + util::GetCurrentWorkingDir() + "/indexer/output/" + to_string(currentFile) + "-wordseek.txt", 30, 168); + for (auto word : postingsSeekTable) { + string key = word.first; + if (key.size() > 30) { + key.resize(30); + } + string value = ""; + for (auto entry : word.second) { + value += ("<" + to_string(entry.offset) + ", " + to_string(entry.realLocation) + "> "); + } + wordSeek.insert(key, value); + } +} + void Indexer::verbose_save ( ) { map< string, vector< size_t > > maps( masterDictionary.begin( ), masterDictionary.end( ) ); @@ -190,4 +207,5 @@ void Indexer::reset ( ) currentBlockNumberWords = 0; currentBlockNumberDocs = 0; - } + currentFile++; + } diff --git a/indexer/Indexer.h b/indexer/Indexer.h index 7b820050ad995f25fa8522d237c114f040b7eb27..13112ae22c76c039da909fd7cb9e5568ddf5e1bf 100755 --- a/indexer/Indexer.h +++ b/indexer/Indexer.h @@ -46,8 +46,8 @@ public: private: void save ( ); - - void saveChunkDictionary ( ); + void saveWordSeek(); + void saveChunkDictionary ( ); void reset ( ); @@ -63,6 +63,7 @@ private: size_t currentBlockNumberWords; size_t currentBlockNumberDocs; - }; + +}; #endif /*indexer_h*/