diff --git a/Corpus-tests b/Corpus-tests index f187735af5ca51c3ccffb6cb67e7e03c5c379967..139586c0da7025684cbfac27aa56c9220c735ee4 100755 Binary files a/Corpus-tests and b/Corpus-tests differ diff --git a/indexer/Corpus.h b/indexer/Corpus.h index c8c244a90fb5183c461fc0a43ef8edcba114597b..ce3b8c55655d7d6125255aee8af909fdc1d67809 100644 --- a/indexer/Corpus.h +++ b/indexer/Corpus.h @@ -16,9 +16,6 @@ public: size_t numberDocuments; size_t numberWords; std::vector<Chunk> chunks; - -private: - MMDiskHashTable master = MMDiskHashTable(util::GetCurrentWorkingDir() + IndexerConstants::pathToIndex + "master.txt", diff --git a/indexer/WordInfo.h b/indexer/WordInfo.h index f089da292a1481bd4d68539788e6c0ffeb162afd..41b5afb421573bdc5f580d08f7bec0cf19744b39 100644 --- a/indexer/WordInfo.h +++ b/indexer/WordInfo.h @@ -6,11 +6,6 @@ class WordInfo { public: - /** - * Meant to parse in input from master DHT - * @param value Value from DHT - */ - WordInfo() { chunks = std::vector<int>(); frequency = 0; @@ -18,6 +13,10 @@ public: lastLocation = 0; } + /** + * Meant to parse in input from master DHT + * @param value Value from DHT + */ WordInfo(string value) { if(value == "") { return; diff --git a/indexer/tests/CorpusTests.cpp b/indexer/tests/CorpusTests.cpp index a64b7c92f48d07349348b592dfe5dba059cbea12..9a285f5f9ebbea57b5afc413050680b8f2fda8f3 100644 --- a/indexer/tests/CorpusTests.cpp +++ b/indexer/tests/CorpusTests.cpp @@ -15,5 +15,27 @@ int main() { cout << "\tNumber documents: " << corpus.chunks[i].numberDocs << endl; cout << "\tEnding offset: " << corpus.chunks[i].lastLocation << endl; } + WordInfo momentInfo = corpus.getWordInfo("moment"); + cout << "Moment" << endl; + cout << "\tChunks: "; + for(auto chunk : momentInfo.chunks) { + cout << chunk << " "; + } + cout << endl; + cout << "\tDocument frequency: " << momentInfo.docFrequency << endl; + cout << "\tFrequency: " << momentInfo.frequency << endl; + cout << "\tLast location: " << momentInfo.lastLocation << endl; + + WordInfo lifeInfo = corpus.getWordInfo("life"); + cout << "Life" << endl; + cout << "\tChunks: "; + for(auto chunk : lifeInfo.chunks) { + cout << chunk << " "; + } + cout << endl; + cout << "\tDocument frequency: " << lifeInfo.docFrequency << endl; + cout << "\tFrequency: " << lifeInfo.frequency << endl; + cout << "\tLast location: " << lifeInfo.lastLocation << endl; + return 0; }