diff --git a/indexer/Indexer.cpp b/indexer/Indexer.cpp index f894020ac6962f15b0719664c1545b70df44fa8c..05dedc6b5903bbb44149a794d9989dfa8f93f18f 100644 --- a/indexer/Indexer.cpp +++ b/indexer/Indexer.cpp @@ -1,15 +1,42 @@ #include "Indexer.h" Indexer::Indexer() { + indexedCount = 0; + currentFile = 0; } void Indexer::run() { while(pointerToDictionaries.Size() != 0) { - unordered_map<string, vector<int>>* dictionary = pointerToDictionaries.Pop(); - for(auto it : *dictionary) { - for(auto location : it.second) { - cout << location << endl; + if(indexedCount > 50000000) { + save(); + reset(); + } + unordered_map<string, vector<int>> dictionary = *pointerToDictionaries.Pop(); + for(auto word : dictionary) { + for(auto location : word.second) { + indexedCount++; + masterDictionary[word.first].push_back(location); } } } -} \ No newline at end of file + save(); +} + +void Indexer::save() { + map<string, vector<int> > maps(masterDictionary.begin(), masterDictionary.end()); + ofstream file("index" + to_string(currentFile) + ".txt"); + for(auto word : maps) { + file << word.first << endl; + for(auto location : word.second) { + file << location << " "; + } + file << endl; + } + file.close(); + currentFile++; +} + +void Indexer::reset() { + masterDictionary.clear(); + indexedCount = 0; +} diff --git a/indexer/Indexer.h b/indexer/Indexer.h index 7edf6e5cabbb37d54d3bb2019244574f767ffc3a..b40b48f3fc89835ae19ffb3db5ba5a2a16308399 100644 --- a/indexer/Indexer.h +++ b/indexer/Indexer.h @@ -1,8 +1,10 @@ #include "../ProducerConsumerQueue.h" #include "../ProducerConsumerQueue.cpp" #include <unordered_map> +#include <map> #include <vector> #include <iostream> +#include <fstream> /* @@ -18,5 +20,10 @@ class Indexer { Indexer(); void run(); ProducerConsumerQueue<unordered_map<string, vector<int> >*> pointerToDictionaries; -private: + private: + void save(); + void reset(); + unordered_map<string, vector<int> > masterDictionary; + size_t indexedCount; + size_t currentFile; }; \ No newline at end of file diff --git a/indexer/IndexerTests.cpp b/indexer/IndexerTests.cpp index 56676e2e00c2b289ca57b2bb3afade411b27ffe1..26527c62682aa92d91a7065480ee96eae954373a 100644 --- a/indexer/IndexerTests.cpp +++ b/indexer/IndexerTests.cpp @@ -12,7 +12,9 @@ int main() { Indexer indexer = Indexer(); unordered_map<string, vector<int>> test1; test1["cat"] = { 12, 15, 17 }; + test1["whale"] = { 3, 6, 12 }; test1["dog"] = { 1, 5, 15 }; + test1["agouti"] = {2, 8, 41 }; indexer.pointerToDictionaries.Push(&test1); indexer.run(); } \ No newline at end of file