From 435e1584e120ef9480d8a0dab970d1be1883cc81 Mon Sep 17 00:00:00 2001
From: Nicholas Yang <parablank@gmail.com>
Date: Thu, 8 Feb 2018 14:20:11 -0500
Subject: [PATCH] more simplified implementation

---
Reviewer revision of the original patch: run() no longer deep-copies each
queued dictionary and skips null entries, save() sorts entry pointers instead
of copying the whole index into a std::map and reports a failed open, and the
flush threshold is a named constant. Line breaks and indentation were
reconstructed from a whitespace-collapsed copy, and the blob ids on the
"index" lines are those of the original patch.

 indexer/Indexer.cpp      | 67 +++++++++++++++++++++++++++++++++++++---
 indexer/Indexer.h        | 14 ++++++++-
 indexer/IndexerTests.cpp |  2 ++
 3 files changed, 77 insertions(+), 6 deletions(-)

diff --git a/indexer/Indexer.cpp b/indexer/Indexer.cpp
index f894020..05dedc6 100644
--- a/indexer/Indexer.cpp
+++ b/indexer/Indexer.cpp
@@ -1,15 +1,72 @@
 #include "Indexer.h"
 
+// Number of indexed locations after which the in-memory index is flushed to disk.
+static const size_t MAX_INDEXED_LOCATIONS = 50000000;
+
 Indexer::Indexer() {
+    indexedCount = 0;
+    currentFile = 0;
 }
 
+// Drains pointerToDictionaries, merging every (word, location) pair into
+// masterDictionary and flushing the index to disk whenever it grows too large.
 void Indexer::run() {
     while(pointerToDictionaries.Size() != 0) {
-        unordered_map<string, vector<int>>* dictionary = pointerToDictionaries.Pop();
-        for(auto it : *dictionary) {
-            for(auto location : it.second) {
-                cout << location << endl;
+        if(indexedCount > MAX_INDEXED_LOCATIONS) {
+            save();
+            reset();
+        }
+        // Borrow the queued dictionary through its pointer: dereferencing it into a
+        // local map would deep-copy every word and every location list.
+        const unordered_map<string, vector<int>>* dictionary = pointerToDictionaries.Pop();
+        if(dictionary == nullptr) {
+            continue;
+        }
+        for(const auto& word : *dictionary) {
+            for(int location : word.second) {
+                indexedCount++;
+                masterDictionary[word.first].push_back(location);
             }
         }
     }
-}
\ No newline at end of file
+    save();
+}
+
+// Writes masterDictionary to "index<currentFile>.txt": each word on its own line,
+// followed by a line holding its space-separated locations. Words are written in
+// ascending order.
+void Indexer::save() {
+    const string fileName = "index" + to_string(currentFile) + ".txt";
+    ofstream file(fileName);
+    if(!file.is_open()) {
+        cerr << "Indexer: could not open " << fileName << " for writing" << endl;
+        return;
+    }
+    // Sort pointers to the entries rather than copying the whole index into a
+    // std::map, which would duplicate every word and location list.
+    typedef unordered_map<string, vector<int> >::value_type Entry;
+    vector<const Entry*> entries;
+    entries.reserve(masterDictionary.size());
+    for(const auto& entry : masterDictionary) {
+        entries.push_back(&entry);
+    }
+    sort(entries.begin(), entries.end(), [](const Entry* lhs, const Entry* rhs) {
+        return lhs->first < rhs->first;
+    });
+    for(const Entry* entry : entries) {
+        // '\n' rather than endl: endl would flush the stream after every line.
+        file << entry->first << '\n';
+        for(int location : entry->second) {
+            file << location << " ";
+        }
+        file << '\n';
+    }
+    file.close();
+    currentFile++;
+}
+
+// Drops the in-memory index and restarts the location count.
+void Indexer::reset() {
+    masterDictionary.clear();
+    indexedCount = 0;
+}
diff --git a/indexer/Indexer.h b/indexer/Indexer.h
index 7edf6e5..b40b48f 100644
--- a/indexer/Indexer.h
+++ b/indexer/Indexer.h
@@ -1,8 +1,10 @@
 #include "../ProducerConsumerQueue.h"
 #include "../ProducerConsumerQueue.cpp"
 #include <unordered_map>
+#include <algorithm>
 #include <vector>
 #include <iostream>
+#include <fstream>
 
 /*
 
@@ -18,5 +20,15 @@ class Indexer {
         Indexer();
         void run();
         ProducerConsumerQueue<unordered_map<string, vector<int> >*> pointerToDictionaries;
-private:
+    private:
+        // Writes masterDictionary to the next numbered index file.
+        void save();
+        // Empties masterDictionary and zeroes indexedCount.
+        void reset();
+        // Merged word -> locations index accumulated by run().
+        unordered_map<string, vector<int> > masterDictionary;
+        // Locations merged since the last reset().
+        size_t indexedCount;
+        // Numeric suffix of the next index file to write.
+        size_t currentFile;
 };
\ No newline at end of file
diff --git a/indexer/IndexerTests.cpp b/indexer/IndexerTests.cpp
index 56676e2..26527c6 100644
--- a/indexer/IndexerTests.cpp
+++ b/indexer/IndexerTests.cpp
@@ -12,7 +12,9 @@ int main() {
     Indexer indexer = Indexer();
     unordered_map<string, vector<int>> test1;
     test1["cat"] = { 12, 15, 17 };
+    test1["whale"] = { 3, 6, 12 };
     test1["dog"] = { 1, 5, 15 };
+    test1["agouti"] = {2, 8, 41 };
     indexer.pointerToDictionaries.Push(&test1);
     indexer.run();
 }
\ No newline at end of file
-- 
GitLab