From 435e1584e120ef9480d8a0dab970d1be1883cc81 Mon Sep 17 00:00:00 2001
From: Nicholas Yang <parablank@gmail.com>
Date: Thu, 8 Feb 2018 14:20:11 -0500
Subject: [PATCH] more simplified implementation

---
 indexer/Indexer.cpp      | 37 ++++++++++++++++++++++++++++++++-----
 indexer/Indexer.h        |  9 ++++++++-
 indexer/IndexerTests.cpp |  2 ++
 3 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/indexer/Indexer.cpp b/indexer/Indexer.cpp
index f894020..05dedc6 100644
--- a/indexer/Indexer.cpp
+++ b/indexer/Indexer.cpp
@@ -1,15 +1,42 @@
 #include "Indexer.h"
 
 Indexer::Indexer() {
+    indexedCount = 0; // locations merged into masterDictionary since the last reset()
+    currentFile = 0; // numeric suffix of the next "index<N>.txt" written by save()
 }
 
 void Indexer::run() {
     while(pointerToDictionaries.Size() != 0) {
-        unordered_map<string, vector<int>>* dictionary = pointerToDictionaries.Pop();
-        for(auto it : *dictionary) {
-            for(auto location : it.second) {
-                cout << location << endl;
+        if(indexedCount > 50000000) { // spill to disk once more than 50M locations are held in memory
+            save();
+            reset();
+        }
+        const unordered_map<string, vector<int>>& dictionary = *pointerToDictionaries.Pop(); // read in place (no deep copy); the pointed-to map must stay alive while it is merged
+        for(const auto& word : dictionary) { // const& avoids copying each word and its location list
+            for(auto location : word.second) {
+                indexedCount++; // one count per (word, location) pair merged
+                masterDictionary[word.first].push_back(location);
             }
         }
     }
-}
\ No newline at end of file
+    save(); // write out whatever is still in memory once the queue is empty
+}
+
+// Writes masterDictionary, sorted by word, to "index<currentFile>.txt": the word, then its locations on the next line.
+void Indexer::save() {
+    map<string, vector<int> > maps(masterDictionary.begin(), masterDictionary.end()); // std::map orders the words
+    ofstream file("index" + to_string(currentFile) + ".txt");
+    for(const auto& word : maps) { // const& avoids copying each word and its location list
+        file << word.first << '\n'; // '\n' instead of endl: same bytes, no flush per line
+        for(auto location : word.second) {
+            file << location << " ";
+        }
+        file << '\n';
+    }
+    currentFile++; // next save() targets a new file; `file` is flushed and closed by its destructor on return
+}
+
+void Indexer::reset() {
+    indexedCount = 0; // restart the location counter for the next batch
+    masterDictionary.clear(); // drop every word/location entry accumulated so far
+}
diff --git a/indexer/Indexer.h b/indexer/Indexer.h
index 7edf6e5..b40b48f 100644
--- a/indexer/Indexer.h
+++ b/indexer/Indexer.h
@@ -1,8 +1,10 @@
 #include "../ProducerConsumerQueue.h"
 #include "../ProducerConsumerQueue.cpp"
 #include <unordered_map>
+#include <map>
 #include <vector>
 #include <iostream>
+#include <fstream>
 
 /*
 
@@ -18,5 +20,10 @@ class Indexer {
         Indexer();
         void run();
         ProducerConsumerQueue<unordered_map<string, vector<int> >*> pointerToDictionaries;
-private:
+    private:
+        void save(); // writes masterDictionary (sorted by word) to "index<currentFile>.txt", then increments currentFile
+        void reset(); // clears masterDictionary and zeroes indexedCount
+        unordered_map<string, vector<int> > masterDictionary; // word -> locations merged from the popped dictionaries
+        size_t indexedCount; // number of locations added to masterDictionary since the last reset()
+        size_t currentFile; // index of the next output file; used in the file name by save()
 };
\ No newline at end of file
diff --git a/indexer/IndexerTests.cpp b/indexer/IndexerTests.cpp
index 56676e2..26527c6 100644
--- a/indexer/IndexerTests.cpp
+++ b/indexer/IndexerTests.cpp
@@ -12,7 +12,9 @@ int main() {
     Indexer indexer = Indexer();
     unordered_map<string, vector<int>> test1;
     test1["cat"] = { 12, 15, 17 };
+    test1["whale"] = { 3, 6, 12 };
     test1["dog"] = { 1, 5, 15 };
+    test1["agouti"] = {2, 8, 41 };
     indexer.pointerToDictionaries.Push(&test1);
     indexer.run();
 }
\ No newline at end of file
-- 
GitLab