From 94ced410bfe75b4c1bb57480a855c1cecaa30eeb Mon Sep 17 00:00:00 2001
From: Nicholas Yang <parablank@gmail.com>
Date: Tue, 27 Mar 2018 00:07:53 -0400
Subject: [PATCH] Use DiskHashTable (DHT) for chunk dictionary

---
 indexer/Indexer.cpp | 65 +++++++++++++++++++++++++++++++++------------
 1 file changed, 48 insertions(+), 17 deletions(-)

diff --git a/indexer/Indexer.cpp b/indexer/Indexer.cpp
index 90edd43..f78cee7 100644
--- a/indexer/Indexer.cpp
+++ b/indexer/Indexer.cpp
@@ -70,7 +70,7 @@ void Indexer::verbose_run() {
 void Indexer::save ( )
 	{
 	map< string, vector< size_t > > maps( masterDictionary.begin( ), masterDictionary.end( ) );
-	DiskHashTable seeker(util::GetCurrentWorkingDir( ) + "/indexer/output/" + to_string( currentFile ) + "-seek.txt", 30, 8 );
+	DiskHashTable seeker(util::GetCurrentWorkingDir( ) + "/indexer/output/" + to_string( currentFile ) + "-seek.txt", 30, 8);
 	string fileName = util::GetCurrentWorkingDir( ) + "/indexer/output/" + to_string( currentFile ) + ".txt";
 	int file = open( fileName.c_str( ), O_CREAT | O_WRONLY, S_IRWXU );
 
@@ -87,7 +87,13 @@ void Indexer::save ( )
 
 	for ( auto word : maps )
 		{
-		seeker.insert(word.first, to_string(seekOffset));
+        if(word.first.size() > 30) {
+            string resized = word.first;
+            resized.resize(30);
+            seeker.insert(resized, to_string(seekOffset));
+        } else {
+            seeker.insert(word.first, to_string(seekOffset));
+        }
 		chunkDictionary[ word.first ].push_back( currentFile );
 //        string wordBreak = word.first + "\n";
 //        write(file, wordBreak.c_str(), strlen(wordBreak.c_str()));
@@ -123,8 +129,21 @@ void Indexer::save ( )
 			lastOne = location;
 			}
 		write( file, "\n", 1 );
-		seekOffset += 1;
-		}
+        seekOffset += 1;
+//        if(postingsSeekTable.find(word.first) != postingsSeekTable.end()) {
+//            string offsetLine = "\t";
+//            for (int i = 0; i < postingsSeekTable[word.first].size(); i++) {
+//                offsetLine += "<" +
+//                              to_string( postingsSeekTable[word.first][i].realLocation) +
+//                              ", " +
+//                              to_string( postingsSeekTable[word.first][i].offset) +
+//                              "> ";
+//            }
+//            offsetLine += "\n";
+//            write( file, offsetLine.c_str( ), strlen( offsetLine.c_str( ) ) );
+//            seekOffset += strlen(offsetLine.c_str());
+//        }
+//        }
 
 	string docEndingHeader = "===Document Endings===\n";
 	write( file, docEndingHeader.c_str( ), strlen( docEndingHeader.c_str( ) ) );
@@ -146,20 +165,32 @@ void Indexer::save ( )
 
 void Indexer::saveChunkDictionary ( )
 	{
-	string fileName = util::GetCurrentWorkingDir( ) + "/indexer/output/master-index.txt";
-
-	int file = open( fileName.c_str( ), O_CREAT | O_WRONLY, S_IRWXU );
-	for ( auto word : chunkDictionary )
-		{
-		string wordDictionary = word.first + " ";
-		for ( auto chunk : word.second )
-			{
-			wordDictionary += to_string( chunk ) + " ";
-			}
-		wordDictionary += "\n";
-		write( file, wordDictionary.c_str( ), strlen( wordDictionary.c_str( ) ) );
+    DiskHashTable dhtChunk(util::GetCurrentWorkingDir() + "/indexer/output/index-master.txt", 30, 168); // maps word -> space-separated chunk ids
+    for(const auto& word : chunkDictionary) { // iterate by reference: no per-entry copy of the word and its chunk list
+        string key = word.first; // clipped below to 30, the same limit save() applies before inserting into its table
+        if(key.size() > 30) {
+            key.resize(30);
+        }
+		string value; // NOTE(review): nothing here bounds value to the 168 passed above -- confirm how DiskHashTable handles longer values
+		for (auto chunk : word.second) {
+			value += to_string(chunk) + " ";
 		}
-	close( file );
+		dhtChunk.insert(key, value); // insert the truncated key; inserting word.first left the truncation above unused
+	}
+//	string fileName = util::GetCurrentWorkingDir( ) + "/indexer/output/master-index.txt";
+//
+//	int file = open( fileName.c_str( ), O_CREAT | O_WRONLY, S_IRWXU );
+//	for ( auto word : chunkDictionary )
+//		{
+//		string wordDictionary = word.first + " ";
+//		for ( auto chunk : word.second )
+//			{
+//			wordDictionary += to_string( chunk ) + " ";
+//			}
+//		wordDictionary += "\n";
+//		write( file, wordDictionary.c_str( ), strlen( wordDictionary.c_str( ) ) );
+//		}
+//	close( file );
 	}
 
 void Indexer::verbose_save ( )
-- 
GitLab