Skip to content
Snippets Groups Projects
Commit 94ced410 authored by Nicholas Yang
Browse files

dht for chunk dictionary

parent 8158a10c
No related branches found
No related tags found
1 merge request: !3 Indexer
...@@ -70,7 +70,7 @@ void Indexer::verbose_run() { ...@@ -70,7 +70,7 @@ void Indexer::verbose_run() {
void Indexer::save ( ) void Indexer::save ( )
{ {
map< string, vector< size_t > > maps( masterDictionary.begin( ), masterDictionary.end( ) ); map< string, vector< size_t > > maps( masterDictionary.begin( ), masterDictionary.end( ) );
DiskHashTable seeker(util::GetCurrentWorkingDir( ) + "/indexer/output/" + to_string( currentFile ) + "-seek.txt", 30, 8 ); DiskHashTable seeker(util::GetCurrentWorkingDir( ) + "/indexer/output/" + to_string( currentFile ) + "-seek.txt", 30, 8);
string fileName = util::GetCurrentWorkingDir( ) + "/indexer/output/" + to_string( currentFile ) + ".txt"; string fileName = util::GetCurrentWorkingDir( ) + "/indexer/output/" + to_string( currentFile ) + ".txt";
int file = open( fileName.c_str( ), O_CREAT | O_WRONLY, S_IRWXU ); int file = open( fileName.c_str( ), O_CREAT | O_WRONLY, S_IRWXU );
...@@ -87,7 +87,13 @@ void Indexer::save ( ) ...@@ -87,7 +87,13 @@ void Indexer::save ( )
for ( auto word : maps ) for ( auto word : maps )
{ {
seeker.insert(word.first, to_string(seekOffset)); if(word.first.size() > 30) {
string resized = word.first;
resized.resize(30);
seeker.insert(resized, to_string(seekOffset));
} else {
seeker.insert(word.first, to_string(seekOffset));
}
chunkDictionary[ word.first ].push_back( currentFile ); chunkDictionary[ word.first ].push_back( currentFile );
// string wordBreak = word.first + "\n"; // string wordBreak = word.first + "\n";
// write(file, wordBreak.c_str(), strlen(wordBreak.c_str())); // write(file, wordBreak.c_str(), strlen(wordBreak.c_str()));
...@@ -123,8 +129,21 @@ void Indexer::save ( ) ...@@ -123,8 +129,21 @@ void Indexer::save ( )
lastOne = location; lastOne = location;
} }
write( file, "\n", 1 ); write( file, "\n", 1 );
seekOffset += 1; seekOffset += 1;
} // if(postingsSeekTable.find(word.first) != postingsSeekTable.end()) {
// string offsetLine = "\t";
// for (int i = 0; i < postingsSeekTable[word.first].size(); i++) {
// offsetLine += "<" +
// to_string( postingsSeekTable[word.first][i].realLocation) +
// ", " +
// to_string( postingsSeekTable[word.first][i].offset) +
// "> ";
// }
// offsetLine += "\n";
// write( file, offsetLine.c_str( ), strlen( offsetLine.c_str( ) ) );
// seekOffset += strlen(offsetLine.c_str());
// }
// }
string docEndingHeader = "===Document Endings===\n"; string docEndingHeader = "===Document Endings===\n";
write( file, docEndingHeader.c_str( ), strlen( docEndingHeader.c_str( ) ) ); write( file, docEndingHeader.c_str( ), strlen( docEndingHeader.c_str( ) ) );
...@@ -146,20 +165,32 @@ void Indexer::save ( ) ...@@ -146,20 +165,32 @@ void Indexer::save ( )
void Indexer::saveChunkDictionary ( ) void Indexer::saveChunkDictionary ( )
{ {
string fileName = util::GetCurrentWorkingDir( ) + "/indexer/output/master-index.txt"; DiskHashTable dhtChunk = DiskHashTable(util::GetCurrentWorkingDir() + "/indexer/output/index-master.txt", 30, 168);
for(auto word : chunkDictionary) {
int file = open( fileName.c_str( ), O_CREAT | O_WRONLY, S_IRWXU ); string key = word.first;
for ( auto word : chunkDictionary ) if(key.size() > 30) {
{ key.resize(30);
string wordDictionary = word.first + " "; }
for ( auto chunk : word.second ) string value = "";
{ for (auto chunk : word.second) {
wordDictionary += to_string( chunk ) + " "; value += to_string(chunk) + " ";
}
wordDictionary += "\n";
write( file, wordDictionary.c_str( ), strlen( wordDictionary.c_str( ) ) );
} }
close( file ); dhtChunk.insert(word.first, value);
}
// string fileName = util::GetCurrentWorkingDir( ) + "/indexer/output/master-index.txt";
//
// int file = open( fileName.c_str( ), O_CREAT | O_WRONLY, S_IRWXU );
// for ( auto word : chunkDictionary )
// {
// string wordDictionary = word.first + " ";
// for ( auto chunk : word.second )
// {
// wordDictionary += to_string( chunk ) + " ";
// }
// wordDictionary += "\n";
// write( file, wordDictionary.c_str( ), strlen( wordDictionary.c_str( ) ) );
// }
// close( file );
} }
void Indexer::verbose_save ( ) void Indexer::verbose_save ( )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment