Skip to content
Snippets Groups Projects
Commit 94ced410 authored by Nicholas Yang
Browse files

dht for chunk dictionary

parent 8158a10c
No related branches found
No related tags found
1 merge request: !3 Indexer
......@@ -70,7 +70,7 @@ void Indexer::verbose_run() {
void Indexer::save ( )
{
map< string, vector< size_t > > maps( masterDictionary.begin( ), masterDictionary.end( ) );
DiskHashTable seeker(util::GetCurrentWorkingDir( ) + "/indexer/output/" + to_string( currentFile ) + "-seek.txt", 30, 8 );
DiskHashTable seeker(util::GetCurrentWorkingDir( ) + "/indexer/output/" + to_string( currentFile ) + "-seek.txt", 30, 8);
string fileName = util::GetCurrentWorkingDir( ) + "/indexer/output/" + to_string( currentFile ) + ".txt";
int file = open( fileName.c_str( ), O_CREAT | O_WRONLY, S_IRWXU );
......@@ -87,7 +87,13 @@ void Indexer::save ( )
for ( auto word : maps )
{
seeker.insert(word.first, to_string(seekOffset));
if(word.first.size() > 30) {
string resized = word.first;
resized.resize(30);
seeker.insert(resized, to_string(seekOffset));
} else {
seeker.insert(word.first, to_string(seekOffset));
}
chunkDictionary[ word.first ].push_back( currentFile );
// string wordBreak = word.first + "\n";
// write(file, wordBreak.c_str(), strlen(wordBreak.c_str()));
......@@ -123,8 +129,21 @@ void Indexer::save ( )
lastOne = location;
}
write( file, "\n", 1 );
seekOffset += 1;
}
seekOffset += 1;
// if(postingsSeekTable.find(word.first) != postingsSeekTable.end()) {
// string offsetLine = "\t";
// for (int i = 0; i < postingsSeekTable[word.first].size(); i++) {
// offsetLine += "<" +
// to_string( postingsSeekTable[word.first][i].realLocation) +
// ", " +
// to_string( postingsSeekTable[word.first][i].offset) +
// "> ";
// }
// offsetLine += "\n";
// write( file, offsetLine.c_str( ), strlen( offsetLine.c_str( ) ) );
// seekOffset += strlen(offsetLine.c_str());
// }
// }
string docEndingHeader = "===Document Endings===\n";
write( file, docEndingHeader.c_str( ), strlen( docEndingHeader.c_str( ) ) );
......@@ -146,20 +165,32 @@ void Indexer::save ( )
/**
 * Writes the chunk dictionary to an on-disk hash table.
 *
 * For every entry of chunkDictionary, one record is inserted into the
 * DiskHashTable backed by <cwd>/indexer/output/index-master.txt. The record's
 * value is the entry's chunk numbers, each followed by a single space
 * (e.g. "0 3 7 ").
 *
 * Words longer than maxKeySize characters are truncated before insertion, the
 * same way Indexer::save truncates words for its per-chunk seek table, so both
 * tables are addressed by identical keys.
 *
 * This replaces the earlier plain-text dump to master-index.txt.
 */
void Indexer::saveChunkDictionary ( )
	{
	// Passed to the DiskHashTable constructor; presumably the fixed key and
	// value widths of a record -- TODO confirm against DiskHashTable.
	const size_t maxKeySize = 30;
	const size_t maxValueSize = 168;

	DiskHashTable dhtChunk( util::GetCurrentWorkingDir( ) + "/indexer/output/index-master.txt",
	                        maxKeySize, maxValueSize );

	// Iterate by reference: each entry owns a vector of chunk numbers.
	for ( const auto &word : chunkDictionary )
		{
		string key = word.first;
		if ( key.size( ) > maxKeySize )
			{
			key.resize( maxKeySize );
			}

		string value;
		for ( const auto chunk : word.second )
			{
			value += to_string( chunk );
			value += ' ';
			}
		// NOTE(review): nothing here bounds value to maxValueSize; a word that
		// appears in many chunks may exceed it -- verify how
		// DiskHashTable::insert handles oversized values.

		// Insert the truncated key, not word.first; otherwise the truncation
		// above has no effect.
		dhtChunk.insert( key, value );
		}
	}
void Indexer::verbose_save ( )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment