Skip to content
Snippets Groups Projects
Commit fa189108 authored by jsclose's avatar jsclose
Browse files

Debugged ISRWord properly; doc ends are now working; improved ISRAnd

parent 9e71946f
No related branches found
No related tags found
No related merge requests found
No preview for this file type
No preview for this file type
No preview for this file type
...@@ -106,6 +106,7 @@ Location ISRAnd::Seek( Location target ) ...@@ -106,6 +106,7 @@ Location ISRAnd::Seek( Location target )
//DocumentEnd->Seek( furthest ); //DocumentEnd->Seek( furthest );
if(nearestTerm->GetEndDocument()->getCurrentDoc().docEndPosition == furthestTerm->GetEndDocument()->getCurrentDoc().docEndPosition ) if(nearestTerm->GetEndDocument()->getCurrentDoc().docEndPosition == furthestTerm->GetEndDocument()->getCurrentDoc().docEndPosition )
{ {
cout << "Found Match " << endl; cout << "Found Match " << endl;
...@@ -115,6 +116,7 @@ Location ISRAnd::Seek( Location target ) ...@@ -115,6 +116,7 @@ Location ISRAnd::Seek( Location target )
//set next target to be starting location of document //set next target to be starting location of document
Location lastDocStart = GetEndDocument( )->GetStartingPositionOfDoc( ); Location lastDocStart = GetEndDocument( )->GetStartingPositionOfDoc( );
if ( nearest >= lastDocStart ) if ( nearest >= lastDocStart )
{ {
//MATCH //MATCH
...@@ -126,8 +128,8 @@ Location ISRAnd::Seek( Location target ) ...@@ -126,8 +128,8 @@ Location ISRAnd::Seek( Location target )
{ {
//No Match yet, keep searching //No Match yet, keep searching
//docs before the fursther? may be hit //docs before the fursther? may be hit
target = furthest-1; // => works but slow target = lastDocStart; // => works but slow
//target = lastDocStart-1; //target = furthest;
} }
......
...@@ -11,7 +11,7 @@ ISREndDoc::ISREndDoc() { ...@@ -11,7 +11,7 @@ ISREndDoc::ISREndDoc() {
} }
DocumentEnding ISREndDoc::next() { DocumentEnding ISREndDoc::next() {
if(memMap == nullptr) { if(*memMap == '\0' || memMap == nullptr) {
string fileName = util::GetCurrentWorkingDir() + pathToIndex + to_string(currentChunk) + ".txt"; string fileName = util::GetCurrentWorkingDir() + pathToIndex + to_string(currentChunk) + ".txt";
currentFile = open(fileName.c_str(), O_RDONLY); currentFile = open(fileName.c_str(), O_RDONLY);
string seekFileName = util::GetCurrentWorkingDir() + pathToIndex + to_string(currentChunk) + "-seek.txt"; string seekFileName = util::GetCurrentWorkingDir() + pathToIndex + to_string(currentChunk) + "-seek.txt";
...@@ -60,6 +60,8 @@ DocumentEnding ISREndDoc::next() { ...@@ -60,6 +60,8 @@ DocumentEnding ISREndDoc::next() {
break; break;
} }
} }
return currentDoc; return currentDoc;
} }
...@@ -75,6 +77,8 @@ void ISREndDoc::seek(Location target) { ...@@ -75,6 +77,8 @@ void ISREndDoc::seek(Location target) {
string input = ""; string input = "";
bool init = false; bool init = false;
bool breakout = false; bool breakout = false;
bool between = false;
size_t foundChunk;
while(!found) { while(!found) {
string fileName = util::GetCurrentWorkingDir() + string fileName = util::GetCurrentWorkingDir() +
pathToIndex + pathToIndex +
...@@ -93,14 +97,18 @@ void ISREndDoc::seek(Location target) { ...@@ -93,14 +97,18 @@ void ISREndDoc::seek(Location target) {
case '<': case '<':
break; break;
case '>': case '>':
if (target < tempLocation && target > docEndingWordSeek.first) { if (target < tempLocation && target > docEndingWordSeek.first)
{
if(!init) { if(!init) {
breakout = true; breakout = true;
break; break;
} }
breakout = true;
found = true; found = true;
foundChunk = between ? currentChunk - 1 : currentChunk ;
break; break;
} }
between = false;
init = true; init = true;
docEndingWordSeek.first = tempLocation; docEndingWordSeek.first = tempLocation;
docEndingWordSeek.second = stoll(input); docEndingWordSeek.second = stoll(input);
...@@ -115,12 +123,11 @@ void ISREndDoc::seek(Location target) { ...@@ -115,12 +123,11 @@ void ISREndDoc::seek(Location target) {
break; break;
} }
if (found) { if (found) {
string fileName = util::GetCurrentWorkingDir() + pathToIndex + to_string(currentChunk) + ".txt"; string fileName = util::GetCurrentWorkingDir() + pathToIndex + to_string(foundChunk) + ".txt";
currentFile = open(fileName.c_str(), O_RDONLY); currentFile = open(fileName.c_str(), O_RDONLY);
memMap = (char *) mmap(nullptr, util::FileSize(currentFile), PROT_READ, MAP_PRIVATE, currentFile, memMap = (char *) mmap(nullptr, util::FileSize(currentFile), PROT_READ, MAP_PRIVATE, currentFile,
0); 0);
memMap += docEndingWordSeek.second; memMap += docEndingWordSeek.second;
break;
} }
if(breakout) { if(breakout) {
break; break;
...@@ -136,9 +143,12 @@ void ISREndDoc::seek(Location target) { ...@@ -136,9 +143,12 @@ void ISREndDoc::seek(Location target) {
break; break;
} }
currentChunk++; currentChunk++;
between = true;
} }
while(target > (next().docEndPosition - 1)) { while(target > (next().docEndPosition - 1)) {
} }
//next();
} }
DocumentEnding ISREndDoc::getCurrentDoc() { DocumentEnding ISREndDoc::getCurrentDoc() {
...@@ -147,5 +157,5 @@ DocumentEnding ISREndDoc::getCurrentDoc() { ...@@ -147,5 +157,5 @@ DocumentEnding ISREndDoc::getCurrentDoc() {
Location ISREndDoc::GetStartingPositionOfDoc( ) Location ISREndDoc::GetStartingPositionOfDoc( )
{ {
return currentDoc.docEndPosition - currentDoc.docNumWords; return currentDoc.docEndPosition - currentDoc.docNumWords - 1;
} }
\ No newline at end of file
...@@ -108,7 +108,9 @@ Location ISRWord::Next ( ) ...@@ -108,7 +108,9 @@ Location ISRWord::Next ( )
currentLocation += stoll( delta ); currentLocation += stoll( delta );
currentMemMap++; currentMemMap++;
} }
DocumentEnd->seek( currentLocation );
//DocumentEnd->seek( currentLocation );
return currentLocation; return currentLocation;
} }
...@@ -169,12 +171,16 @@ Location ISRWord::Seek( Location target ) { ...@@ -169,12 +171,16 @@ Location ISRWord::Seek( Location target ) {
currentMemMap = (char*) mmap(nullptr, currentChunkFileSize, PROT_READ, MAP_PRIVATE, currentChunkFile, 0); currentMemMap = (char*) mmap(nullptr, currentChunkFileSize, PROT_READ, MAP_PRIVATE, currentChunkFile, 0);
currentMemMap += best.seekOffset; currentMemMap += best.seekOffset;
currentLocation = best.realLocation; currentLocation = best.realLocation;
DocumentEnd->seek( currentLocation );
return best.realLocation; return best.realLocation;
} }
} }
} else { } else {
while(Next() <= target) { while(Next() <= target) {
} }
if( currentLocation == MAX_Location)
return MAX_Location;
DocumentEnd->seek( currentLocation ); DocumentEnd->seek( currentLocation );
return currentLocation; return currentLocation;
......
...@@ -14,8 +14,13 @@ using namespace std; ...@@ -14,8 +14,13 @@ using namespace std;
int main ( ) int main ( )
{ {
char* query; char* query;
ISRWord *q1 = new ISRWord("moment"); ISRWord *q1 = new ISRWord("fake");
ISRWord *q2 = new ISRWord("life"); ISRWord *q2 = new ISRWord("time");
ISRWord *q3 = new ISRWord("time");
ISRWord *q4 = new ISRWord("time");
ISRWord *q5 = new ISRWord("time");
ISRWord *q6 = new ISRWord("time");
vector< ISR* > input; vector< ISR* > input;
input.push_back(q1); input.push_back(q1);
input.push_back(q2); input.push_back(q2);
......
...@@ -20,8 +20,8 @@ int main ( ) ...@@ -20,8 +20,8 @@ int main ( )
vector< vector< size_t > > locations; vector< vector< size_t > > locations;
ISRWord q1 = ISRWord("token"); ISRWord q1 = ISRWord("fake");
ISRWord q2 = ISRWord("life"); ISRWord q2 = ISRWord("time");
queries.push_back(q2); queries.push_back(q2);
queries.push_back(q1); queries.push_back(q1);
...@@ -83,7 +83,7 @@ int main ( ) ...@@ -83,7 +83,7 @@ int main ( )
std::set_intersection(v1.begin(), v1.end(), std::set_intersection(v1.begin(), v1.end(),
v2.begin(), v2.end(), v2.begin(), v2.end(),
std::back_inserter(v_intersection)); std::inserter(v_intersection, v_intersection.begin()));
for(auto url : v_intersection) for(auto url : v_intersection)
std::cout << url << endl; std::cout << url << endl;
......
...@@ -13,11 +13,24 @@ using namespace std; ...@@ -13,11 +13,24 @@ using namespace std;
int main ( ) { int main ( ) {
//char* query = "iphone"; //char* query = "iphone";
ISRWord queryWord("fake"); ISRWord queryWord("news");
ISREndDoc endDocs; ISREndDoc endDocs;
vector<size_t> locations; vector<size_t> locations;
vector<DocumentEnding> docEnds; vector<DocumentEnding> docEnds;
set<string> urls; set<string> urls;
clock_t start = clock();
while(queryWord.getCurrentLocation() != MAX_Location) {
auto url = queryWord.DocumentEnd->getCurrentDoc().url;
cout << url << endl;
urls.insert( url );
queryWord.NextDocument();
}
/*
while(queryWord.getCurrentLocation() != MAX_Location) { while(queryWord.getCurrentLocation() != MAX_Location) {
locations.push_back(queryWord.Next()); locations.push_back(queryWord.Next());
} }
...@@ -39,10 +52,9 @@ int main ( ) { ...@@ -39,10 +52,9 @@ int main ( ) {
} }
} }
} }
end = clock(); */
for(auto urrl : urls) { clock_t end = clock();
cout << urrl << endl;
}
cout << "Time to complete query: " << (end - start) / (double) CLOCKS_PER_SEC << endl; cout << "Time to complete query: " << (end - start) / (double) CLOCKS_PER_SEC << endl;
return 0; return 0;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment