diff --git a/ISRWord-tests b/ISRWord-tests index a93ea99fd1e8eea9c8df5e1cf848004b12be35f9..f3264270a64f0b43b493525fe8c93e8639cb51ea 100755 Binary files a/ISRWord-tests and b/ISRWord-tests differ diff --git a/constraintSolver/ISREndDoc.cpp b/constraintSolver/ISREndDoc.cpp index 3789655c8dbb465e61095822b379402696f04cd3..aa986dff789ce076a23fdb6d4ecc538b80328d11 100644 --- a/constraintSolver/ISREndDoc.cpp +++ b/constraintSolver/ISREndDoc.cpp @@ -11,11 +11,11 @@ ISREndDoc::ISREndDoc() { DocumentEnding ISREndDoc::next() { if(memMap == nullptr) { - string fileName = util::GetCurrentWorkingDir() + "/constraintSolver/index-test-files/twitter/index" + to_string(currentChunk) + ".txt"; + string fileName = util::GetCurrentWorkingDir() + "/constraintSolver/index-test-files/twitter/" + to_string(currentChunk) + ".txt"; currentFile = open(fileName.c_str(), O_RDONLY); - vector<size_t> contents = getSeekContents(); + MMDiskHashTable de(util::GetCurrentWorkingDir() + "/constraintSolver/index-test-files/twitter/" + to_string(currentChunk) + "-seek.txt", 30, 8); memMap = (char*) mmap(nullptr, util::FileSize(currentFile), PROT_READ, MAP_PRIVATE, currentFile, 0); - memMap += contents[0]; + memMap += stoll(de.find("=docEnding")); } string currentOne; for(char* map = memMap; map < memMap + util::FileSize(currentFile); map++) { @@ -24,7 +24,7 @@ DocumentEnding ISREndDoc::next() { memMap = nullptr; return DocumentEnding(); } - if(currentChunk == 8) { + if(currentChunk == 5) { DocumentEnding a = DocumentEnding(); a.url = "aaa"; return a; @@ -64,45 +64,3 @@ DocumentEnding ISREndDoc::next() { DocumentEnding ISREndDoc::getCurrentDoc() { return currentDoc; } - -vector<size_t> ISREndDoc::getSeekContents() { - string fileName = util::GetCurrentWorkingDir() + "/constraintSolver/index-test-files/twitter/index" + to_string(currentChunk) + "-seek.txt"; - int file = open(fileName.c_str(), O_RDONLY); - ssize_t fileSize = util::FileSize(file); - vector<size_t> contents; - - - char* memMap = (char*) mmap(nullptr, fileSize, PROT_READ, MAP_PRIVATE, file, 0); - // char* memMap = util::getFileMap(fileName); - string word = ""; - bool midWord = false; - bool midFind = false; - if(memMap != MAP_FAILED) { - for(char* map = memMap; map < memMap + fileSize; map++) { - if(midFind && isalpha(*map)) { - break; - } - switch(*map) { - case '\n': - case '\r': - case '\t': - case ' ': - if (midFind && word != "") { - contents.push_back(stoll(word)); - word = ""; - } else if (midWord) { - midWord = false; - if(word == "=docEnding") { - midFind = true; - } - word = ""; - } - break; - default: - word += *map; - midWord = true; - } - } - } - return contents; -} \ No newline at end of file diff --git a/constraintSolver/ISREndDoc.h b/constraintSolver/ISREndDoc.h index bea16279c281f03c50e0e6a9c437222fb7c74caa..0d722dfffbd48ab191b230c9f9cb3e1af006cb0e 100644 --- a/constraintSolver/ISREndDoc.h +++ b/constraintSolver/ISREndDoc.h @@ -17,6 +17,7 @@ #include "WordSeek.h" #include "../util/util.h" #include "../indexer/DocumentEnding.h" +#include "../DataStructures/DiskHashTable/MMDiskHashTable.h" // Find occurrences of document ends. diff --git a/constraintSolver/ISRWord.cpp b/constraintSolver/ISRWord.cpp index ee15492b6fd1233c48e5c7a344ff1fb8fef47010..7a681915b5de41c6cb2e9898d8036c0d8e45d0ad 100644 --- a/constraintSolver/ISRWord.cpp +++ b/constraintSolver/ISRWord.cpp @@ -141,26 +141,26 @@ void ISRWord::getWordSeek() { //if so, set location to that big chunk //go to next chunk Location ISRWord::Seek( Location target ) { -// if(!wordSeekLookupTable.empty()) { -// auto best = wordSeekLookupTable.front(); -// for(auto entry : wordSeekLookupTable) { -// if(entry.realLocation < target) { -// best = entry; -// } else { -// string currentChunkFileLocation = util::GetCurrentWorkingDir() + "/constraintSolver/index-test-files/twitter/index" + to_string(listOfChunks[currentChunk]) + ".txt"; -// int currentChunkFile = open(currentChunkFileLocation.c_str(), O_RDONLY); -// ssize_t currentChunkFileSize = FileSize(currentChunkFile); -// currentMemMap = (char*) mmap(nullptr, currentChunkFileSize, PROT_READ, MAP_PRIVATE, currentChunkFile, 0); -// currentMemMap += best.seekOffset; -// currentLocation = best.realLocation; -// return best.realLocation; -// } -// } -// } else { -// while(Next() <= target) { -// } -// return currentLocation; -// } + if(!wordSeekLookupTable.empty()) { + auto best = wordSeekLookupTable.front(); + for(auto entry : wordSeekLookupTable) { + if(entry.realLocation < target) { + best = entry; + } else { + string currentChunkFileLocation = util::GetCurrentWorkingDir() + "/constraintSolver/index-test-files/twitter/" + to_string(listOfChunks[currentChunk]) + ".txt"; + int currentChunkFile = open(currentChunkFileLocation.c_str(), O_RDONLY); + ssize_t currentChunkFileSize = FileSize(currentChunkFile); + currentMemMap = (char*) mmap(nullptr, currentChunkFileSize, PROT_READ, MAP_PRIVATE, currentChunkFile, 0); + currentMemMap += best.seekOffset; + currentLocation = best.realLocation; + return best.realLocation; + } + } + } else { + while(Next() <= target) { + } + return currentLocation; + } } diff --git a/constraintSolver/tests/ISRWordTests.cpp b/constraintSolver/tests/ISRWordTests.cpp index fb8deb8c4871bd5c04a14008095e9f560bc9f541..38628d28ac5bc9d7e0634e6094cebfa3ce999411 100644 --- a/constraintSolver/tests/ISRWordTests.cpp +++ b/constraintSolver/tests/ISRWordTests.cpp @@ -14,7 +14,7 @@ using namespace std; int main ( ) { char* query = "iphone"; - ISRWord queryWord("fsfasfas"); + ISRWord queryWord("nike"); ISREndDoc endDocs; vector<size_t> locations; vector<DocumentEnding> docEnds; @@ -23,22 +23,19 @@ int main ( ) while(queryWord.getCurrentLocation() != MAX_Location) { locations.push_back(queryWord.Next()); } - for(auto loc : locations) { - cout << loc << endl; + while(endDocs.next().url != "aaa") + { + for(auto locs : locations) + { + if(locs < endDocs.getCurrentDoc().docEndPosition && + locs >= (endDocs.getCurrentDoc().docEndPosition - endDocs.getCurrentDoc().docNumWords)) { + urls.insert(endDocs.getCurrentDoc().url); + } + } + + } + for(auto urrl : urls) { + cout << urrl << endl; } -// while(endDocs.next().url != "aaa") -// { -// for(auto locs : locations) -// { -// if(locs < endDocs.getCurrentDoc().docEndPosition && -// locs >= (endDocs.getCurrentDoc().docEndPosition - endDocs.getCurrentDoc().docNumWords)) { -// urls.insert(endDocs.getCurrentDoc().url); -// } -// } -// -// } -// for(auto urrl : urls) { -// cout << urrl << endl; -// } return 0; } \ No newline at end of file