Skip to content
Snippets Groups Projects
Commit a4ce5b6a authored by Nicholas Yang's avatar Nicholas Yang
Browse files

isr word test working again

parent ed96f71d
No related branches found
No related tags found
No related merge requests found
No preview for this file type
......@@ -11,11 +11,11 @@ ISREndDoc::ISREndDoc() {
DocumentEnding ISREndDoc::next() {
if(memMap == nullptr) {
string fileName = util::GetCurrentWorkingDir() + "/constraintSolver/index-test-files/twitter/index" + to_string(currentChunk) + ".txt";
string fileName = util::GetCurrentWorkingDir() + "/constraintSolver/index-test-files/twitter/" + to_string(currentChunk) + ".txt";
currentFile = open(fileName.c_str(), O_RDONLY);
vector<size_t> contents = getSeekContents();
MMDiskHashTable de(util::GetCurrentWorkingDir() + "/constraintSolver/index-test-files/twitter/" + to_string(currentChunk) + "-seek.txt", 30, 8);
memMap = (char*) mmap(nullptr, util::FileSize(currentFile), PROT_READ, MAP_PRIVATE, currentFile, 0);
memMap += contents[0];
memMap += stoll(de.find("=docEnding"));
}
string currentOne;
for(char* map = memMap; map < memMap + util::FileSize(currentFile); map++) {
......@@ -24,7 +24,7 @@ DocumentEnding ISREndDoc::next() {
memMap = nullptr;
return DocumentEnding();
}
if(currentChunk == 8) {
if(currentChunk == 5) {
DocumentEnding a = DocumentEnding();
a.url = "aaa";
return a;
......@@ -64,45 +64,3 @@ DocumentEnding ISREndDoc::next() {
// Returns a copy of `currentDoc`. This accessor does not modify any state.
DocumentEnding ISREndDoc::getCurrentDoc() {
return currentDoc;
}
// Reads the "-seek.txt" file that belongs to `currentChunk` and returns the
// numeric tokens that follow the "=docEnding" key.
//
// The file is scanned as whitespace-separated tokens (' ', '\t', '\r', '\n').
// Once the token "=docEnding" has been seen, every following token is
// converted with stoll and appended to the result. Scanning stops at the
// first alphabetic character after the key, or at the end of the file.
//
// Returns an empty vector when the file cannot be opened or mapped, or when
// the key is not present. stoll may throw if a token after the key is not a
// number; that exception is propagated to the caller.
vector<size_t> ISREndDoc::getSeekContents() {
    vector<size_t> contents;

    string fileName = util::GetCurrentWorkingDir() + "/constraintSolver/index-test-files/twitter/index" + to_string(currentChunk) + "-seek.txt";
    int file = open(fileName.c_str(), O_RDONLY);
    if(file < 0) {
        return contents;
    }

    ssize_t fileSize = util::FileSize(file);
    if(fileSize <= 0) {
        close(file);
        return contents;
    }

    void* mapping = mmap(nullptr, fileSize, PROT_READ, MAP_PRIVATE, file, 0);
    // The mapping stays valid after the descriptor is closed, so release the
    // descriptor right away instead of leaking it.
    close(file);
    if(mapping == MAP_FAILED) {
        return contents;
    }

    const char* begin = static_cast<const char*>(mapping);
    const char* end = begin + fileSize;

    string word;
    bool midWord = false;
    bool midFind = false;        // true once "=docEnding" has been consumed
    bool stoppedAtAlpha = false; // true when the scan ended on the next key/word

    for(const char* cursor = begin; cursor < end; ++cursor) {
        const char current = *cursor;

        // isalpha requires a value representable as unsigned char.
        if(midFind && isalpha(static_cast<unsigned char>(current))) {
            stoppedAtAlpha = true;
            break;
        }

        const bool isSeparator = current == '\n' || current == '\r' || current == '\t' || current == ' ';
        if(!isSeparator) {
            word += current;
            midWord = true;
            continue;
        }

        if(midFind && !word.empty()) {
            contents.push_back(stoll(word));
            word.clear();
        } else if(midWord) {
            midWord = false;
            if(word == "=docEnding") {
                midFind = true;
            }
            word.clear();
        }
    }

    // A final number that runs up to the end of the file has no trailing
    // separator to trigger the push above; do not lose it.
    if(midFind && !stoppedAtAlpha && !word.empty()) {
        contents.push_back(stoll(word));
    }

    munmap(mapping, fileSize);
    return contents;
}
\ No newline at end of file
......@@ -17,6 +17,7 @@
#include <unistd.h>
#include "WordSeek.h"
#include "../util/util.h"
#include "../indexer/DocumentEnding.h"
#include "../DataStructures/DiskHashTable/MMDiskHashTable.h"
// Find occurrences of document ends.
......
......@@ -141,26 +141,26 @@ void ISRWord::getWordSeek() {
//if so, set location to that big chunk
//go to next chunk
// Moves this ISR towards `target` and returns the location it ends up at.
//
// With a non-empty `wordSeekLookupTable`: walks the table in order and keeps
// the last entry whose `realLocation` is below `target`, stopping at the
// first entry that is not below it (the front entry is used when even the
// first entry is not below `target`). The current chunk's file is then
// mapped, `currentMemMap` is set to the mapping plus the entry's
// `seekOffset`, `currentLocation` is set to the entry's `realLocation`, and
// that location is returned. If every entry is below `target`, the last
// entry is used.
// NOTE(review): this presumes the table is ordered by realLocation — confirm.
//
// With an empty table: calls Next() until it returns a value greater than
// `target`, then returns `currentLocation`.
//
// If the chunk file cannot be opened or mapped, no state is changed and the
// current `currentLocation` is returned.
Location ISRWord::Seek( Location target ) {
    if(wordSeekLookupTable.empty()) {
        while(Next() <= target) {
        }
        return currentLocation;
    }

    auto best = wordSeekLookupTable.front();
    for(const auto& entry : wordSeekLookupTable) {
        if(!(entry.realLocation < target)) {
            break;
        }
        best = entry;
    }

    string currentChunkFileLocation = util::GetCurrentWorkingDir() + "/constraintSolver/index-test-files/twitter/" + to_string(listOfChunks[currentChunk]) + ".txt";
    int currentChunkFile = open(currentChunkFileLocation.c_str(), O_RDONLY);
    if(currentChunkFile < 0) {
        return currentLocation;
    }

    ssize_t currentChunkFileSize = FileSize(currentChunkFile);
    void* mapping = mmap(nullptr, currentChunkFileSize, PROT_READ, MAP_PRIVATE, currentChunkFile, 0);
    // The mapping outlives the descriptor, so close it here rather than
    // leaking one descriptor per Seek call.
    close(currentChunkFile);
    if(mapping == MAP_FAILED) {
        return currentLocation;
    }

    // NOTE(review): any mapping previously referenced by currentMemMap is not
    // released here; its base address is not tracked in this function.
    currentMemMap = static_cast<char*>(mapping) + best.seekOffset;
    currentLocation = best.realLocation;
    return best.realLocation;
}
......
......@@ -14,7 +14,7 @@ using namespace std;
int main ( )
{
char* query = "iphone";
ISRWord queryWord("fsfasfas");
ISRWord queryWord("nike");
ISREndDoc endDocs;
vector<size_t> locations;
vector<DocumentEnding> docEnds;
......@@ -23,22 +23,19 @@ int main ( )
while(queryWord.getCurrentLocation() != MAX_Location) {
locations.push_back(queryWord.Next());
}
for(auto loc : locations) {
cout << loc << endl;
while(endDocs.next().url != "aaa")
{
for(auto locs : locations)
{
if(locs < endDocs.getCurrentDoc().docEndPosition &&
locs >= (endDocs.getCurrentDoc().docEndPosition - endDocs.getCurrentDoc().docNumWords)) {
urls.insert(endDocs.getCurrentDoc().url);
}
}
}
for(auto urrl : urls) {
cout << urrl << endl;
}
// while(endDocs.next().url != "aaa")
// {
// for(auto locs : locations)
// {
// if(locs < endDocs.getCurrentDoc().docEndPosition &&
// locs >= (endDocs.getCurrentDoc().docEndPosition - endDocs.getCurrentDoc().docNumWords)) {
// urls.insert(endDocs.getCurrentDoc().url);
// }
// }
//
// }
// for(auto urrl : urls) {
// cout << urrl << endl;
// }
return 0;
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment