From 3c0eec038f83adbac0a1eadea49362fd9566093c Mon Sep 17 00:00:00 2001 From: Nicholas Yang <parablank@gmail.com> Date: Tue, 20 Mar 2018 15:02:38 -0400 Subject: [PATCH] seek lookup table in --- CMakeLists.txt | 2 +- constraintSolver/ISRWord.cpp | 26 ++++++++++++++++++++++--- constraintSolver/ISRWord.h | 11 ++++------- constraintSolver/WordSeek.h | 7 +++++++ constraintSolver/tests/ISRWordTests.cpp | 1 - 5 files changed, 35 insertions(+), 12 deletions(-) create mode 100644 constraintSolver/WordSeek.h diff --git a/CMakeLists.txt b/CMakeLists.txt index d0cadfc..7a341bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,7 +54,7 @@ add_executable(URLTEST shared/url.h shared/urlTest.cpp) add_executable(search-engine search.cpp query/Query.cpp) -add_executable(ISRWord-tests util/util.cpp constraintSolver/ISRWord.cpp constraintSolver/tests/ISRWordTests.cpp ) +add_executable(ISRWord-tests constraintSolver/ISRWord.cpp constraintSolver/tests/ISRWordTests.cpp ) find_package(OpenSSL REQUIRED) diff --git a/constraintSolver/ISRWord.cpp b/constraintSolver/ISRWord.cpp index a349730..639b151 100644 --- a/constraintSolver/ISRWord.cpp +++ b/constraintSolver/ISRWord.cpp @@ -2,9 +2,14 @@ // Created by Jake Close on 3/13/18. // +#include <string> #include "ISRWord.h" -using namespace std; +size_t FileSize(int f) { + struct stat fileInfo; + fstat( f, &fileInfo); + return fileInfo.st_size; +} ISRWord::ISRWord(char* word) : term(word) { getChunks(); @@ -24,15 +29,28 @@ vector<size_t> ISRWord::getSeekContents(string fileName) { string word = ""; bool midWord = false; bool midFind = false; + WordSeek wordDictionaryEntry; if(memMap != MAP_FAILED) { for(char* map = memMap; map < memMap + fileSize; map++) { if(midFind && isalpha(*map)) { break; } switch(*map) { - case '\t': + if(midFind) { + case '<': + wordDictionaryEntry = WordSeek(); + break; + case '>': + wordDictionaryEntry.seekOffset = stoll(word); + wordSeekLookupTable.push_back(wordDictionaryEntry); + break; + case ',': + wordDictionaryEntry.realLocation = stoll(word); + break; + } case '\n': case '\r': + case '\t': case ' ': if (midFind && word != "") { contents.push_back(stoll(word)); @@ -151,6 +169,8 @@ Location ISRWord::next() { //go to next chunk Location ISRWord::seek( Location target ) { - + if(!wordSeekLookupTable.empty()) { + + } } diff --git a/constraintSolver/ISRWord.h b/constraintSolver/ISRWord.h index e0eb8c8..eface93 100644 --- a/constraintSolver/ISRWord.h +++ b/constraintSolver/ISRWord.h @@ -5,6 +5,7 @@ #pragma once //#include "ISR.h" +#include <iostream> #include <vector> #include <fcntl.h> #include <stdio.h> @@ -13,18 +14,13 @@ #include <sys/mman.h> #include <sys/stat.h> #include <sys/types.h> +#include "WordSeek.h" //#include "../util/util.h" - -size_t FileSize(int f) { - struct stat fileInfo; - fstat( f, &fileInfo); - return fileInfo.st_size; -} - using namespace std; + //Find occurrences of individual words typedef size_t Location; @@ -50,6 +46,7 @@ public: char* term; char* masterIndex; vector<size_t> listOfChunks; + vector<WordSeek> wordSeekLookupTable; size_t currentChunk; char* currentMemMap; diff --git a/constraintSolver/WordSeek.h b/constraintSolver/WordSeek.h new file mode 100644 index 0000000..e0f44b9 --- /dev/null +++ b/constraintSolver/WordSeek.h @@ -0,0 +1,7 @@ +#pragma once + +class WordSeek { +public: + ssize_t seekOffset; + size_t realLocation; +}; \ No newline at end of file diff --git a/constraintSolver/tests/ISRWordTests.cpp b/constraintSolver/tests/ISRWordTests.cpp index d34c5ea..e9501ac 100644 --- a/constraintSolver/tests/ISRWordTests.cpp +++ b/constraintSolver/tests/ISRWordTests.cpp @@ -4,7 +4,6 @@ #include <iostream> #include "../ISRWord.h" -#include "../ISRWord.cpp" using namespace std; -- GitLab