Skip to content
Snippets Groups Projects
Commit 3c0eec03 authored by Nicholas Yang's avatar Nicholas Yang
Browse files

seek lookup table in

parent 22d29b60
No related branches found
No related tags found
1 merge request: !2 WIP: Crawler parser 2 merge into duplicate url-crawler
...@@ -54,7 +54,7 @@ add_executable(URLTEST shared/url.h shared/urlTest.cpp) ...@@ -54,7 +54,7 @@ add_executable(URLTEST shared/url.h shared/urlTest.cpp)
add_executable(search-engine search.cpp query/Query.cpp) add_executable(search-engine search.cpp query/Query.cpp)
add_executable(ISRWord-tests util/util.cpp constraintSolver/ISRWord.cpp constraintSolver/tests/ISRWordTests.cpp ) add_executable(ISRWord-tests constraintSolver/ISRWord.cpp constraintSolver/tests/ISRWordTests.cpp )
find_package(OpenSSL REQUIRED) find_package(OpenSSL REQUIRED)
......
...@@ -2,9 +2,14 @@ ...@@ -2,9 +2,14 @@
// Created by Jake Close on 3/13/18. // Created by Jake Close on 3/13/18.
// //
#include <string>
#include "ISRWord.h" #include "ISRWord.h"
using namespace std;

/**
 * Returns the size in bytes of the file referred to by descriptor `f`.
 *
 * @param f  File descriptor handed to fstat().
 * @return   The st_size value reported by fstat(), or 0 when fstat() fails
 *           (for example because `f` is not a valid descriptor) or reports
 *           a negative size.
 */
size_t FileSize(int f) {
    struct stat fileInfo;
    // On failure fstat() does not fill in fileInfo, so it must not be read.
    if (fstat(f, &fileInfo) != 0 || fileInfo.st_size < 0) {
        return 0;
    }
    return static_cast<size_t>(fileInfo.st_size);
}
ISRWord::ISRWord(char* word) : term(word) { ISRWord::ISRWord(char* word) : term(word) {
getChunks(); getChunks();
...@@ -24,15 +29,28 @@ vector<size_t> ISRWord::getSeekContents(string fileName) { ...@@ -24,15 +29,28 @@ vector<size_t> ISRWord::getSeekContents(string fileName) {
string word = ""; string word = "";
bool midWord = false; bool midWord = false;
bool midFind = false; bool midFind = false;
WordSeek wordDictionaryEntry;
if(memMap != MAP_FAILED) { if(memMap != MAP_FAILED) {
for(char* map = memMap; map < memMap + fileSize; map++) { for(char* map = memMap; map < memMap + fileSize; map++) {
if(midFind && isalpha(*map)) { if(midFind && isalpha(*map)) {
break; break;
} }
switch(*map) { switch(*map) {
case '\t': if(midFind) {
case '<':
wordDictionaryEntry = WordSeek();
break;
case '>':
wordDictionaryEntry.seekOffset = stoll(word);
wordSeekLookupTable.push_back(wordDictionaryEntry);
break;
case ',':
wordDictionaryEntry.realLocation = stoll(word);
break;
}
case '\n': case '\n':
case '\r': case '\r':
case '\t':
case ' ': case ' ':
if (midFind && word != "") { if (midFind && word != "") {
contents.push_back(stoll(word)); contents.push_back(stoll(word));
...@@ -151,6 +169,8 @@ Location ISRWord::next() { ...@@ -151,6 +169,8 @@ Location ISRWord::next() {
//go to next chunk //go to next chunk
// NOTE(review): as shown here the body holds only an empty `if` on
// wordSeekLookupTable and no return statement, even though the signature
// declares a Location result; `target` is not used yet. Confirm what seek()
// is meant to return before relying on it.
Location ISRWord::seek( Location target ) { Location ISRWord::seek( Location target ) {
if(!wordSeekLookupTable.empty()) {
}
} }
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#pragma once #pragma once
//#include "ISR.h" //#include "ISR.h"
#include <iostream>
#include <vector> #include <vector>
#include <fcntl.h> #include <fcntl.h>
#include <stdio.h> #include <stdio.h>
...@@ -13,18 +14,13 @@ ...@@ -13,18 +14,13 @@
#include <sys/mman.h> #include <sys/mman.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/types.h> #include <sys/types.h>
#include "WordSeek.h"
//#include "../util/util.h" //#include "../util/util.h"
/**
 * Returns the size in bytes of the file referred to by descriptor `f`.
 *
 * @param f  File descriptor handed to fstat().
 * @return   The st_size value reported by fstat(), or 0 when fstat() fails
 *           (for example because `f` is not a valid descriptor) or reports
 *           a negative size.
 */
size_t FileSize(int f) {
    struct stat fileInfo;
    // On failure fstat() does not fill in fileInfo, so it must not be read.
    if (fstat(f, &fileInfo) != 0 || fileInfo.st_size < 0) {
        return 0;
    }
    return static_cast<size_t>(fileInfo.st_size);
}
using namespace std; using namespace std;
//Find occurrences of individual words //Find occurrences of individual words
typedef size_t Location; typedef size_t Location;
...@@ -50,6 +46,7 @@ public: ...@@ -50,6 +46,7 @@ public:
char* term; char* term;
char* masterIndex; char* masterIndex;
vector<size_t> listOfChunks; vector<size_t> listOfChunks;
vector<WordSeek> wordSeekLookupTable;
size_t currentChunk; size_t currentChunk;
char* currentMemMap; char* currentMemMap;
......
#pragma once
/**
 * One entry of a word's seek lookup table: a location paired with the
 * offset to seek to for it.
 *
 * Both members start at zero so that a default-constructed entry
 * (`WordSeek entry;`) never exposes indeterminate values if it is stored
 * before both fields have been assigned.
 */
class WordSeek {
public:
    // Offset to seek to for this entry.
    // NOTE(review): presumably a position within an index chunk — confirm.
    ssize_t seekOffset = 0;
    // Location value this entry's seekOffset corresponds to.
    size_t realLocation = 0;
};
\ No newline at end of file
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
#include <iostream> #include <iostream>
#include "../ISRWord.h" #include "../ISRWord.h"
#include "../ISRWord.cpp"
using namespace std; using namespace std;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment