Skip to content
Snippets Groups Projects
Commit 3c0eec03 authored by Nicholas Yang's avatar Nicholas Yang
Browse files

seek lookup table in

parent 22d29b60
No related branches found
No related tags found
1 merge request!2WIP:Crawler parser 2 merge into duplicate url-crawler
......@@ -54,7 +54,7 @@ add_executable(URLTEST shared/url.h shared/urlTest.cpp)
add_executable(search-engine search.cpp query/Query.cpp)
add_executable(ISRWord-tests util/util.cpp constraintSolver/ISRWord.cpp constraintSolver/tests/ISRWordTests.cpp )
add_executable(ISRWord-tests constraintSolver/ISRWord.cpp constraintSolver/tests/ISRWordTests.cpp )
find_package(OpenSSL REQUIRED)
......
......@@ -2,9 +2,14 @@
// Created by Jake Close on 3/13/18.
//
#include <string>
#include "ISRWord.h"
using namespace std;
// Returns the size, in bytes, of the file referred to by descriptor `f`.
// If fstat() fails (for example `f` is not an open descriptor) the stat
// buffer is never filled in, so 0 is returned instead of reading it.
size_t FileSize(int f) {
    struct stat fileInfo;
    if (fstat(f, &fileInfo) != 0) {
        return 0;
    }
    // st_size is a signed off_t; guard against a negative value wrapping
    // around when converted to the unsigned return type.
    if (fileInfo.st_size < 0) {
        return 0;
    }
    return static_cast<size_t>(fileInfo.st_size);
}
ISRWord::ISRWord(char* word) : term(word) {
getChunks();
......@@ -24,15 +29,28 @@ vector<size_t> ISRWord::getSeekContents(string fileName) {
string word = "";
bool midWord = false;
bool midFind = false;
WordSeek wordDictionaryEntry;
if(memMap != MAP_FAILED) {
for(char* map = memMap; map < memMap + fileSize; map++) {
if(midFind && isalpha(*map)) {
break;
}
switch(*map) {
case '\t':
if(midFind) {
case '<':
wordDictionaryEntry = WordSeek();
break;
case '>':
wordDictionaryEntry.seekOffset = stoll(word);
wordSeekLookupTable.push_back(wordDictionaryEntry);
break;
case ',':
wordDictionaryEntry.realLocation = stoll(word);
break;
}
case '\n':
case '\r':
case '\t':
case ' ':
if (midFind && word != "") {
contents.push_back(stoll(word));
......@@ -151,6 +169,8 @@ Location ISRWord::next() {
//go to next chunk
// Stub: currently only tests whether wordSeekLookupTable has any entries;
// the branch body is empty and `target` is not used yet.
// NOTE(review): no path returns a value although the return type is
// Location (non-void), so using the result of a call is undefined behavior.
// A return statement must be added once the lookup logic is written.
Location ISRWord::seek( Location target ) {
if(!wordSeekLookupTable.empty()) {
}
}
......@@ -5,6 +5,7 @@
#pragma once
//#include "ISR.h"
#include <iostream>
#include <vector>
#include <fcntl.h>
#include <stdio.h>
......@@ -13,18 +14,13 @@
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "WordSeek.h"
//#include "../util/util.h"
// Returns the size, in bytes, of the file referred to by descriptor `f`.
// If fstat() fails (for example `f` is not an open descriptor) the stat
// buffer is never filled in, so 0 is returned instead of reading it.
size_t FileSize(int f) {
    struct stat fileInfo;
    if (fstat(f, &fileInfo) != 0) {
        return 0;
    }
    // st_size is a signed off_t; guard against a negative value wrapping
    // around when converted to the unsigned return type.
    if (fileInfo.st_size < 0) {
        return 0;
    }
    return static_cast<size_t>(fileInfo.st_size);
}
using namespace std;
//Find occurrences of individual words
typedef size_t Location;
......@@ -50,6 +46,7 @@ public:
char* term;
char* masterIndex;
vector<size_t> listOfChunks;
vector<WordSeek> wordSeekLookupTable;
size_t currentChunk;
char* currentMemMap;
......
#pragma once
// One entry of the seek lookup table: a seek offset paired with a location.
// Both fields start at zero so that a default-constructed entry
// (e.g. `WordSeek entry;`) never exposes indeterminate values.
class WordSeek {
public:
    ssize_t seekOffset = 0;    // offset value recorded for this entry
    size_t realLocation = 0;   // location value recorded for this entry
};
\ No newline at end of file
......@@ -4,7 +4,6 @@
#include <iostream>
#include "../ISRWord.h"
#include "../ISRWord.cpp"
using namespace std;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment