Skip to content
Snippets Groups Projects
Commit 5495317c authored by aanvi's avatar aanvi
Browse files

Merge branch 'milestone1' of https://gitlab.eecs.umich.edu/vcday/eecs398-search into milestone1

parents a73589cb 76386b83
No related branches found
No related tags found
1 merge request: !2 WIP: Crawler parser 2 merge into duplicate url-crawler
......@@ -54,7 +54,7 @@ add_executable(URLTEST shared/url.h shared/urlTest.cpp)
# search-engine executable, built from search.cpp and query/Query.cpp.
add_executable(search-engine search.cpp query/Query.cpp)
# NOTE(review): the ISRWord-tests target is declared twice below, once with
# util/util.cpp and once without. This looks like the before/after pair of a
# diff; a target name can only be passed to add_executable once, so confirm
# which of the two lines is meant to remain.
add_executable(ISRWord-tests util/util.cpp constraintSolver/ISRWord.cpp constraintSolver/tests/ISRWordTests.cpp )
add_executable(ISRWord-tests constraintSolver/ISRWord.cpp constraintSolver/tests/ISRWordTests.cpp )
# Locate OpenSSL; REQUIRED makes configuration stop with an error if it is not found.
find_package(OpenSSL REQUIRED)
......
......@@ -2,9 +2,14 @@
// Created by Jake Close on 3/13/18.
//
#include <unistd.h>
#include <string>
#include "ISRWord.h"
using namespace std;
/**
 * Returns the size in bytes of the file behind an open descriptor.
 *
 * @param f  open file descriptor to query with fstat().
 * @return   st_size reported by fstat(), or 0 when fstat() fails (for example
 *           when `f` is not a valid descriptor) or reports a negative size.
 */
size_t FileSize(int f) {
    struct stat fileInfo;
    // Without this check a failed fstat() would leave fileInfo uninitialised
    // and the caller would receive a garbage size.
    if (fstat(f, &fileInfo) != 0 || fileInfo.st_size < 0) {
        return 0;
    }
    return static_cast<size_t>(fileInfo.st_size);
}
ISRWord::ISRWord(char* word) : term(word) {
getChunks();
......@@ -24,15 +29,28 @@ vector<size_t> ISRWord::getSeekContents(string fileName) {
string word = "";
bool midWord = false;
bool midFind = false;
WordSeek wordDictionaryEntry;
if(memMap != MAP_FAILED) {
for(char* map = memMap; map < memMap + fileSize; map++) {
if(midFind && isalpha(*map)) {
break;
}
switch(*map) {
case '\t':
if(midFind) {
case '<':
wordDictionaryEntry = WordSeek();
break;
case '>':
wordDictionaryEntry.seekOffset = stoll(word);
wordSeekLookupTable.push_back(wordDictionaryEntry);
break;
case ',':
wordDictionaryEntry.realLocation = stoll(word);
break;
}
case '\n':
case '\r':
case '\t':
case ' ':
if (midFind && word != "") {
contents.push_back(stoll(word));
......@@ -151,6 +169,25 @@ Location ISRWord::next() {
//go to next chunk
/**
 * Repositions this ISR near `target`.
 *
 * Seek table empty: calls next() repeatedly until it yields a location
 * greater than `target`, then returns currentLocation.
 *
 * Seek table present: walks the table in order and stops at the first entry
 * whose realLocation is not below `target`. The entry chosen is the last one
 * seen before that point (the first entry if none precedes it, the last entry
 * if every entry is below `target`). The current chunk file is then mapped and
 * currentMemMap / currentLocation are moved to the chosen entry.
 *
 * @param target  location to seek towards.
 * @return  the chosen entry's realLocation on the table path; currentLocation
 *          on the linear path, or when the chunk file cannot be opened or
 *          mapped (the cursor is left untouched in that case).
 */
Location ISRWord::seek( Location target ) {
    if(wordSeekLookupTable.empty()) {
        // No seek table available: step forward one posting at a time.
        while(next() <= target) {
        }
        return currentLocation;
    }

    auto best = wordSeekLookupTable.front();
    for(const auto& entry : wordSeekLookupTable) {
        if(!(entry.realLocation < target)) {
            break;
        }
        best = entry;
    }
    // Reaching here with every entry below `target` is valid: `best` is then
    // the last table entry. Previously that case fell off the end of the
    // function without returning a value.

    const string chunkPath = "index-test-files/twitter/index" + to_string(listOfChunks[currentChunk]) + ".txt";
    const int chunkFd = open(chunkPath.c_str(), O_RDONLY);
    if(chunkFd == -1) {
        return currentLocation;
    }

    const size_t chunkSize = FileSize(chunkFd);
    void* mapping = mmap(nullptr, chunkSize, PROT_READ, MAP_PRIVATE, chunkFd, 0);
    // The mapping stays valid after the descriptor is closed, so release the
    // descriptor right away instead of leaking one per seek.
    close(chunkFd);
    if(mapping == MAP_FAILED) {
        return currentLocation;
    }

    // NOTE(review): the mapping currentMemMap pointed into before this call is
    // not unmapped here; its base address and length are not tracked in this
    // function, so confirm where (or whether) it is released.
    currentMemMap = static_cast<char*>(mapping) + best.seekOffset;
    currentLocation = best.realLocation;
    return best.realLocation;
}
......@@ -5,6 +5,7 @@
#pragma once
//#include "ISR.h"
#include <iostream>
#include <vector>
#include <fcntl.h>
#include <stdio.h>
......@@ -13,18 +14,13 @@
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "WordSeek.h"
//#include "../util/util.h"
/**
 * Returns the size in bytes of the file behind an open descriptor.
 *
 * Declared inline because this definition lives in a header: a non-inline
 * definition here is emitted by every translation unit that includes the
 * header and collides at link time.
 *
 * @param f  open file descriptor to query with fstat().
 * @return   st_size reported by fstat(), or 0 when fstat() fails (for example
 *           when `f` is not a valid descriptor) or reports a negative size.
 */
inline size_t FileSize(int f) {
    struct stat fileInfo;
    // A failed fstat() leaves fileInfo uninitialised; report 0 instead of
    // returning whatever happens to be in st_size.
    if (fstat(f, &fileInfo) != 0 || fileInfo.st_size < 0) {
        return 0;
    }
    return static_cast<size_t>(fileInfo.st_size);
}
using namespace std;
//Find occurrences of individual words
typedef size_t Location;
......@@ -50,6 +46,7 @@ public:
char* term;
char* masterIndex;
vector<size_t> listOfChunks;
vector<WordSeek> wordSeekLookupTable;
size_t currentChunk;
char* currentMemMap;
......
#pragma once
/**
 * One entry of a word's seek lookup table: pairs a location with the offset
 * to jump to in order to resume reading at that location.
 *
 * Both members start at 0 so that a default-constructed entry never carries
 * indeterminate values into the lookup table.
 */
class WordSeek {
public:
    // Offset added to the start of the mapped chunk when seeking to this entry.
    ssize_t seekOffset = 0;
    // Location this entry corresponds to; compared against the seek target.
    size_t realLocation = 0;
};
\ No newline at end of file
......@@ -4,7 +4,6 @@
#include <iostream>
#include "../ISRWord.h"
#include "../ISRWord.cpp"
using namespace std;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment