Skip to content
Snippets Groups Projects
Commit 834ba0da authored by Nicholas Yang's avatar Nicholas Yang
Browse files

Merge remote-tracking branch 'origin/ConstraintSolver' into ConstraintSolver

parents 9f77422e 1b1de95c
No related branches found
No related tags found
No related merge requests found
......@@ -150,6 +150,16 @@ add_executable(ISROR-tests
util/stringProcessing.cpp
util/Stemmer.cpp )
# Test executable for the ISRAnd constraint-solver ISR.
add_executable(ISRAnd-tests
util/util.cpp
constraintSolver/ISR.cpp
constraintSolver/ISRWord.cpp
constraintSolver/ISRAnd.cpp
# NOTE(review): this target is named ISRAnd-tests but lists the ISROr test
# source, and ISROr.cpp itself is not part of the target. Presumably an
# ISRAnd-specific test file was intended here -- confirm.
constraintSolver/tests/ISROrTests.cpp
constraintSolver/ISREndDoc.cpp
util/stringProcessing.cpp
util/Stemmer.cpp )
find_package(OpenSSL REQUIRED)
......
......@@ -12,8 +12,10 @@
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <limits>
// Position of a posting in the index. Location 0 is the null location.
using Location = size_t;

// Sentinel location compared against by callers to detect that an ISR has
// no further postings. Note that it is the maximum of `unsigned`, not of
// `Location` itself.
const Location MAX_Location = static_cast<Location>( std::numeric_limits<unsigned>::max() );
class ISR
{
......
......@@ -3,3 +3,115 @@
//
#include "ISRAnd.h"
#include <cassert>
// Builds an AND ISR over the given child ISRs.
//
// Every child is rewound with First(); the constructor then records
//   - nearestTerm / nearestStartLocation: the child with the smallest
//     current location, and that location;
//   - nearestEndLocation: the largest current location among the children;
//   - NumberOfTerms: how many children were supplied.
//
// InputTerms must contain at least two ISRs (checked with assert).
ISRAnd::ISRAnd ( vector<ISR * > InputTerms ) : Terms( InputTerms )
{
    assert(InputTerms.size() > 1);

    // These members have no in-class initializers, so they must be given
    // defined starting values before the min/max comparisons and the
    // counter increment below read them.
    NumberOfTerms = 0;
    nearestTerm = nullptr;
    nearestStartLocation = MAX_Location;
    nearestEndLocation = 0;

    for (ISR *currentTerm : InputTerms)
    {
        currentTerm->First();
        // Named termLocation so it does not shadow the currentLocation member.
        const Location termLocation = currentTerm->currentLocation;

        if (termLocation < nearestStartLocation) {
            nearestTerm = currentTerm;
            nearestStartLocation = termLocation;
        }
        if (termLocation > nearestEndLocation) {
            nearestEndLocation = termLocation;
        }
        ++NumberOfTerms;
    }
}
// Rewinds every child ISR with First(), then seeks this ISR to the smallest
// location any child reported. The result is stored in currentLocation and
// returned.
Location ISRAnd::First()
{
    Location earliest = MAX_Location;
    for (ISR *term : Terms)
    {
        const Location termFirst = term->First();
        earliest = (termFirst < earliest) ? termFirst : earliest;
    }

    // FIXME: open question -- should this be the nearest location of a single
    // sub-term, or the nearest location where all terms match?
    currentLocation = Seek(earliest);
    return currentLocation;
}
// Advances to the next match after the current nearest start location.
//
// Seek() looks for the first occurrence *beginning at* its target, so the
// target must be one past nearestStartLocation; seeking to
// nearestStartLocation itself can only land on the same position again.
//
// NOTE(review): this relies on nearestStartLocation tracking the latest
// match; in the code visible here only the constructor assigns it -- confirm
// that Seek() is meant to update it.
Location ISRAnd::Next ( )
{
    return Seek( nearestStartLocation + 1 );
}
// Seeks this ISR to the location reported by GetEndDocument() and returns
// the location Seek() lands on.
Location ISRAnd::NextDocument()
{
    const Location documentEnd = GetEndDocument();
    return Seek(documentEnd);
}
// Seeks every child ISR to `target` and returns the smallest location any
// child landed on, or MAX_Location if any child has no occurrence at or
// after `target`.
//
// TODO: the document-boundary part of the algorithm is not implemented yet.
// Intended steps:
// 1. Seek all the ISRs to the first occurrence beginning at
//    the target location.
// 2. Move the document end ISR to just past the furthest
//    word, then calculate the document begin location.
// 3. Seek all the other terms to past the document begin.
// 4. If any term is past the document end, return to
//    step 2.
// 5. If any ISR reaches the end, there is no match.
Location ISRAnd::Seek ( Location target )
{
    Location nearest;
    Location furthest = 0;

    while (furthest != MAX_Location) {
        nearest = MAX_Location;

        // Find the nearest and furthest positions among the children.
        for (auto isr : Terms) {
            Location temp = isr->Seek(target);
            if (temp == MAX_Location) return MAX_Location;   // a child is exhausted: no match
            if (temp > furthest) furthest = temp;
            if (temp < nearest) nearest = temp;
        }

        // Get the document of the furthest ISR.
        // NOTE(review): this seeks the document-end ISR to `target`, not to
        // `furthest` as step 2 describes -- confirm which is intended.
        DocumentEnd->Seek(target);

        // Placeholder until the document start can be queried, e.g.
        //   Location lastDocStart = DocumentEnd->GetStart()?;
        // With 0 here the match branch below is always taken.
        Location lastDocStart = 0;

        if (nearest >= lastDocStart)
        {
            // MATCH
            // Open questions: does it matter for 'AND' which location is
            // returned (first/last in document)? Should an ISR be returned too?
            return nearest;
        } else {
            // No match yet, keep searching from the document start.
            target = lastDocStart;
        }
    }

    // Explicit result if the loop ever exits, so control can never flow off
    // the end of this value-returning function.
    return MAX_Location;
}
// Seeks the document-end ISR to currentLocation and returns where it lands.
Location ISRAnd::GetEndDocument()
{
    // Open question from the author: what does currentLocation hold, and
    // when is it updated?
    const Location documentEnd = DocumentEnd->Seek(currentLocation);
    return documentEnd;
}
......@@ -5,37 +5,31 @@
#pragma once
#include "ISR.h"
#include <vector>
using namespace std;
//Find occurrences of all child ISRs within a single document
// ISR that combines several child ISRs with AND semantics.
// NOTE(review): this listing appears to interleave pre-change and post-change
// declarations from a diff (two `Terms` members, two `Seek`/`Next`
// declarations, two `nearestTerm` members) -- confirm against the real header
// before relying on it.
class ISRAnd : ISR
{
public:
ISR **Terms;
// Child ISRs; copied from the constructor's InputTerms argument.
vector<ISR*>Terms;
// Number of child ISRs; incremented once per input term by the constructor.
unsigned NumberOfTerms;
Post *Seek ( Location target )
{
// 1. Seek all the ISRs to the first occurrence beginning at
// the target location.
// 2. Move the document end ISR to just past the furthest
// word, then calculate the document begin location.
// 3. Seek all the other terms to past the document begin.
// 4. If any term is past the document end, return to
// step 2.
// 5. If any ISR reaches the end, there is no match.
}
// Rewinds every input term and records the nearest/furthest start positions.
ISRAnd ( vector<ISR * > InputTerms );
Location First ( ) override;
Location Next ( ) override;
Location NextDocument ( ) override;
Location Seek ( Location target ) override;
Location GetEndDocument ( ) override;
Location GetCurrentLocation();
Post *Next ( )
{
// Seek from one past the nearest start location.
return Seek( nearestStartLocation + 1 );
}
private:
unsigned nearestTerm, farthestTerm;
// Child ISR with the smallest current location, as found by the constructor.
ISR *nearestTerm;
// nearestStartLocation and nearestEndLocation are the start and end of the nearestTerm.
Location nearestStartLocation, nearestEndLocation;
};
......
......@@ -5,6 +5,28 @@
#include "ISROr.h"
// Builds an OR ISR over the given child ISRs.
//
// Every child is rewound with First(); the constructor then records
//   - nearestTerm / nearestStartLocation: the child with the smallest
//     current location, and that location;
//   - nearestEndLocation: the largest current location among the children;
//   - NumberOfTerms: how many children were supplied.
ISROr::ISROr ( vector<ISR * > InputTerms ) : Terms( InputTerms )
{
    // Give the tracking members defined starting values before the min/max
    // comparisons and the counter increment below read them.
    NumberOfTerms = 0;
    nearestTerm = nullptr;
    nearestStartLocation = MAX_Location;
    nearestEndLocation = 0;

    for (ISR *currentTerm : InputTerms)
    {
        currentTerm->First();
        // Named termLocation so it does not shadow the currentLocation member.
        const Location termLocation = currentTerm->currentLocation;

        if (termLocation < nearestStartLocation) {
            nearestTerm = currentTerm;
            nearestStartLocation = termLocation;
        }
        if (termLocation > nearestEndLocation) {
            nearestEndLocation = termLocation;
        }
        ++NumberOfTerms;
    }
}
Location ISROr::GetStartLocation ( )
{
return nearestStartLocation;
......
......@@ -15,6 +15,8 @@ public:
vector<ISR*>Terms;
unsigned NumberOfTerms;
ISROr ( vector<ISR * > InputTerms );
Location First ( ) override;
Location Next ( ) override;
Location NextDocument ( ) override;
......@@ -27,30 +29,6 @@ public:
Location GetEndLocation ( );
// Builds an OR ISR over the given child ISRs.
//
// Every child is rewound with First(); the constructor then records the
// child with the smallest current location (nearestTerm /
// nearestStartLocation), the largest current location among the children
// (nearestEndLocation), and the number of children (NumberOfTerms).
ISROr ( vector<ISR * > InputTerms ) : Terms( InputTerms )
    {
    // Give the tracking members defined starting values before the min/max
    // comparisons and the counter increment below read them.
    NumberOfTerms = 0;
    nearestTerm = nullptr;
    nearestStartLocation = MAX_Location;
    nearestEndLocation = 0;

    for ( ISR *currentTerm : InputTerms )
        {
        currentTerm->First( );
        // Named termLocation so it does not shadow the currentLocation member.
        const Location termLocation = currentTerm->currentLocation;

        if ( termLocation < nearestStartLocation )
            {
            nearestTerm = currentTerm;
            nearestStartLocation = termLocation;
            }
        if ( termLocation > nearestEndLocation )
            {
            nearestEndLocation = termLocation;
            }
        ++NumberOfTerms;
        }
    }
private:
ISR *nearestTerm;
// nearestStartLocation and nearestEndLocation are the start and end of the nearestTerm.
......
......@@ -24,7 +24,7 @@ int main ( )
vector<size_t> locations;
vector<DocumentEnding> docEnds;
set<string> urls;
while(queryOr->GetCurrentLocation() != 9999999999999) {
while(queryOr->GetCurrentLocation() != MAX_Location) {
locations.push_back(queryOr->Next());
}
while(endDocs.next().url != "aaa")
......
......@@ -14,13 +14,13 @@ using namespace std;
int main ( )
{
char* query = "iphone";
ISRWord queryWord(query);
char* query;
ISRWord queryWord("aare");
ISREndDoc endDocs;
vector<size_t> locations;
vector<DocumentEnding> docEnds;
set<string> urls;
locations.push_back(queryWord.getCurrentLocation());
while(queryWord.getCurrentLocation() != 9999999999999) {
while(queryWord.getCurrentLocation() != MAX_Location) {
locations.push_back(queryWord.Next());
}
for(auto loc : locations) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment