Skip to content
Snippets Groups Projects
Commit 1b1de95c authored by benbergk's avatar benbergk
Browse files

logic for ISRAnd

parent 2bcbf5e6
No related branches found
No related tags found
No related merge requests found
......@@ -150,6 +150,16 @@ add_executable(ISROR-tests
util/stringProcessing.cpp
util/Stemmer.cpp )
# Test executable for the ISRAnd constraint-solver iterator: links the ISR base,
# the word / and / end-of-document ISRs, and the shared util sources.
# NOTE(review): the test driver listed here is tests/ISROrTests.cpp (an ISROr
# test file by name) -- confirm whether an ISRAnd-specific test source was intended.
add_executable(ISRAnd-tests
util/util.cpp
constraintSolver/ISR.cpp
constraintSolver/ISRWord.cpp
constraintSolver/ISRAnd.cpp
constraintSolver/tests/ISROrTests.cpp
constraintSolver/ISREndDoc.cpp
util/stringProcessing.cpp
util/Stemmer.cpp )
find_package(OpenSSL REQUIRED)
......
......@@ -12,8 +12,10 @@
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <limits>
// A position in the index. Location 0 is the null location.
using Location = size_t;

// Largest Location handed out by the ISRs: the maximum value of `unsigned`,
// widened to Location. Code in this project compares against it to detect
// that an iterator has run out of postings.
constexpr Location MAX_Location = std::numeric_limits< unsigned >::max( );
class ISR
{
......
......@@ -3,3 +3,115 @@
//
#include "ISRAnd.h"
#include <cassert>
// Builds an AND iterator over InputTerms (at least two terms are required).
//
// Every child ISR is rewound with First(). While doing so the constructor
// records:
//   - nearestStartLocation / nearestTerm: the smallest child location seen and
//     the child that reported it,
//   - nearestEndLocation: the largest child location seen,
//   - NumberOfTerms: how many children were visited.
//
// The trackers are initialized explicitly before the loop; previously they
// were compared against while still uninitialized, so the recorded minimum,
// maximum and count depended on whatever garbage the members held.
ISRAnd::ISRAnd ( vector<ISR * > InputTerms )
    : Terms( InputTerms ),
      NumberOfTerms( 0 ),
      nearestTerm( nullptr ),
      nearestStartLocation( MAX_Location ),
      nearestEndLocation( 0 )
{
    assert( InputTerms.size( ) > 1 );

    for ( ISR *currentTerm : InputTerms )
    {
        currentTerm->First( );
        const Location currentLocation = currentTerm->currentLocation;

        if ( currentLocation < nearestStartLocation )
        {
            nearestTerm = currentTerm;
            nearestStartLocation = currentLocation;
        }
        if ( currentLocation > nearestEndLocation )
        {
            nearestEndLocation = currentLocation;
        }
        ++NumberOfTerms;
        // Note: the old trailing `currentTerm++` only bumped the loop's local
        // copy of the pointer and had no effect, so it has been removed.
    }
}
// Rewinds every child term with First(), seeks this iterator to the smallest
// location any child reported, stores the result of that Seek in
// currentLocation and returns it.
Location ISRAnd::First ( )
{
    // FIXME (carried over from the original): still under review.
    Location earliest = MAX_Location;
    for ( ISR *term : Terms )
    {
        const Location start = term->First( );
        earliest = ( start < earliest ) ? start : earliest;
    }

    // FIXME (open question from the original): should this return the nearest
    // location of one subterm, or the nearest location where all terms match?
    return currentLocation = Seek( earliest );
}
// Advances to the next match after the current one and returns its location,
// or MAX_Location when the terms are exhausted.
//
// Seek() looks for the first occurrence *beginning at* its target, so seeking
// to nearestStartLocation itself would hand back the same match on every call
// and a `while ( location != MAX_Location ) Next( )` loop would never finish.
// The search therefore starts one past the current match, and the result is
// stored so the following call continues from there. currentLocation is
// updated as well, mirroring what First() does with its Seek() result.
Location ISRAnd::Next ( )
{
    // Already exhausted: stay on the sentinel rather than seeking past it
    // (MAX_Location + 1 could wrap to 0 where Location is 32 bits wide).
    if ( nearestStartLocation == MAX_Location )
    {
        currentLocation = MAX_Location;
        return currentLocation;
    }

    nearestStartLocation = Seek( nearestStartLocation + 1 );
    currentLocation = nearestStartLocation;
    return currentLocation;
}
// Seeks to the location returned by GetEndDocument() and returns whatever
// Seek() reports for that target.
Location ISRAnd::NextDocument ( )
{
    const Location documentEnd = GetEndDocument( );
    return Seek( documentEnd );
}
// Seeks every child term to `target` and returns the smallest location any of
// them reports, or MAX_Location as soon as one child reports MAX_Location.
//
// TODO: the intended algorithm is only partially implemented:
// 1. Seek all the ISRs to the first occurrence beginning at
//    the target location.
// 2. Move the document end ISR to just past the furthest
//    word, then calculate the document begin location.
// 3. Seek all the other terms to past the document begin.
// 4. If any term is past the document end, return to
//    step 2.
// 5. If any ISR reaches the end, there is no match.
//
// Because the document-begin lookup does not exist yet, lastDocStart is a
// placeholder 0, so the first pass through the loop always returns `nearest`.
Location ISRAnd::Seek ( Location target )
{
    Location furthest = 0;
    while ( furthest != MAX_Location )
    {
        // Smallest child location found in this pass.
        Location nearest = MAX_Location;

        // Find the nearest and furthest child positions.
        for ( auto isr : Terms )
        {
            Location temp = isr->Seek( target );
            if ( temp == MAX_Location ) return MAX_Location;
            if ( temp > furthest ) furthest = temp;
            if ( temp < nearest ) nearest = temp;
        }

        // Get the document of the furthest ISR.
        // NOTE(review): this seeks DocumentEnd to `target`, not to `furthest`
        // as step 2 describes, and the result is unused -- confirm intent.
        DocumentEnd->Seek( target );

        // Set the next target to be the starting location of the document.
        //Location lastDocStart = DocumentEnd->GetStart()?;
        Location lastDocStart = 0;

        if ( nearest >= lastDocStart )
        {
            // MATCH
            // Does it matter for 'AND' what location we return? first/last in document?
            // Should we return an ISR as well?
            return nearest;
        }

        // No match yet, keep searching from the document start.
        target = lastDocStart;
    }

    // The loop condition allows an exit; without this return, control would
    // run off the end of a value-returning function (undefined behaviour).
    return MAX_Location;
}
// Seeks the DocumentEnd iterator to currentLocation and returns the location
// that seek reports.
Location ISRAnd::GetEndDocument ( )
{
    // Open question from the original: what does currentLocation hold, and
    // when is it updated?
    const Location endOfDocument = DocumentEnd->Seek( currentLocation );
    return endOfDocument;
}
......@@ -5,37 +5,31 @@
#pragma once
#include "ISR.h"
#include <vector>
using namespace std;
// Find occurrences of all child ISRs within a single document.
// NOTE(review): this listing declares Terms, nearestTerm, Seek and Next twice
// (an `ISR **` / `Post *` / `unsigned` form next to a `vector` / `Location` /
// `ISR *` form). It looks like the previous and the new version of the class
// shown together -- confirm which declarations are current.
class ISRAnd : ISR
{
public:
// Child terms that must all be matched.
ISR **Terms;
vector<ISR*>Terms;
// Number of child terms; counted up by the constructor in ISRAnd.cpp.
unsigned NumberOfTerms;
// Inline Seek returning Post *: the body holds only the algorithm outline
// below and has no return statement.
Post *Seek ( Location target )
{
// 1. Seek all the ISRs to the first occurrence beginning at
// the target location.
// 2. Move the document end ISR to just past the furthest
// word, then calculate the document begin location.
// 3. Seek all the other terms to past the document begin.
// 4. If any term is past the document end, return to
// step 2.
// 5. If any ISR reaches the end, there is no match.
}
// Builds the iterator over InputTerms; defined in ISRAnd.cpp.
ISRAnd ( vector<ISR * > InputTerms );
// Location-returning overrides of the ISR interface; defined in ISRAnd.cpp.
Location First ( ) override;
Location Next ( ) override;
Location NextDocument ( ) override;
Location Seek ( Location target ) override;
Location GetEndDocument ( ) override;
// NOTE(review): no definition of GetCurrentLocation is visible here --
// presumably it reports currentLocation; confirm.
Location GetCurrentLocation();
// Inline Next returning Post *: seeks one past nearestStartLocation.
Post *Next ( )
{
return Seek( nearestStartLocation + 1 );
}
private:
unsigned nearestTerm, farthestTerm;
// Child term that currently holds the smallest location.
ISR *nearestTerm;
// nearestStartLocation and nearestEndLocation are the start and end of the nearestTerm.
Location nearestStartLocation, nearestEndLocation;
};
......
......@@ -5,6 +5,28 @@
#include "ISROr.h"
// Builds an OR iterator over InputTerms.
//
// Every child ISR is rewound with First(). While doing so the constructor
// records the smallest child location (nearestStartLocation, with nearestTerm
// pointing at the child that reported it), the largest child location
// (nearestEndLocation) and the number of children visited (NumberOfTerms).
//
// The trackers are set explicitly before the loop; previously they were
// compared against and incremented without being given a starting value in
// this constructor.
ISROr::ISROr ( vector<ISR * > InputTerms ) : Terms( InputTerms ), NumberOfTerms( 0 )
{
    nearestTerm = nullptr;
    nearestStartLocation = MAX_Location;   // any real location compares smaller
    nearestEndLocation = 0;                // any real location compares larger

    for ( ISR *currentTerm : InputTerms )
    {
        currentTerm->First( );
        const Location currentLocation = currentTerm->currentLocation;

        if ( currentLocation < nearestStartLocation )
        {
            nearestTerm = currentTerm;
            nearestStartLocation = currentLocation;
        }
        if ( currentLocation > nearestEndLocation )
        {
            nearestEndLocation = currentLocation;
        }
        ++NumberOfTerms;
        // Note: the old trailing `currentTerm++` only bumped the loop's local
        // copy of the pointer and had no effect, so it has been removed.
    }
}
Location ISROr::GetStartLocation ( )
{
return nearestStartLocation;
......
......@@ -15,6 +15,8 @@ public:
vector<ISR*>Terms;
unsigned NumberOfTerms;
ISROr ( vector<ISR * > InputTerms );
Location First ( ) override;
Location Next ( ) override;
Location NextDocument ( ) override;
......@@ -27,30 +29,6 @@ public:
Location GetEndLocation ( );
// Builds an OR iterator over InputTerms: rewinds every child with First() and
// records the smallest child location (nearestStartLocation / nearestTerm),
// the largest child location (nearestEndLocation) and the child count
// (NumberOfTerms). The trackers are given explicit starting values so the
// comparisons and the count no longer depend on uninitialized members.
ISROr ( vector<ISR * > InputTerms ) : Terms( InputTerms ), NumberOfTerms( 0 )
{
    nearestTerm = nullptr;
    nearestStartLocation = MAX_Location;   // any real location compares smaller
    nearestEndLocation = 0;                // any real location compares larger

    for ( ISR *currentTerm : InputTerms )
    {
        currentTerm->First( );
        const Location currentLocation = currentTerm->currentLocation;

        if ( currentLocation < nearestStartLocation )
        {
            nearestTerm = currentTerm;
            nearestStartLocation = currentLocation;
        }
        if ( currentLocation > nearestEndLocation )
        {
            nearestEndLocation = currentLocation;
        }
        ++NumberOfTerms;
        // Note: the old trailing `currentTerm++` only bumped the loop's local
        // copy of the pointer and had no effect, so it has been removed.
    }
}
private:
ISR *nearestTerm;
// nearestStartLocation and nearestEndLocation are the start and end of the nearestTerm.
......
......@@ -24,7 +24,7 @@ int main ( )
vector<size_t> locations;
vector<DocumentEnding> docEnds;
set<string> urls;
while(queryOr->GetCurrentLocation() != 9999999999999) {
while(queryOr->GetCurrentLocation() != MAX_Location) {
locations.push_back(queryOr->Next());
}
while(endDocs.next().url != "aaa")
......
......@@ -14,12 +14,12 @@ using namespace std;
int main ( )
{
char* query;
ISRWord queryWord("iphone");
ISRWord queryWord("aare");
ISREndDoc endDocs;
vector<size_t> locations;
vector<DocumentEnding> docEnds;
set<string> urls;
while(queryWord.getCurrentLocation() != 9999999999999) {
while(queryWord.getCurrentLocation() != MAX_Location) {
locations.push_back(queryWord.Next());
}
while(endDocs.next().url != "aaa")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment