diff --git a/CMakeLists.txt b/CMakeLists.txt index da93d71d69a335ab1aff9fb620c7ec7506691bd1..f7ade6a771f95a12b32de43b34ed6a28c6421f66 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -150,6 +150,16 @@ add_executable(ISROR-tests util/stringProcessing.cpp util/Stemmer.cpp ) +add_executable(ISRAnd-tests + util/util.cpp + constraintSolver/ISR.cpp + constraintSolver/ISRWord.cpp + constraintSolver/ISRAnd.cpp + constraintSolver/tests/ISROrTests.cpp + constraintSolver/ISREndDoc.cpp + util/stringProcessing.cpp + util/Stemmer.cpp ) + find_package(OpenSSL REQUIRED) diff --git a/constraintSolver/ISR.h b/constraintSolver/ISR.h index b8096879a1506e6e17e43237571b0050f000b906..0f9307606f810ad7b926c1fef39a773f7705dec6 100644 --- a/constraintSolver/ISR.h +++ b/constraintSolver/ISR.h @@ -12,8 +12,10 @@ #include <sys/mman.h> #include <sys/stat.h> #include <sys/types.h> +#include <limits> typedef size_t Location; // Location 0 is the null location. +const Location MAX_Location = std::numeric_limits<unsigned>::max(); class ISR { diff --git a/constraintSolver/ISRAnd.cpp b/constraintSolver/ISRAnd.cpp index 82ff1555c9a6baf146e496f09afe815af242bd74..ff50caf07e1d05abd3afe8fbee85517b28c2ca11 100644 --- a/constraintSolver/ISRAnd.cpp +++ b/constraintSolver/ISRAnd.cpp @@ -3,3 +3,115 @@ // #include "ISRAnd.h" +#include <cassert> + +ISRAnd::ISRAnd ( vector<ISR * > InputTerms ) : Terms( InputTerms ) + { + + assert(InputTerms.size() > 1); + + for (auto currentTerm : InputTerms) + { + currentTerm->First(); + Location currentLocation = currentTerm->currentLocation; + if (currentLocation < nearestStartLocation) { + nearestTerm = currentTerm; + nearestStartLocation = currentLocation; + + } + if (currentLocation > nearestEndLocation) { + nearestEndLocation = currentLocation; + } + ++NumberOfTerms; + currentTerm++; + + } + } + +Location ISRAnd::First() + { + //Fixme? + Location first = MAX_Location; + + for(auto isr : Terms) + { + Location temp = isr->First(); + if (temp < first) + { + first = temp; + } + } + + //fixme should this return the nearest location of one subterm or the nearest location all the terms match? + currentLocation = Seek(first); + return currentLocation; + } + + +Location ISRAnd::Next ( ) + { + return Seek( nearestStartLocation); + } + +Location ISRAnd::NextDocument() + { + return Seek(GetEndDocument()); + } + +Location ISRAnd::Seek ( Location target ) + { + //Todo + // 1. Seek all the ISRs to the first occurrence beginning at + // the target location. + // 2. Move the document end ISR to just past the furthest + // word, then calculate the document begin location. + // 3. Seek all the other terms to past the document begin. + // 4. If any term is past the document end, return to + // step 2. + // 5. If any ISR reaches the end, there is no match. + + Location nearest; + Location furthest = 0; + + while(furthest != MAX_Location) { + + nearest = MAX_Location; + + //find nearest & furthest ISR + for (auto isr : Terms) { + Location temp = isr->Seek(target); + if (temp == MAX_Location) return MAX_Location; + if (temp > furthest) furthest = temp; + if (temp < nearest) nearest = temp; + } + + //Get Document of the furthest ISR + DocumentEnd->Seek(target); + + //set next target to be starting location of document + //Location lastDocStart = DocumentEnd->GetStart()?; + Location lastDocStart = 0; + + if(nearest >= lastDocStart) + { + //MATCH + //Does it matter for 'AND' what location we return? first/last in document? + //Should we return an ISR as well? + return nearest; + } else{ + //No Match yet, keep searching + target = lastDocStart; + } + + } + + } + +Location ISRAnd::GetEndDocument() + { + //What does currentLocation hold? When is it updated? + return DocumentEnd->Seek(currentLocation); + } + + + diff --git a/constraintSolver/ISRAnd.h b/constraintSolver/ISRAnd.h index f899588143fd8f0317d2a73575ccb23b43ed7f8c..60921e31e1fa4f239da0df2f9df92a1f630b01ab 100644 --- a/constraintSolver/ISRAnd.h +++ b/constraintSolver/ISRAnd.h @@ -5,37 +5,31 @@ #pragma once #include "ISR.h" +#include <vector> - +using namespace std; //Find occurrences of all child ISRs within a single document - class ISRAnd : ISR { public: - ISR **Terms; + vector<ISR*>Terms; unsigned NumberOfTerms; - Post *Seek ( Location target ) - { - // 1. Seek all the ISRs to the first occurrence beginning at - // the target location. - // 2. Move the document end ISR to just past the furthest - // word, then calculate the document begin location. - // 3. Seek all the other terms to past the document begin. - // 4. If any term is past the document end, return to - // step 2. - // 5. If any ISR reaches the end, there is no match. - } + ISRAnd ( vector<ISR * > InputTerms ); + + Location First ( ) override; + Location Next ( ) override; + Location NextDocument ( ) override; + Location Seek ( Location target ) override; + Location GetEndDocument ( ) override; + Location GetCurrentLocation(); - Post *Next ( ) - { - return Seek( nearestStartLocation + 1 ); - } private: - unsigned nearestTerm, farthestTerm; + ISR *nearestTerm; + // nearStartLocation and nearestEndLocation are// the start and end of the nearestTerm. Location nearestStartLocation, nearestEndLocation; }; diff --git a/constraintSolver/ISROr.cpp b/constraintSolver/ISROr.cpp index e765d1e081b581c694106561affda796aba553be..d3282a1727df2f678268f217f5dfa7bd74ee41c3 100644 --- a/constraintSolver/ISROr.cpp +++ b/constraintSolver/ISROr.cpp @@ -5,6 +5,28 @@ #include "ISROr.h" +ISROr::ISROr ( vector<ISR * > InputTerms ) : Terms( InputTerms ) + { + + for (auto currentTerm : InputTerms) + { + currentTerm->First(); + Location currentLocation = currentTerm->currentLocation; + if (currentLocation < nearestStartLocation) { + nearestTerm = currentTerm; + nearestStartLocation = currentLocation; + + } + if (currentLocation > nearestEndLocation) { + nearestEndLocation = currentLocation; + } + ++NumberOfTerms; + currentTerm++; + + } + } + + Location ISROr::GetStartLocation ( ) { return nearestStartLocation; diff --git a/constraintSolver/ISROr.h b/constraintSolver/ISROr.h index 29d3b0936de276ca52e1e7257cf7146a99cf961f..657c3fee37363b7665b2f43101688b26227bd38c 100644 --- a/constraintSolver/ISROr.h +++ b/constraintSolver/ISROr.h @@ -15,6 +15,8 @@ public: vector<ISR*>Terms; unsigned NumberOfTerms; + ISROr ( vector<ISR * > InputTerms ); + Location First ( ) override; Location Next ( ) override; Location NextDocument ( ) override; @@ -27,30 +29,6 @@ public: Location GetEndLocation ( ); - ISROr ( vector<ISR * > InputTerms ) : Terms( InputTerms ) - { - - for(auto currentTerm : InputTerms) - { - currentTerm->First( ); - Location currentLocation = currentTerm->currentLocation; - if ( currentLocation < nearestStartLocation ) - { - nearestTerm = currentTerm; - nearestStartLocation = currentLocation; - - } - if ( currentLocation > nearestEndLocation ) - { - nearestEndLocation = currentLocation; - } - ++NumberOfTerms; - currentTerm++; - - } - - } - private: ISR *nearestTerm; // nearStartLocation and nearestEndLocation are// the start and end of the nearestTerm. diff --git a/constraintSolver/tests/ISROrTests.cpp b/constraintSolver/tests/ISROrTests.cpp index 2db362b0a3e4f036aeaf68dc8b941e8e6d4ace7b..3f982cb10d120ae34b2372232797e8439e5349db 100644 --- a/constraintSolver/tests/ISROrTests.cpp +++ b/constraintSolver/tests/ISROrTests.cpp @@ -24,7 +24,7 @@ int main ( ) vector<size_t> locations; vector<DocumentEnding> docEnds; set<string> urls; - while(queryOr->GetCurrentLocation() != 9999999999999) { + while(queryOr->GetCurrentLocation() != MAX_Location) { locations.push_back(queryOr->Next()); } while(endDocs.next().url != "aaa") diff --git a/constraintSolver/tests/ISRWordTests.cpp b/constraintSolver/tests/ISRWordTests.cpp index 37d4f8356b75dc3dfd4916485f4ebd20a33bfa96..58b4cf50bacb04b1fc6ed363b1eeee725de1a111 100644 --- a/constraintSolver/tests/ISRWordTests.cpp +++ b/constraintSolver/tests/ISRWordTests.cpp @@ -14,13 +14,13 @@ using namespace std; int main ( ) { char* query = "iphone"; - ISRWord queryWord(query); + char* query; + ISRWord queryWord("aare"); ISREndDoc endDocs; vector<size_t> locations; vector<DocumentEnding> docEnds; set<string> urls; - locations.push_back(queryWord.getCurrentLocation()); - while(queryWord.getCurrentLocation() != 9999999999999) { + while(queryWord.getCurrentLocation() != MAX_Location) { locations.push_back(queryWord.Next()); } for(auto loc : locations) {