diff --git a/CMakeLists.txt b/CMakeLists.txt index 4eaabcb3310986e30d0cb0ccacbf1b20a4964a53..407c97e52473f41dea7158117616dc25ad3db263 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -157,7 +157,24 @@ add_executable(URLTEST ) -add_executable(search-engine search.cpp query/Searcher.cpp) +add_executable(search-engine + search.cpp + query/Searcher.cpp + util/DataStructureLib/tuple.cpp + util/stringProcessing.cpp + util/Stemmer.cpp + util/util.cpp + constraintSolver/ISRContainer.cpp + constraintSolver/ISR.h + query/Ranker/Ranker.cpp + query/Ranker/Site.cpp + query/Ranker/Scorer.cpp + query/queryLanguage/QueryParser.cpp + constraintSolver/ISREndDoc.cpp + constraintSolver/ISRWord.cpp + constraintSolver/ISRAnd.cpp + constraintSolver/ISROr.cpp) + add_executable(ISRWord-tests util/util.cpp @@ -223,7 +240,7 @@ add_executable(DataStructures-HashTable-tests DataStructures/HashTable/HashTable.h DataStructures/HashTable/HashTableTests.cpp) -add_executable(DataStructures-DiskHashTable-tests +add_executable(DataStructures-q-DiskHashTable-tests DataStructures/DiskHashTable/DiskHashTable.h DataStructures/DiskHashTable/DiskHashTableTests.cpp) diff --git a/constraintSolver/ISRContainer.cpp b/constraintSolver/ISRContainer.cpp index 38585a59ba4f378db4e4d4c2b25df74a48fbae4a..1658e38222fb2ce5403728b1598a9a6895fcd410 100644 --- a/constraintSolver/ISRContainer.cpp +++ b/constraintSolver/ISRContainer.cpp @@ -28,7 +28,8 @@ ISR * ISRContainer::recurviseCompile( Tuple * root ) if( root->Type == WordTupleType ) { - string currentTerm; + + string currentTerm = root->object.text; terms.push_back( currentTerm ); return new ISRWord( currentTerm ); } @@ -55,27 +56,11 @@ void ISRContainer::Solve( ) Location BeginningfDocument = Contained->GetISRToBeginningOfDocument( ); PassToRanker( BeginningfDocument ); + //PassToRanker( BeginningfDocument ); Contained->NextDocument( ); - - -/* - * beg = GetBeginning of Doc - * Pass Terms to ranker - * - * vector<words> - * - * Ranker: - * for term in terms - * IsrWord word = new ISR(term) - * Term.seek(beg) - * words.push(word) - * rank(words) - * - * NextDocument() - */ } - + return; } @@ -83,7 +68,7 @@ void ISRContainer::PassToRanker( Location docBeginning ) { vector<ISRWord* > toRanker; - for ( auto term : Terms ) + for ( auto term : terms ) { ISRWord * isrWord = new ISRWord ( term ) ; @@ -92,6 +77,7 @@ void ISRContainer::PassToRanker( Location docBeginning ) } - ranker.rank ( toRanker ) + //ranker.rank( toRanker ); + } diff --git a/query/Searcher.cpp b/query/Searcher.cpp index fef73dfae3f66802f3359364748a15e050e49f59..c04f71b1a499a2e3a607901f5eb7a0fedf28a65a 100644 --- a/query/Searcher.cpp +++ b/query/Searcher.cpp @@ -16,21 +16,21 @@ using namespace std; void Searcher::search ( ) { - if (*CompleteQuery == "-quit" ||*CompleteQuery == "-q" ) + if (CompleteQuery == "-quit" ||CompleteQuery == "-q" ) { cout << "Thank you for using C++lue search engine" << endl; exit( 0 ); } - else if ( *CompleteQuery == "-help") + else if ( CompleteQuery == "-help") { cout << "Manual" << endl; return; } else { - queryParser.parse(*CompleteQuery); - container->compile( queryParser ); - container->solve( ); + queryParser.parse(CompleteQuery); + ISRContainer container = ISRContainer( queryParser.queryTree ); + container.Solve( ); @@ -50,7 +50,7 @@ void Searcher::search ( ) * */ -void QueryParser::printResults ( ) +void Searcher::printResults ( ) { cout << " Generated XXX results in about XXX seconds " << endl; diff --git a/query/Searcher.h b/query/Searcher.h index 97b87e77fce6ec372784148bb8bfae2298335fdc..0edff071822ddb1d511cac8aac86ae3561f92268 100644 --- a/query/Searcher.h +++ b/query/Searcher.h @@ -2,8 +2,8 @@ // Created by Jake Close on 3/7/18. // -#include "/query/queryLanguage/QueryParser.h" -#include "/constraintSolver/ISRContainer.h" +#include "../query/queryLanguage/QueryParser.h" +#include "../constraintSolver/ISRContainer.h" #pragma once using namespace std; @@ -12,17 +12,21 @@ class Searcher { public: - Searcher ( string *query_in ) : CompleteQuery( query_in ) - { }; + Searcher ( string query_in ) : CompleteQuery( query_in ) + { + + + + }; void search ( ); void printResults ( ); private: - string *CompleteQuery; + string CompleteQuery; QueryParser queryParser; - ISRContainer* container; + //ISRContainer container; }; diff --git a/query/queryLanguage/QueryParser.cpp b/query/queryLanguage/QueryParser.cpp index 525b63ff22c578149a3ea6e7cda6703267ce77d6..055804f5640c1bcc987b669b4f034bb4849ec03e 100644 --- a/query/queryLanguage/QueryParser.cpp +++ b/query/queryLanguage/QueryParser.cpp @@ -4,6 +4,7 @@ #include "QueryParser.h" #include<unordered_set> + //#include "../../util/stringProcessing.h" #include<iostream> /*** @@ -14,18 +15,28 @@ * * */ + +void removeWhitespace(string &str) + { + str.erase(std::remove(str.begin(), str.end(), ' '), str.end()); + } /*** + * * Returns a token of the next word in the query, past the given index * @param index * @return */ Token QueryParser::FindNextToken( int &index ){ + //TODO remove this when you add new ISR unordered_set<char> stopChars; stopChars.insert(' '); int size = 1; int start = index; + //vector<string> words = splitStr( query , ' ', 0); + //string text = words [ start ] ; + //++index; while(start + size < query.size()) @@ -41,19 +52,21 @@ Token QueryParser::FindNextToken( int &index ){ ++size; index = start + size; string text = query.substr ( start, size ); + removeWhitespace(text); if( MatchOR ( text ) ) return Token( "-OR-" ); return Token( text ); } else if ( stopChars.count( query[ start + size ] ) > 0) { - while( query[start] == ' ') - { - ++start; - } + + //while( query[start] == ' ') + // { + // ++start; + // } index = start + size; string text = query.substr ( start, size ); - cout << "horse" << text; + removeWhitespace(text); return Token( text ); } @@ -63,8 +76,11 @@ Token QueryParser::FindNextToken( int &index ){ } } index = start + size; + string text = query.substr ( start, size ); - cout << "horsey: " << text; + removeWhitespace(text); + + return Token( text ); } diff --git a/query/queryLanguage/tests/queryIsrTest.cpp b/query/queryLanguage/tests/queryIsrTest.cpp index ed1a256ec4b9940cfd54b1bb87535a2131470fd9..774e3750370e0da3014d3eb284c171e03afb0ed4 100644 --- a/query/queryLanguage/tests/queryIsrTest.cpp +++ b/query/queryLanguage/tests/queryIsrTest.cpp @@ -13,9 +13,17 @@ int main() string query = "moment life"; string OR = "bike cycle "; QueryParser parser; - parser.parse( query ); + parser.parse( "moment" ); - Token orParentToken = Token("-OR-"); + Token life = Token("life"); + Tuple* lifeTuple = new Tuple( life ); + + parser.queryTree->Next.push_back( lifeTuple ); + + + + + Token orParentToken = Token("-And-"); Tuple * orparent = new Tuple( orParentToken ); Token bike = Token("bike"); diff --git a/search-engine b/search-engine index b6426abcdbdf2d553e718305de768e5a58926ea7..8a8ce5782f9977228876ab93ffcd26e058a5bf8b 100755 Binary files a/search-engine and b/search-engine differ diff --git a/search.cpp b/search.cpp index 59694619d74d6d683330e320af7d2cc32a4d8b8b..0cfdd5228565f77a8e9e65d4d13befb0c4301e66 100644 --- a/search.cpp +++ b/search.cpp @@ -29,11 +29,14 @@ int main( int argc, char *argv[] ) while ( getline( cin, q ) && !q.empty( )) { - QueryParser *query = new QueryParser( q.c_str( )); - query->search( ); - query->printResults( ); + Searcher searchEngine( q ); + cout << "Results" << endl; + searchEngine.search( ); + //query->printResults( ); + cout << "Please enter another search " << endl; } + } diff --git a/testISRQueryTuple b/testISRQueryTuple index 7903f40157e615a83cb2b052a8ff2a44057dd35d..5b22bb188c1fa0ec610ad5f53be35f3f2c99765b 100755 Binary files a/testISRQueryTuple and b/testISRQueryTuple differ