Skip to content
Snippets Groups Projects
Commit 971ccea7 authored by jsclose's avatar jsclose
Browse files

fully working integrated search engine

parents d085a67c 12a2b694
No related branches found
No related tags found
1 merge request!9Website
...@@ -157,7 +157,24 @@ add_executable(URLTEST ...@@ -157,7 +157,24 @@ add_executable(URLTEST
) )
add_executable(search-engine search.cpp query/Searcher.cpp) add_executable(search-engine
search.cpp
query/Searcher.cpp
util/DataStructureLib/tuple.cpp
util/stringProcessing.cpp
util/Stemmer.cpp
util/util.cpp
constraintSolver/ISRContainer.cpp
constraintSolver/ISR.h
query/Ranker/Ranker.cpp
query/Ranker/Site.cpp
query/Ranker/Scorer.cpp
query/queryLanguage/QueryParser.cpp
constraintSolver/ISREndDoc.cpp
constraintSolver/ISRWord.cpp
constraintSolver/ISRAnd.cpp
constraintSolver/ISROr.cpp)
add_executable(ISRWord-tests add_executable(ISRWord-tests
util/util.cpp util/util.cpp
...@@ -223,7 +240,7 @@ add_executable(DataStructures-HashTable-tests ...@@ -223,7 +240,7 @@ add_executable(DataStructures-HashTable-tests
DataStructures/HashTable/HashTable.h DataStructures/HashTable/HashTable.h
DataStructures/HashTable/HashTableTests.cpp) DataStructures/HashTable/HashTableTests.cpp)
add_executable(DataStructures-DiskHashTable-tests add_executable(DataStructures-q-DiskHashTable-tests
DataStructures/DiskHashTable/DiskHashTable.h DataStructures/DiskHashTable/DiskHashTable.h
DataStructures/DiskHashTable/DiskHashTableTests.cpp) DataStructures/DiskHashTable/DiskHashTableTests.cpp)
...@@ -299,8 +316,8 @@ add_executable(testISRQueryTuple ...@@ -299,8 +316,8 @@ add_executable(testISRQueryTuple
add_executable(query-queryLanguage-tests add_executable(query-queryLanguage-tests
query/queryLanguage/QueryParser.cpp query/queryLanguage/QueryParser.cpp
query/queryLanguage/QueryParser.h
util/DataStructureLib/tuple.cpp
) )
......
...@@ -28,7 +28,7 @@ ISR * ISRContainer::recurviseCompile( Tuple * root ) ...@@ -28,7 +28,7 @@ ISR * ISRContainer::recurviseCompile( Tuple * root )
if( root->Type == WordTupleType ) if( root->Type == WordTupleType )
{ {
string currentTerm; string currentTerm = root->object.text;
terms.push_back( currentTerm ); terms.push_back( currentTerm );
return new ISRWord( currentTerm ); return new ISRWord( currentTerm );
} }
...@@ -53,29 +53,11 @@ void ISRContainer::Solve( ) ...@@ -53,29 +53,11 @@ void ISRContainer::Solve( )
auto url = Contained->GetEndDocument()->getCurrentDoc().url; auto url = Contained->GetEndDocument()->getCurrentDoc().url;
cout << url << endl; cout << url << endl;
Location BeginningfDocument = Contained->GetISRToBeginningOfDocument( ); Location BeginningfDocument = Contained->GetISRToBeginningOfDocument( );
PassToRanker( BeginningfDocument ); //PassToRanker( BeginningfDocument );
Contained->NextDocument( ); Contained->NextDocument( );
/*
* beg = GetBeginning of Doc
* Pass Terms to ranker
*
* vector<words>
*
* Ranker:
* for term in terms
* IsrWord word = new ISR(term)
* Term.seek(beg)
* words.push(word)
* rank(words)
*
* NextDocument()
*/
} }
return;
} }
...@@ -83,7 +65,7 @@ void ISRContainer::PassToRanker( Location docBeginning ) ...@@ -83,7 +65,7 @@ void ISRContainer::PassToRanker( Location docBeginning )
{ {
vector<ISRWord* > toRanker; vector<ISRWord* > toRanker;
for ( auto term : Terms ) for ( auto term : terms )
{ {
ISRWord * isrWord = new ISRWord ( term ) ; ISRWord * isrWord = new ISRWord ( term ) ;
...@@ -92,6 +74,7 @@ void ISRContainer::PassToRanker( Location docBeginning ) ...@@ -92,6 +74,7 @@ void ISRContainer::PassToRanker( Location docBeginning )
} }
ranker.rank ( toRanker ) //ranker.rank( toRanker );
} }
...@@ -16,21 +16,21 @@ using namespace std; ...@@ -16,21 +16,21 @@ using namespace std;
void Searcher::search ( ) void Searcher::search ( )
{ {
if (*CompleteQuery == "-quit" ||*CompleteQuery == "-q" ) if (CompleteQuery == "-quit" ||CompleteQuery == "-q" )
{ {
cout << "Thank you for using C++lue search engine" << endl; cout << "Thank you for using C++lue search engine" << endl;
exit( 0 ); exit( 0 );
} }
else if ( *CompleteQuery == "-help") else if ( CompleteQuery == "-help")
{ {
cout << "Manual" << endl; cout << "Manual" << endl;
return; return;
} }
else else
{ {
queryParser.parse(*CompleteQuery); queryParser.parse(CompleteQuery);
container->compile( queryParser ); ISRContainer container = ISRContainer( queryParser.queryTree );
container->solve( ); container.Solve( );
...@@ -50,7 +50,7 @@ void Searcher::search ( ) ...@@ -50,7 +50,7 @@ void Searcher::search ( )
* *
*/ */
void QueryParser::printResults ( ) void Searcher::printResults ( )
{ {
cout << " Generated XXX results in about XXX seconds " << endl; cout << " Generated XXX results in about XXX seconds " << endl;
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
// Created by Jake Close on 3/7/18. // Created by Jake Close on 3/7/18.
// //
#include "/query/queryLanguage/QueryParser.h" #include "../query/queryLanguage/QueryParser.h"
#include "/constraintSolver/ISRContainer.h" #include "../constraintSolver/ISRContainer.h"
#pragma once #pragma once
using namespace std; using namespace std;
...@@ -12,17 +12,21 @@ class Searcher ...@@ -12,17 +12,21 @@ class Searcher
{ {
public: public:
Searcher ( string *query_in ) : CompleteQuery( query_in ) Searcher ( string query_in ) : CompleteQuery( query_in )
{ }; {
};
void search ( ); void search ( );
void printResults ( ); void printResults ( );
private: private:
string *CompleteQuery; string CompleteQuery;
QueryParser queryParser; QueryParser queryParser;
ISRContainer* container; //ISRContainer container;
}; };
......
...@@ -4,7 +4,9 @@ ...@@ -4,7 +4,9 @@
#include "QueryParser.h" #include "QueryParser.h"
#include<unordered_set> #include<unordered_set>
#include "../../util/stringProcessing.h" #include <algorithm>
#include <cctype>
//#include "../../util/stringProcessing.h"
#include<iostream> #include<iostream>
/*** /***
* QUERY PARSER CLASS * QUERY PARSER CLASS
...@@ -14,18 +16,28 @@ ...@@ -14,18 +16,28 @@
* *
* *
*/ */
/***
 * Strips every space character (' ') from str in place.
 * Other whitespace such as tabs and newlines is left untouched.
 * @param str string to compact; modified directly
 */
void removeWhitespace(string &str)
	{
	const auto isSpace = []( char ch ) { return ch == ' '; };
	// Shift the kept characters to the front, then cut off the leftover tail.
	auto newEnd = std::remove_if( str.begin( ), str.end( ), isSpace );
	str.erase( newEnd, str.end( ) );
	}
/*** /***
*
* Returns a token of the next word in the query, past the given index * Returns a token of the next word in the query, past the given index
* @param index * @param index
* @return * @return
*/ */
Token QueryParser::FindNextToken( int &index ){ Token QueryParser::FindNextToken( int &index ){
//TODO remove this when you add new ISR //TODO remove this when you add new ISR
unordered_set<char> stopChars; unordered_set<char> stopChars;
stopChars.insert(' '); stopChars.insert(' ');
int size = 1; int size = 1;
int start = index; int start = index;
//vector<string> words = splitStr( query , ' ', 0);
//string text = words [ start ] ;
//++index;
while(start + size < query.size()) while(start + size < query.size())
...@@ -41,15 +53,20 @@ Token QueryParser::FindNextToken( int &index ){ ...@@ -41,15 +53,20 @@ Token QueryParser::FindNextToken( int &index ){
++size; ++size;
index = start + size; index = start + size;
string text = query.substr ( start, size ); string text = query.substr ( start, size );
removeWhitespace(text);
if( MatchOR ( text ) ) if( MatchOR ( text ) )
return Token( "-OR-" ); return Token( "-OR-" );
return Token( text ); return Token( text );
} }
else if ( stopChars.count( query[ start + size ] ) > 0) else if ( stopChars.count( query[ start + size ] ) > 0)
{ {
//while( query[start] == ' ')
// {
// ++start;
// }
index = start + size; index = start + size;
string text = query.substr ( start, size ); string text = query.substr ( start, size );
cout << "horse" << text; removeWhitespace(text);
return Token( text ); return Token( text );
} }
...@@ -59,8 +76,11 @@ Token QueryParser::FindNextToken( int &index ){ ...@@ -59,8 +76,11 @@ Token QueryParser::FindNextToken( int &index ){
} }
} }
index = start + size; index = start + size;
string text = query.substr ( start, size ); string text = query.substr ( start, size );
cout << "horsey: " << text; removeWhitespace(text);
return Token( text ); return Token( text );
} }
......
...@@ -13,9 +13,17 @@ int main() ...@@ -13,9 +13,17 @@ int main()
string query = "moment life"; string query = "moment life";
string OR = "bike cycle "; string OR = "bike cycle ";
QueryParser parser; QueryParser parser;
parser.parse( query ); parser.parse( "moment" );
Token orParentToken = Token("-OR-"); Token life = Token("life");
Tuple* lifeTuple = new Tuple( life );
parser.queryTree->Next.push_back( lifeTuple );
Token orParentToken = Token("-And-");
Tuple * orparent = new Tuple( orParentToken ); Tuple * orparent = new Tuple( orParentToken );
Token bike = Token("bike"); Token bike = Token("bike");
......
No preview for this file type
...@@ -29,11 +29,14 @@ int main( int argc, char *argv[] ) ...@@ -29,11 +29,14 @@ int main( int argc, char *argv[] )
while ( getline( cin, q ) && !q.empty( )) while ( getline( cin, q ) && !q.empty( ))
{ {
QueryParser *query = new QueryParser( q.c_str( )); Searcher searchEngine( q );
query->search( ); cout << "Results" << endl;
query->printResults( ); searchEngine.search( );
//query->printResults( );
cout << "Please enter another search " << endl;
} }
} }
......
No preview for this file type
...@@ -76,4 +76,9 @@ public: ...@@ -76,4 +76,9 @@ public:
} }
} }
/***
 * Reports how many entries are currently held in Next
 * (presumably this tuple's child tuples -- confirm against the class).
 * @return element count of Next, converted to int
 */
int getNumberOfChildren()
	{
	// size() yields an unsigned type; make the conversion to int explicit.
	return static_cast<int>( Next.size( ) );
	}
}; };
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment