diff --git a/query/queryLanguage/QueryParser.cpp b/query/queryLanguage/QueryParser.cpp index 055804f5640c1bcc987b669b4f034bb4849ec03e..9d79ee27797e5df93c56faedefae3575ca8a8cd7 100644 --- a/query/queryLanguage/QueryParser.cpp +++ b/query/queryLanguage/QueryParser.cpp @@ -4,8 +4,7 @@ #include "QueryParser.h" #include<unordered_set> - -//#include "../../util/stringProcessing.h" +#include "../../util/stringProcessing.h" #include<iostream> /*** * QUERY PARSER CLASS @@ -16,75 +15,6 @@ * */ -void removeWhitespace(string &str) - { - str.erase(std::remove(str.begin(), str.end(), ' '), str.end()); - } -/*** - * - * Returns a token of the next word in the query, past the given index - * @param index - * @return - */ -Token QueryParser::FindNextToken( int &index ){ - - //TODO remove this when you add new ISR - unordered_set<char> stopChars; - stopChars.insert(' '); - - int size = 1; - int start = index; - //vector<string> words = splitStr( query , ' ', 0); - //string text = words [ start ] ; - //++index; - - - while(start + size < query.size()) - { - if ( query[ start + size ] == '"' ) - { - ++size; - while( query[start + size ]!= '"' && (start + size < query.size()) ) - { - ++size; - } - if(start + size < query.size()) - ++size; - index = start + size; - string text = query.substr ( start, size ); - removeWhitespace(text); - if( MatchOR ( text ) ) - return Token( "-OR-" ); - return Token( text ); - } - else if ( stopChars.count( query[ start + size ] ) > 0) - { - - //while( query[start] == ' ') - // { - // ++start; - // } - index = start + size; - string text = query.substr ( start, size ); - removeWhitespace(text); - - return Token( text ); - } - else - { - ++size; - } - } - index = start + size; - - string text = query.substr ( start, size ); - removeWhitespace(text); - - - - return Token( text ); - } - /*** Builds QueryTree from input query * * @param input @@ -93,75 +23,37 @@ void QueryParser::parse( string input ) { query = input; Token current; - int location = 0; - 
while( location < input.size( ) ) - { - //TODO needs to be BF Traversal - current = FindNextToken( location ); - Tuple * next = new Tuple( current ); - queryTree->Next.push_back( next ); - - } + queryTree = Constraint ( input ); } /*** - * destructor for the Query Parser + * takes in a string and separates on OR, if no OR, then AND. It will create an AND or OR Tuple if there's a complex string. + * If the string is one word, it will become a WORD tuple and return itself. + * @param input */ -QueryParser::~QueryParser ( ) +Tuple* QueryParser::Constraint( string input ) { - delete_children ( queryTree ); - delete queryTree; - } + vector<Tuple * > constraintList; -/*** - * Traverses down the tree and deletes all of the nodes in the tree - * @param node - */ -void QueryParser::delete_children( Tuple* node ) - { - for( int i = 0; i < node->Next.size( ); ++i ) + //Break on top level OR + if( isOrType( input ) ) { - delete_children( node->Next[ i ] ); - delete node->Next[ i ]; + Tuple *t = new Tuple( OrTupleType ); + constraintList = breakOnOR ( input ); + t->Next = constraintList; + return t; } - } - -/*** - * Prints the compiled Query for testing - */ -void QueryParser::printCompiledQuery() - { - cout << "Query Tree: \n"; - deque<Tuple *> queue; - deque<int> levelQueue; - queue.push_back( queryTree ); - levelQueue.push_back( 0 ); - traverse( queue, levelQueue ); - } - - -void QueryParser::traverse(deque< Tuple*> queue, deque< int> levels) - { - int deepest = 0; - while(!queue.empty()) + else if( isAndType ( input ) ) { - Tuple *current = queue.front ( ); - queue.pop_front ( ); - int currLevel = levels.front(); - levels.pop_front (); - for ( int i = 0; i < current->Next.size ( ); ++i ) - { - queue.push_back( current->Next[ i ] ); - levels.push_back( currLevel + 1); - } - cout << " | "; - if( currLevel > deepest) - { - deepest = currLevel; - cout << "\n[ "<<deepest<<" ] "; - } - - cout << " " << current->object.text << " "; + Tuple *t = new Tuple( AndTupleType); + 
constraintList = breakOnAND ( input ); + t->Next = constraintList; + return t; + } + else + { + Tuple *t = new Tuple( input, WordTupleType); + return t; } } @@ -205,52 +97,8 @@ bool QueryParser::MatchAND( string input ) return false; } -/*** - * Highest level query parsing, splits the input string on OR, then builds tree subtrees without - * @param input - */ -Tuple* QueryParser::Constraint( string input ) - { - vector<Tuple * > constraintList; - Tuple *t = new Tuple(); - constraintList = breakOnOR( input ); - - - if( constraintList.size( ) > 1 ) - t->Type = OrTupleType; - else - t->Type = AndTupleType; - Tuple* toBeKilled = constraintList[ 0 ]; - constraintList = breakOnAND ( input ); - t->Next = constraintList; - - //Iterate through the subcontraints and if there are ORs, then run this again, else split on and for each - for (int i = 0; i < constraintList.size( ); ++i ) - { - string word =constraintList[ i ]->object.text; - //If the subtype needs an or, then build a new or tuple - if(isOrType(word)) - { - Tuple* toBeKilled = constraintList[ i ]; - constraintList[ i ] = Constraint ( word ); - constraintList[ i ]->Type = OrTupleType; - delete toBeKilled; - toBeKilled = nullptr; - } - else if(isAndType(word)) - { - Tuple* toBeKilled = constraintList[ i ]; - constraintList[ i ] = Constraint ( word ); - constraintList[ i ]->Type = AndTupleType; - delete toBeKilled; - toBeKilled = nullptr; - } - } - - } - /*** * Breaks input string on ORs, returns a list of tuples of those strings @@ -272,7 +120,7 @@ vector<Tuple * > QueryParser::breakOnOR( string input ) closedBracket.insert(')'); closedBracket.insert('}'); closedBracket.insert(']'); - vector<string> query = splitStr (input, ' ', 0); + vector<string> query = splitStr (input, ' ', true); vector<Tuple *> constraintList; int start = 0; @@ -289,41 +137,43 @@ vector<Tuple * > QueryParser::breakOnOR( string input ) } else if( MatchOR( query[ i ]) && depth == 0 ) { - string text = query[ 0 ]; + string text; for ( int j = 
start; j < i; ++ j) { text+= query[ j ]; + if( j < ( i -1 ) ) + text+= " "; } - Tuple * subConstraint = new Tuple( text ); + if( text == "" || text == " ") + break; + + Tuple * subConstraint = Constraint( text ); constraintList.push_back( subConstraint ); start = i + 1; } else if( i == query.size( ) - 1 ) { string text; - for ( int j = start; j < i; ++ j) + for ( int j = start; j <= i; ++ j) { text+= query[ j ]; + if( j <= ( i -1 ) ) + text+= " "; } - Tuple * subConstraint = new Tuple( text ); + Tuple * subConstraint = Constraint( text ); constraintList.push_back( subConstraint ); } } - return constraintList; + return constraintList; } -Tuple * baseConstraint( string input ) - { -// while( t = simpleConstraint ( input )) - return nullptr; - } /*** * Returns if a string has an OR at its highest level */ bool QueryParser::isOrType( string input ) { - vector<string> query = splitStr (input, ' ', 0); + vector<string> query = splitStr (input, ' ', true); int depth = 0; for( auto word = query.begin(); word != query.end(); ++word ) { @@ -348,10 +198,16 @@ bool QueryParser::isOrType( string input ) */ bool QueryParser::isAndType( string input ) { - vector<string> query = splitStr (input, ' ', 0); + vector<string> query = splitStr (input, ' ', true); + + if( query.size( ) == 1) + return false; + int depth = 0; for( auto word = query.begin(); word != query.end(); ++word ) { + if( depth == 0 && MatchOR ( *word )) + return false; if(depth == 0 && MatchAND(*word)) { return true; @@ -364,11 +220,8 @@ bool QueryParser::isAndType( string input ) { --depth; } - - - } - return false; + return true; } vector<Tuple * > QueryParser::breakOnAND( string input ) @@ -385,7 +238,7 @@ vector<Tuple * > QueryParser::breakOnAND( string input ) closedBracket.insert(')'); closedBracket.insert('}'); closedBracket.insert(']'); - vector<string> query = splitStr (input, ' ', 0); + vector<string> query = splitStr (input, ' ', true); vector<Tuple *> constraintList; int start = 0; @@ -402,25 +255,90 @@ 
vector<Tuple * > QueryParser::breakOnAND( string input ) } else if( MatchAND( query[ i ]) && depth == 0 ) { - string text = query[ 0 ]; + string text; for ( int j = start; j < i; ++ j) { text+= query[ j ]; + if( j < ( i -1 ) ) + text+= " "; } - Tuple * subConstraint = new Tuple( text ); + if( text == "" || text == " ") + break; + + Tuple * subConstraint = Constraint( text ); constraintList.push_back( subConstraint ); start = i + 1; } - else if( i == query.size( ) - 1 ) + else if( depth == 0 ) { string text; - for ( int j = start; j < i; ++ j) - { - text+= query[ j ]; - } - Tuple * subConstraint = new Tuple( text ); + text = query[ i ]; + Tuple * subConstraint = Constraint( text ); constraintList.push_back( subConstraint ); } } return constraintList; + } + + +/*** + * Prints the compiled Query for testing + */ +void QueryParser::printCompiledQuery() + { + cout << "Query Tree: \n"; + deque<Tuple *> queue; + deque<int> levelQueue; + queue.push_back( queryTree ); + levelQueue.push_back( 0 ); + traverse( queue, levelQueue ); + } + + +void QueryParser::traverse(deque< Tuple*> queue, deque< int> levels) + { + int deepest = 0; + while(!queue.empty()) + { + Tuple *current = queue.front ( ); + queue.pop_front ( ); + int currLevel = levels.front(); + levels.pop_front (); + for ( int i = 0; i < current->Next.size ( ); ++i ) + { + queue.push_back( current->Next[ i ] ); + levels.push_back( currLevel + 1); + } + cout << " | "; + if( currLevel > deepest) + { + deepest = currLevel; + cout << "\n[ "<<deepest<<" ] "; + } + + cout << " " << current->object.text << " "; + } + } + + +/*** + * destructor for the Query Parser + */ +QueryParser::~QueryParser ( ) + { + delete_children ( queryTree ); + delete queryTree; + } + +/*** + * Traverses down the tree and deletes all of the nodes in the tree + * @param node + */ +void QueryParser::delete_children( Tuple* node ) + { + for( int i = 0; i < node->Next.size( ); ++i ) + { + delete_children( node->Next[ i ] ); + delete node->Next[ i ]; + } 
} \ No newline at end of file diff --git a/query/queryLanguage/QueryParser.h b/query/queryLanguage/QueryParser.h index 0968a8e7aac9e1bc2c75551cfab263f047e2b60a..bdd956d32dee2b83e1b8209baaab19965b095b7b 100644 --- a/query/queryLanguage/QueryParser.h +++ b/query/queryLanguage/QueryParser.h @@ -7,9 +7,6 @@ #include "../../util/DataStructureLib/tuple.cpp" #include<deque> -// Outline of query language from Prof. Nicole Hamilton, University of Michigan 03/15/2018 -// 41 lines - // <Constraint> ::= <BaseConstraint> // { <OrOp> <BaseConstraint> } diff --git a/query/queryLanguage/tests/testQueryParser.cpp b/query/queryLanguage/tests/testQueryParser.cpp index 5132c78407240cc61e8e3fabcae7df6875a6a298..9e44ad7bf3146428a94bdef4c629bdb05452bee3 100644 --- a/query/queryLanguage/tests/testQueryParser.cpp +++ b/query/queryLanguage/tests/testQueryParser.cpp @@ -13,9 +13,11 @@ int main() parser.parse( query ); parser.printCompiledQuery(); - string query1 = " \"apollo moon\" landing"; + string query1 = " apollo moon (landing OR fake)"; QueryParser parser1; parser1.parse( query1 ); parser1.printCompiledQuery (); + + } \ No newline at end of file diff --git a/util/DataStructureLib/tuple.cpp b/util/DataStructureLib/tuple.cpp index bd1336ff0b2cb25ef235a487952080576d733fb5..e0500cdc210b0625b2f8428d78ce246fd5c61794 100644 --- a/util/DataStructureLib/tuple.cpp +++ b/util/DataStructureLib/tuple.cpp @@ -4,10 +4,9 @@ // Outline of query language from Prof. 
Nicole Hamilton, University of Michigan 03/15/2018 //31 lines -#pragma once #include<string> #include<vector> -#include "../../parser/Parser.h" +//#include "../../parser/Parser.h" //#include "../../constraintSolver/ISRAnd.h" using namespace std; @@ -38,6 +37,7 @@ enum TupleType OrTupleType, AndTupleType, NotTupleType, + SearchTupleType, WordTupleType }; @@ -51,6 +51,7 @@ public: //ISR *Compile( ); Tuple( ) : object( Token() ), Type( AndTupleType ) {} + Tuple( Token input ) : object( input ), Type( AndTupleType ) { @@ -76,6 +77,14 @@ public: } } + Tuple( string inputString ) + :object( Token( inputString ) ), Type( WordTupleType ) + { + } + + Tuple( string inputString , TupleType type ) + : object( Token( inputString ) ), Type( type ){} + int getNumberOfChildren() { return Next.size(); diff --git a/util/stringProcessing.cpp b/util/stringProcessing.cpp index 68ef918ac516090a4b0835c98cb952edb5770364..5d5b678108c5b70d9d100ec3664120b83493d00a 100644 --- a/util/stringProcessing.cpp +++ b/util/stringProcessing.cpp @@ -254,6 +254,10 @@ vector< string > splitStr ( string originalText, set< char > delims, bool remove { word.push_back( begin ); } + else if ( !removeSyms ) + { + word.push_back( begin ); + } ++i; begin = originalText[ i ]; } diff --git a/util/tests/stringProcessingTest.cpp b/util/tests/stringProcessingTest.cpp index 3504edf3165f0dd485e315ae30a086c05ff057c0..11ae03c35eb1459952decbb840d99a8810fee17d 100644 --- a/util/tests/stringProcessingTest.cpp +++ b/util/tests/stringProcessingTest.cpp @@ -39,17 +39,17 @@ int main ( ) "The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here'," "making it look like readable English. 
"; - testFindStr( original ); - testFindNext( ); - testFindPrev( ); +// testFindStr( original ); +// testFindNext( ); +// testFindPrev( ); testSplitStr( original ); - testIsStopWord( ); - testToLower( ); - testStemWord( ); - testSubStr( ); - testStripStr( ); - testIsAlpha( ); - testIsNum( ); +// testIsStopWord( ); +// testToLower( ); +// testStemWord( ); +// testSubStr( ); +// testStripStr( ); +// testIsAlpha( ); +// testIsNum( ); cout << "\nTests passed for StringProcessing :D" << endl; @@ -139,11 +139,16 @@ void testSplitStr ( string original ) vector< string > vec = splitStr( original, ' ', true ); assert( vec.size( ) == 53 ); - string word = "hello\ngoodbye"; - vec = splitStr( word, '\n', true ); + string word = "hello goodbye"; + vec = splitStr( word, ' ', true ); assert( vec.size( ) == 2 ); assert( vec[ 0 ] == "hello" && vec[ 1 ] == "goodbye" ); + word = "apollo moon OR landing"; + vec = splitStr( word, ' ', true ); + assert( vec.size( ) == 4 ); + assert( vec[ 0 ] == "apollo" && vec[ 1 ] == "moon" && vec[ 2 ] == "OR" && vec[ 3 ] == "landing" ); + cout << "testSplitStr passed" << endl << endl; }