From a39561b466e9b9407c5572564a89b731546fa8e0 Mon Sep 17 00:00:00 2001
From: Zane Dunnings <zldunn@umich.edu>
Date: Mon, 9 Apr 2018 13:12:54 -0400
Subject: [PATCH] AND and OR working

---
 query/queryLanguage/QueryParser.cpp           | 320 +++++++-----------
 query/queryLanguage/QueryParser.h             |   3 -
 query/queryLanguage/tests/testQueryParser.cpp |   4 +-
 util/DataStructureLib/tuple.cpp               |  13 +-
 util/stringProcessing.cpp                     |   4 +
 util/tests/stringProcessingTest.cpp           |  29 +-
 6 files changed, 154 insertions(+), 219 deletions(-)

diff --git a/query/queryLanguage/QueryParser.cpp b/query/queryLanguage/QueryParser.cpp
index 055804f..9d79ee2 100644
--- a/query/queryLanguage/QueryParser.cpp
+++ b/query/queryLanguage/QueryParser.cpp
@@ -4,8 +4,7 @@
 
 #include "QueryParser.h"
 #include<unordered_set>
-
-//#include "../../util/stringProcessing.h"
+#include "../../util/stringProcessing.h"
 #include<iostream>
 /***
  *  QUERY PARSER CLASS
@@ -16,75 +15,6 @@
  *
  */
 
-void removeWhitespace(string &str)
-	{
-	str.erase(std::remove(str.begin(), str.end(), ' '), str.end());
-	}
-/***
- *
- * Returns a token of the next word in the query, past the given index
- * @param index
- * @return
- */
-Token QueryParser::FindNextToken( int &index ){
-
-	//TODO remove this when you add new ISR
-	unordered_set<char> stopChars;
-	stopChars.insert(' ');
-
-	int size = 1;
-	int start = index;
-	//vector<string> words = splitStr( query , ' ', 0);
-	//string text = words [ start ] ;
-	//++index;
-
-
-	while(start + size < query.size())
-		{
-		if ( query[ start + size ] == '"' )
-			{
-			++size;
-			while( query[start + size ]!= '"' && (start + size < query.size()) )
-				{
-				++size;
-				}
-			if(start + size < query.size())
-				++size;
-			index = start + size;
-			string text = query.substr ( start, size );
-			removeWhitespace(text);
-			if( MatchOR ( text ) )
-				return Token( "-OR-" );
-			return Token( text );
-			}
-		else if ( stopChars.count( query[ start + size ] ) > 0)
-			{
-
-			//while( query[start] == ' ')
-			//	{
-			//	++start;
-			//	}
-			index = start + size;
-			string text = query.substr ( start, size );
-			removeWhitespace(text);
-
-			return Token( text );
-			}
-		else
-			{
-			++size;
-			}
-		}
-		index = start + size;
-
-		string text = query.substr ( start, size );
-		removeWhitespace(text);
-
-
-
-	return Token( text );
-	}
-
 /*** Builds QueryTree from input query
  *
  * @param input
@@ -93,75 +23,37 @@ void QueryParser::parse( string input )
 	{
 	query = input;
 	Token current;
-	int location = 0;
-	while( location < input.size( ) )
-		{
-		//TODO needs to be BF Traversal
-		current = FindNextToken( location );
-		Tuple * next = new Tuple( current );
-		queryTree->Next.push_back( next );
-
-		}
+	queryTree = Constraint ( input );
 	}
 
 /***
- * destructor for the Query Parser
+ * Takes in a string and separates it on OR; if there is no OR, then on AND. It creates an AND or OR Tuple if the string is complex.
+ * If the string is a single word, a WORD tuple is created and returned.
+ * @param input
  */
-QueryParser::~QueryParser ( )
+Tuple* QueryParser::Constraint( string input )
 	{
-	delete_children ( queryTree );
-	delete queryTree;
-	}
+	vector<Tuple * > constraintList;
 
-/***
- * Traverses down the tree and deletes all of the nodes in the tree
- * @param node
- */
-void QueryParser::delete_children( Tuple* node )
-	{
-	for( int i = 0; i < node->Next.size( ); ++i )
+	//Break on top level OR
+	if( isOrType( input ) )
 		{
-		delete_children( node->Next[ i ] );
-		delete node->Next[ i ];
+		Tuple *t = new Tuple( OrTupleType );
+		constraintList = breakOnOR ( input );
+		t->Next = constraintList;
+		return t;
 		}
-	}
-
-/***
- * Prints the compiled Query for testing
- */
-void QueryParser::printCompiledQuery()
-	{
-	cout << "Query Tree: \n";
-	deque<Tuple *> queue;
-	deque<int> levelQueue;
-	queue.push_back( queryTree );
-	levelQueue.push_back( 0 );
-	traverse( queue, levelQueue );
-	}
-
-
-void QueryParser::traverse(deque< Tuple*> queue, deque< int> levels)
-	{
-	int deepest = 0;
-	while(!queue.empty())
+	else if( isAndType ( input ) )
 		{
-		Tuple *current = queue.front ( );
-		queue.pop_front ( );
-		int currLevel = levels.front();
-		levels.pop_front ();
-		for ( int i = 0; i < current->Next.size ( ); ++i )
-			{
-			queue.push_back( current->Next[ i ] );
-			levels.push_back( currLevel + 1);
-			}
-		cout << " | ";
-		if( currLevel > deepest)
-			{
-			deepest = currLevel;
-			cout << "\n[ "<<deepest<<" ] ";
-			}
-
-		cout << " " << current->object.text << " ";
+		Tuple *t = new Tuple( AndTupleType);
+		constraintList = breakOnAND ( input );
+		t->Next = constraintList;
+		return t;
+		}
+	else
+		{
+		Tuple *t = new Tuple( input, WordTupleType);
+		return t;
 		}
 	}
 
@@ -205,52 +97,8 @@ bool QueryParser::MatchAND( string input )
 	return false;
 	}
 
-/***
- * Highest level query parsing, splits the input string on OR, then builds tree subtrees without
- * @param input
- */
-Tuple* QueryParser::Constraint( string input )
-	{
-	vector<Tuple * > constraintList;
-	Tuple *t = new Tuple();
-	constraintList = breakOnOR( input );
-
-
-	if( constraintList.size( ) > 1 )
-		t->Type = OrTupleType;
-	else
-		t->Type = AndTupleType;
-		Tuple* toBeKilled = constraintList[ 0 ];
-		constraintList = breakOnAND ( input );
-	t->Next = constraintList;
-
-	//Iterate through the subcontraints and if there are ORs, then run this again, else split on and for each
-	for (int i = 0; i < constraintList.size( ); ++i )
-		{
-		string word =constraintList[ i ]->object.text;
-		//If the subtype needs an or, then build a new or tuple
-		if(isOrType(word))
-			{
-			Tuple* toBeKilled = constraintList[ i ];
-			constraintList[ i ] = Constraint ( word );
-			constraintList[ i ]->Type = OrTupleType;
-			delete toBeKilled;
-			toBeKilled = nullptr;
-			}
-		else if(isAndType(word))
-			{
-			Tuple* toBeKilled = constraintList[ i ];
-			constraintList[ i ] = Constraint ( word );
-			constraintList[ i ]->Type = AndTupleType;
-			delete toBeKilled;
-			toBeKilled = nullptr;
-			}
-		}
-
 
 
-	}
-
 
 /***
  * Breaks input string on ORs, returns a list of tuples of those strings
@@ -272,7 +120,7 @@ vector<Tuple * > QueryParser::breakOnOR( string input )
 	closedBracket.insert(')');
 	closedBracket.insert('}');
 	closedBracket.insert(']');
-	vector<string> query = splitStr (input, ' ', 0);
+	vector<string> query = splitStr (input, ' ', true);
 
 	vector<Tuple *> constraintList;
 	int start = 0;
@@ -289,41 +137,43 @@ vector<Tuple * > QueryParser::breakOnOR( string input )
 			}
 		else if( MatchOR( query[ i ]) && depth == 0 )
 			{
-			string text = query[ 0 ];
+			string text;
 			for ( int j = start; j < i; ++ j)
 				{
 				text+= query[ j ];
+				if( j < ( i -1 ) )
+					text+= " ";
 				}
-			Tuple * subConstraint = new Tuple( text );
+			if( text == "" || text == " ")
+				break;
+
+			Tuple * subConstraint = Constraint( text );
 			constraintList.push_back( subConstraint );
 			start = i + 1;
 			}
 		else if( i == query.size( ) - 1 )
 			{
 			string text;
-			for ( int j = start; j < i; ++ j)
+			for ( int j = start; j <= i; ++ j)
 				{
 				text+= query[ j ];
+				if( j <= ( i -1 ) )
+					text+= " ";
 				}
-			Tuple * subConstraint = new Tuple( text );
+			Tuple * subConstraint = Constraint( text );
 			constraintList.push_back( subConstraint );
 			}
 		}
-		return constraintList;
+	return constraintList;
 	}
 
-Tuple * baseConstraint( string input )
-	{
-//	while( t = simpleConstraint ( input ))
-	return nullptr;
-	}
 
 /***
  * Returns if a string has an OR at its highest level
  */
 bool QueryParser::isOrType( string input )
 	{
-	vector<string> query = splitStr (input, ' ', 0);
+	vector<string> query = splitStr (input, ' ', true);
 	int depth = 0;
 	for( auto word = query.begin();  word != query.end();  ++word )
 		{
@@ -348,10 +198,16 @@ bool QueryParser::isOrType( string input )
  */
 bool QueryParser::isAndType( string input )
 	{
-	vector<string> query = splitStr (input, ' ', 0);
+	vector<string> query = splitStr (input, ' ', true);
+
+	if( query.size( ) == 1)
+		return false;
+
 	int depth = 0;
 	for( auto word = query.begin();  word != query.end();  ++word )
 		{
+		if( depth == 0 && MatchOR ( *word ))
+			return false;
 		if(depth == 0 && MatchAND(*word))
 			{
 			return true;
@@ -364,11 +220,8 @@ bool QueryParser::isAndType( string input )
 			{
 			--depth;
 			}
-
-
-
 		}
-	return false;
+	return true;
 	}
 
 vector<Tuple * > QueryParser::breakOnAND( string input )
@@ -385,7 +238,7 @@ vector<Tuple * > QueryParser::breakOnAND( string input )
 	closedBracket.insert(')');
 	closedBracket.insert('}');
 	closedBracket.insert(']');
-	vector<string> query = splitStr (input, ' ', 0);
+	vector<string> query = splitStr (input, ' ', true);
 
 	vector<Tuple *> constraintList;
 	int start = 0;
@@ -402,25 +255,90 @@ vector<Tuple * > QueryParser::breakOnAND( string input )
 			}
 		else if( MatchAND( query[ i ]) && depth == 0 )
 			{
-			string text = query[ 0 ];
+			string text;
 			for ( int j = start; j < i; ++ j)
 				{
 				text+= query[ j ];
+				if( j < ( i -1 ) )
+					text+= " ";
 				}
-			Tuple * subConstraint = new Tuple( text );
+			if( text == "" || text == " ")
+				break;
+
+			Tuple * subConstraint = Constraint( text );
 			constraintList.push_back( subConstraint );
 			start = i + 1;
 			}
-		else if( i == query.size( ) - 1 )
+		else if( depth == 0 )
 			{
 			string text;
-			for ( int j = start; j < i; ++ j)
-				{
-				text+= query[ j ];
-				}
-			Tuple * subConstraint = new Tuple( text );
+			text = query[ i ];
+			Tuple * subConstraint =  Constraint( text );
 			constraintList.push_back( subConstraint );
 			}
 		}
 	return constraintList;
+	}
+
+
+/***
+ * Prints the compiled Query for testing
+ */
+void QueryParser::printCompiledQuery()
+	{
+	cout << "Query Tree: \n";
+	deque<Tuple *> queue;
+	deque<int> levelQueue;
+	queue.push_back( queryTree );
+	levelQueue.push_back( 0 );
+	traverse( queue, levelQueue );
+	}
+
+
+void QueryParser::traverse(deque< Tuple*> queue, deque< int> levels)
+	{
+	int deepest = 0;
+	while(!queue.empty())
+		{
+		Tuple *current = queue.front ( );
+		queue.pop_front ( );
+		int currLevel = levels.front();
+		levels.pop_front ();
+		for ( int i = 0; i < current->Next.size ( ); ++i )
+			{
+			queue.push_back( current->Next[ i ] );
+			levels.push_back( currLevel + 1);
+			}
+		cout << " | ";
+		if( currLevel > deepest)
+			{
+			deepest = currLevel;
+			cout << "\n[ "<<deepest<<" ] ";
+			}
+
+		cout << " " << current->object.text << " ";
+		}
+	}
+
+
+/***
+ * destructor for the Query Parser
+ */
+QueryParser::~QueryParser ( )
+	{
+	delete_children ( queryTree );
+	delete queryTree;
+	}
+
+/***
+ * Traverses down the tree and deletes all of the nodes in the tree
+ * @param node
+ */
+void QueryParser::delete_children( Tuple* node )
+	{
+	for( int i = 0; i < node->Next.size( ); ++i )
+		{
+		delete_children( node->Next[ i ] );
+		delete node->Next[ i ];
+		}
 	}
\ No newline at end of file
diff --git a/query/queryLanguage/QueryParser.h b/query/queryLanguage/QueryParser.h
index 0968a8e..bdd956d 100644
--- a/query/queryLanguage/QueryParser.h
+++ b/query/queryLanguage/QueryParser.h
@@ -7,9 +7,6 @@
 
 #include "../../util/DataStructureLib/tuple.cpp"
 #include<deque>
-// Outline of query language from Prof. Nicole Hamilton, University of Michigan 03/15/2018
-// 41 lines
-
 
 //  <Constraint>        ::= <BaseConstraint>
 //                              { <OrOp> <BaseConstraint> }
diff --git a/query/queryLanguage/tests/testQueryParser.cpp b/query/queryLanguage/tests/testQueryParser.cpp
index 5132c78..9e44ad7 100644
--- a/query/queryLanguage/tests/testQueryParser.cpp
+++ b/query/queryLanguage/tests/testQueryParser.cpp
@@ -13,9 +13,11 @@ int main()
 	parser.parse( query );
 	parser.printCompiledQuery();
 
-	string query1 = " \"apollo moon\" landing";
+	string query1 = " apollo moon (landing OR fake)";
 	QueryParser parser1;
 	parser1.parse( query1 );
 	parser1.printCompiledQuery ();
 
+
+
 	}
\ No newline at end of file
diff --git a/util/DataStructureLib/tuple.cpp b/util/DataStructureLib/tuple.cpp
index bd1336f..e0500cd 100644
--- a/util/DataStructureLib/tuple.cpp
+++ b/util/DataStructureLib/tuple.cpp
@@ -4,10 +4,9 @@
 
 // Outline of query language from Prof. Nicole Hamilton, University of Michigan 03/15/2018
 //31 lines
-#pragma  once
 #include<string>
 #include<vector>
-#include "../../parser/Parser.h"
+//#include "../../parser/Parser.h"
 //#include "../../constraintSolver/ISRAnd.h"
 using namespace std;
 
@@ -38,6 +37,7 @@ enum TupleType
 	OrTupleType,
 	AndTupleType,
 	NotTupleType,
+	SearchTupleType,
 	WordTupleType
 	};
 
@@ -51,6 +51,7 @@ public:
 	//ISR *Compile( );
 	Tuple( )
 			: object( Token() ), Type( AndTupleType ) {}
+
 	Tuple( Token input )
 			: object( input ), Type( AndTupleType )
 		{
@@ -76,6 +77,14 @@ public:
 			}
 		}
 
+	Tuple( string inputString )
+			:object( Token( inputString ) ), Type( WordTupleType )
+		{
+		}
+
+	Tuple( string inputString , TupleType type )
+			: object( Token( inputString ) ), Type( type ){}
+
 	int getNumberOfChildren()
 		{
 		return Next.size();
diff --git a/util/stringProcessing.cpp b/util/stringProcessing.cpp
index 68ef918..5d5b678 100644
--- a/util/stringProcessing.cpp
+++ b/util/stringProcessing.cpp
@@ -254,6 +254,10 @@ vector< string > splitStr ( string originalText, set< char > delims, bool remove
 				{
 				word.push_back( begin );
 				}
+			else if ( !removeSyms )
+				{
+				word.push_back( begin );
+				}
 			++i;
 			begin = originalText[ i ];
 			}
diff --git a/util/tests/stringProcessingTest.cpp b/util/tests/stringProcessingTest.cpp
index 3504edf..11ae03c 100644
--- a/util/tests/stringProcessingTest.cpp
+++ b/util/tests/stringProcessingTest.cpp
@@ -39,17 +39,17 @@ int main ( )
 			"The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here',"
 			"making it look like readable English. ";
 
-	testFindStr( original );
-	testFindNext( );
-	testFindPrev( );
+//	testFindStr( original );
+//	testFindNext( );
+//	testFindPrev( );
 	testSplitStr( original );
-	testIsStopWord( );
-	testToLower( );
-	testStemWord( );
-	testSubStr( );
-	testStripStr( );
-	testIsAlpha( );
-	testIsNum( );
+//	testIsStopWord( );
+//	testToLower( );
+//	testStemWord( );
+//	testSubStr( );
+//	testStripStr( );
+//	testIsAlpha( );
+//	testIsNum( );
 
 	cout << "\nTests passed for StringProcessing :D" << endl;
 
@@ -139,11 +139,16 @@ void testSplitStr ( string original )
 	vector< string > vec = splitStr( original, ' ', true );
 	assert( vec.size( ) == 53 );
 
-	string word = "hello\ngoodbye";
-	vec = splitStr( word, '\n', true );
+	string word = "hello goodbye";
+	vec = splitStr( word, ' ', true );
 	assert( vec.size( ) == 2 );
 	assert( vec[ 0 ] == "hello" && vec[ 1 ] == "goodbye" );
 
+	word = "apollo moon OR landing";
+	vec = splitStr( word, ' ', true );
+	assert( vec.size( ) == 4 );
+	assert( vec[ 0 ] == "apollo" && vec[ 1 ] == "moon" && vec[ 2 ] == "OR" && vec[ 3 ] == "landing" );
+
 	cout << "testSplitStr passed" << endl << endl;
 
 	}
-- 
GitLab