Skip to content
Snippets Groups Projects
Commit 8b718284 authored by zldunn's avatar zldunn
Browse files

Merge branch 'QueryLang' into 'master'

Basic query language

See merge request !10
parents 6e9a6aa7 cd5b5336
No related branches found
No related tags found
1 merge request!10Basic query language
......@@ -4,8 +4,7 @@
#include "QueryParser.h"
#include<unordered_set>
//#include "../../util/stringProcessing.h"
#include "../../util/stringProcessing.h"
#include<iostream>
/***
* QUERY PARSER CLASS
......@@ -16,75 +15,6 @@
*
*/
/***
 * Removes every space character (' ') from str, in place.
 * Other whitespace such as tabs or newlines is left untouched.
 * @param str string to compact; modified directly
 */
void removeWhitespace(string &str)
{
    // Slide the kept characters towards the front, then trim the tail.
    string::size_type kept = 0;
    for ( string::size_type pos = 0; pos < str.size( ); ++pos )
    {
        if ( str[ pos ] != ' ' )
        {
            str[ kept ] = str[ pos ];
            ++kept;
        }
    }
    str.resize( kept );
}
/***
 * Extracts the next token of `query`, starting at `index`.
 *
 * The character at `index` is always consumed. Scanning then continues until
 * either a space is met (the token ends just before it) or a double quote is
 * met (the token is extended through the matching closing quote, or to the end
 * of the query when there is none). Spaces are stripped from the extracted
 * text before the Token is built. A quoted token that MatchOR accepts is
 * returned as the literal "-OR-" token.
 *
 * @param index in: position to start from; out: position just past the
 *              characters consumed for the returned token
 * @return the Token built from the consumed text
 */
Token QueryParser::FindNextToken( int &index )
{
    const int tokenStart = index;
    int cursor = tokenStart + 1;    // the first character always belongs to the token

    // Publishes the new index, then returns query[tokenStart, cursor) without spaces.
    auto takeText = [ & ]( ) -> string
    {
        index = cursor;
        string piece = query.substr( tokenStart, cursor - tokenStart );
        removeWhitespace( piece );
        return piece;
    };

    for ( ; cursor < query.size( ); ++cursor )
    {
        const char ch = query[ cursor ];

        if ( ch == '"' )
        {
            // Swallow everything up to and including the closing quote, if any.
            ++cursor;
            while ( query[ cursor ] != '"' && ( cursor < query.size( ) ) )
                ++cursor;
            if ( cursor < query.size( ) )
                ++cursor;

            string phrase = takeText( );
            if ( MatchOR( phrase ) )
                return Token( "-OR-" );
            return Token( phrase );
        }

        // A space is the only stop character.
        if ( ch == ' ' )
            return Token( takeText( ) );
    }

    // Reached the end of the query without meeting a stop character.
    return Token( takeText( ) );
}
/*** Builds QueryTree from input query
*
* @param input
......@@ -92,76 +22,39 @@ Token QueryParser::FindNextToken( int &index ){
// Stores `input` as the working query, runs preprocess( ) on it, tokenizes it
// with FindNextToken, and finally assigns the tree built by Constraint( ) to
// queryTree.
// NOTE(review): this body appears to combine two revisions of the function (a
// token loop that appends to queryTree->Next, followed by a wholesale
// reassignment of queryTree) — confirm which one is intended. As written, the
// tuples pushed in the loop are no longer reachable once queryTree is reassigned.
void QueryParser::parse( string input )
{
query = input;
// Rewrites `query` (not `input`) so parentheses are surrounded by spaces.
preprocess();
Token current;
int location = 0;
// NOTE(review): the bound uses input.size( ) while FindNextToken reads the
// preprocessed `query`, whose length may differ — confirm.
while( location < input.size( ) )
{
//TODO needs to be BF Traversal
// FindNextToken advances `location` past the token it returns.
current = FindNextToken( location );
Tuple * next = new Tuple( current );
queryTree->Next.push_back( next );
}
queryTree = Constraint ( query );
}
/***
* destructor for the Query Parser
* takes in a string and separates on OR, if no OR, then AND. It will create an AND or OR Tuple if there's a complex string.
* If the string is one word, it will become a WORD tuple and return itself.
* @param input
*/
QueryParser::~QueryParser ( )
Tuple* QueryParser::Constraint( string input )
{
delete_children ( queryTree );
delete queryTree;
}
vector<Tuple * > constraintList;
/***
* Traverses down the tree and deletes all of the nodes in the tree
* @param node
*/
void QueryParser::delete_children( Tuple* node )
{
for( int i = 0; i < node->Next.size( ); ++i )
//Break on top level OR
if( isOrType( input ) )
{
delete_children( node->Next[ i ] );
delete node->Next[ i ];
Tuple *t = new Tuple( OrTupleType );
constraintList = breakOnOR ( input );
t->Next = constraintList;
return t;
}
}
/***
* Prints the compiled Query for testing
*/
void QueryParser::printCompiledQuery()
{
cout << "Query Tree: \n";
deque<Tuple *> queue;
deque<int> levelQueue;
queue.push_back( queryTree );
levelQueue.push_back( 0 );
traverse( queue, levelQueue );
}
void QueryParser::traverse(deque< Tuple*> queue, deque< int> levels)
{
int deepest = 0;
while(!queue.empty())
else if( isAndType ( input ) )
{
Tuple *current = queue.front ( );
queue.pop_front ( );
int currLevel = levels.front();
levels.pop_front ();
for ( int i = 0; i < current->Next.size ( ); ++i )
{
queue.push_back( current->Next[ i ] );
levels.push_back( currLevel + 1);
}
cout << " | ";
if( currLevel > deepest)
{
deepest = currLevel;
cout << "\n[ "<<deepest<<" ] ";
}
cout << " " << current->object.text << " ";
Tuple *t = new Tuple( AndTupleType);
constraintList = breakOnAND ( input );
t->Next = constraintList;
return t;
}
else
{
Tuple *t = new Tuple( input, WordTupleType);
return t;
}
}
......@@ -205,52 +98,8 @@ bool QueryParser::MatchAND( string input )
return false;
}
/***
 * Highest level query parsing. The returned node is typed OR when breakOnOR
 * splits the input into more than one piece, otherwise AND. Its children are
 * the pieces produced by breakOnAND; any child whose text still holds a
 * top-level OR or AND is replaced by the subtree built by a recursive call.
 *
 * NOTE(review): the children always come from the AND split, even when the
 * node is typed OR — confirm this is the intended tree shape.
 *
 * @param input query text to parse
 * @return newly allocated root of the subtree for `input`; the caller owns it
 */
Tuple* QueryParser::Constraint( string input )
{
    Tuple *t = new Tuple( );

    // The OR split is only needed to pick the node type, so its tuples are
    // released right away using the same delete_children-then-delete pattern
    // as the destructor.
    vector<Tuple * > orParts = breakOnOR( input );
    t->Type = ( orParts.size( ) > 1 ) ? OrTupleType : AndTupleType;
    for ( Tuple *part : orParts )
    {
        if ( part != nullptr )
        {
            delete_children( part );
            delete part;
        }
    }

    vector<Tuple * > constraintList = breakOnAND( input );

    // Expand every sub-constraint that still needs splitting.
    for ( Tuple *&child : constraintList )
    {
        // Copy the text first: the node holding it is deleted below.
        string word = child->object.text;

        if ( isOrType( word ) )
        {
            Tuple *toBeKilled = child;
            child = Constraint( word );
            child->Type = OrTupleType;
            delete toBeKilled;
        }
        else if ( isAndType( word ) )
        {
            Tuple *toBeKilled = child;
            child = Constraint( word );
            child->Type = AndTupleType;
            delete toBeKilled;
        }
    }

    // Attach the children only after replacement so the tree never holds
    // pointers to deleted nodes.
    t->Next = constraintList;
    return t;
}
/***
* Breaks input string on ORs, returns a list of tuples of those strings
......@@ -272,7 +121,7 @@ vector<Tuple * > QueryParser::breakOnOR( string input )
closedBracket.insert(')');
closedBracket.insert('}');
closedBracket.insert(']');
vector<string> query = splitStr (input, ' ', 0);
vector<string> query = splitStr (input, ' ', false);
vector<Tuple *> constraintList;
int start = 0;
......@@ -282,6 +131,28 @@ vector<Tuple * > QueryParser::breakOnOR( string input )
if( query[ i ] == "(")
{
++depth;
++i;
string text;
while ( depth != 0)
{
if( i > query.size() )
break;
if( query[ i ] == "(")
++depth;
else if ( query[ i ] == ")")
--depth;
if( depth != 0)
{
if( text!= "")
text+=" ";
text+=query[ i ];
++i;
}
}
Tuple * subConstraint = Constraint( text );
constraintList.push_back( subConstraint );
start = i + 1;
}
else if( query[ i ] == ")")
{
......@@ -289,41 +160,44 @@ vector<Tuple * > QueryParser::breakOnOR( string input )
}
else if( MatchOR( query[ i ]) && depth == 0 )
{
string text = query[ 0 ];
string text;
for ( int j = start; j < i; ++ j)
{
text+= query[ j ];
if( j < ( i -1 ) )
text+= " ";
}
if( text != "" && text != " ")
{
Tuple * subConstraint = Constraint( text );
constraintList.push_back( subConstraint );
}
Tuple * subConstraint = new Tuple( text );
constraintList.push_back( subConstraint );
start = i + 1;
}
else if( i == query.size( ) - 1 )
{
string text;
for ( int j = start; j < i; ++ j)
for ( int j = start; j <= i; ++ j)
{
text+= query[ j ];
if( j <= ( i -1 ) )
text+= " ";
}
Tuple * subConstraint = new Tuple( text );
Tuple * subConstraint = Constraint( text );
constraintList.push_back( subConstraint );
}
}
return constraintList;
return constraintList;
}
/***
 * Stub: not implemented yet. The argument is ignored and no tuple is produced.
 * @param input query text (currently unused)
 * @return always nullptr
 */
Tuple * baseConstraint( string input )
{
    // Planned: loop over simpleConstraint( input ) while it yields a tuple.
    static_cast< void >( input );
    return nullptr;
}
/***
* Returns if a string has an OR at its highest level
*/
bool QueryParser::isOrType( string input )
{
vector<string> query = splitStr (input, ' ', 0);
vector<string> query = splitStr (input, ' ', false);
int depth = 0;
for( auto word = query.begin(); word != query.end(); ++word )
{
......@@ -348,10 +222,16 @@ bool QueryParser::isOrType( string input )
*/
bool QueryParser::isAndType( string input )
{
vector<string> query = splitStr (input, ' ', 0);
vector<string> query = splitStr (input, ' ', false);
if( query.size( ) == 1)
return false;
int depth = 0;
for( auto word = query.begin(); word != query.end(); ++word )
{
if( depth == 0 && MatchOR ( *word ))
return false;
if(depth == 0 && MatchAND(*word))
{
return true;
......@@ -364,11 +244,8 @@ bool QueryParser::isAndType( string input )
{
--depth;
}
}
return false;
return true;
}
vector<Tuple * > QueryParser::breakOnAND( string input )
......@@ -385,7 +262,7 @@ vector<Tuple * > QueryParser::breakOnAND( string input )
closedBracket.insert(')');
closedBracket.insert('}');
closedBracket.insert(']');
vector<string> query = splitStr (input, ' ', 0);
vector<string> query = splitStr (input, ' ', false);
vector<Tuple *> constraintList;
int start = 0;
......@@ -395,6 +272,26 @@ vector<Tuple * > QueryParser::breakOnAND( string input )
if( query[ i ] == "(")
{
++depth;
++i;
string text;
while ( depth != 0)
{
if( query[ i ] == "(")
++depth;
else if ( query[ i ] == ")")
--depth;
if( depth != 0)
{
if( text!= "")
text+=" ";
text+=query[ i ];
++i;
}
}
Tuple * subConstraint = Constraint( text );
constraintList.push_back( subConstraint );
start = i + 1;
}
else if( query[ i ] == ")")
{
......@@ -402,25 +299,112 @@ vector<Tuple * > QueryParser::breakOnAND( string input )
}
else if( MatchAND( query[ i ]) && depth == 0 )
{
string text = query[ 0 ];
for ( int j = start; j < i; ++ j)
{
text+= query[ j ];
}
Tuple * subConstraint = new Tuple( text );
constraintList.push_back( subConstraint );
start = i + 1;
}
else if( i == query.size( ) - 1 )
else if( depth == 0 )
{
string text;
for ( int j = start; j < i; ++ j)
{
text+= query[ j ];
}
Tuple * subConstraint = new Tuple( text );
text = query[ i ];
Tuple * subConstraint = Constraint( text );
constraintList.push_back( subConstraint );
}
}
return constraintList;
}
/***
 * Writes the "Query Tree" header followed by the text rendering of the
 * compiled query (see getTestingTree) to standard output. Meant for testing.
 */
void QueryParser::printCompiledQuery()
{
    cout << "\nQuery Tree: \n";
    const string rendered = getTestingTree( );
    cout << rendered;
}
/***
 * Builds the string that printCompiledQuery prints: a level-by-level rendering
 * of the tree rooted at queryTree, produced by traverse( ).
 * @return the rendered tree
 */
string QueryParser::getTestingTree()
{
    // Seed the traversal with the root at level 0.
    deque<Tuple *> pending{ queryTree };
    deque<int> depths{ 0 };

    string rendered;
    traverse( pending, depths, rendered );
    return rendered;
}
/***
 * Breadth-first walk over the tuples in `queue`, appending a rendering of each
 * visited node to `output`.
 *
 * Every node contributes " | " followed by " <text> ". The first node met on a
 * level deeper than any seen so far is preceded by "\n[ <level> ] ".
 * Null entries are skipped, so an unset tree renders as an empty string
 * instead of dereferencing a null pointer.
 *
 * @param queue  nodes still to visit; taken by value, so the caller's deque is untouched
 * @param levels depth of each entry in `queue`, in the same order; also by value
 * @param output string the rendering is appended to
 */
void QueryParser::traverse(deque< Tuple*> queue, deque< int> levels, string &output)
{
    int deepest = 0;

    while ( !queue.empty( ) )
    {
        Tuple *current = queue.front( );
        queue.pop_front( );
        const int currLevel = levels.front( );
        levels.pop_front( );

        if ( current == nullptr )
            continue;

        // Children are visited after everything already queued (level order).
        for ( Tuple *child : current->Next )
        {
            queue.push_back( child );
            levels.push_back( currLevel + 1 );
        }

        output += " | ";
        if ( currLevel > deepest )
        {
            // First node of a new level: start a new labelled line.
            deepest = currLevel;
            output += "\n[ ";
            output += to_string( deepest );
            output += " ] ";
        }
        output += " ";
        output += current->object.text;
        output += " ";
    }
}
/***
 * Destructor for the Query Parser: releases every node of the query tree.
 * Does nothing when no tree is attached (queryTree is null).
 * NOTE(review): this relies on queryTree being initialised by the constructor,
 * which is not visible here — confirm it is never left indeterminate.
 */
QueryParser::~QueryParser ( )
{
    if ( queryTree == nullptr )
        return;

    // Descendants first, then the root itself.
    delete_children ( queryTree );
    delete queryTree;
    queryTree = nullptr;
}
/***
 * Recursively deletes every descendant of `node`. The node itself is NOT
 * deleted; afterwards its child list is empty, so no dangling pointers remain.
 * A null `node` is ignored.
 * @param node root whose subtree (excluding the root) is released
 */
void QueryParser::delete_children( Tuple* node )
{
    if ( node == nullptr )
        return;

    for ( Tuple *child : node->Next )
    {
        // Free the grandchildren before the child that refers to them.
        delete_children( child );
        delete child;
    }
    node->Next.clear( );
}
/***
 * Rewrites `query` so that every '(' and ')' is surrounded by a single space
 * on each side. All other characters are copied through unchanged.
 */
void QueryParser::preprocess( )
{
    string spaced;
    for ( const char ch : query )
    {
        const bool isParen = ( ch == '(' ) || ( ch == ')' );

        if ( isParen )
            spaced += " ";
        spaced += ch;
        if ( isParen )
            spaced += " ";
    }
    query = spaced;
}
\ No newline at end of file
......@@ -7,9 +7,6 @@
#include "../../util/DataStructureLib/tuple.cpp"
#include<deque>
// Outline of query language from Prof. Nicole Hamilton, University of Michigan 03/15/2018
// 41 lines
// <Constraint> ::= <BaseConstraint>
// { <OrOp> <BaseConstraint> }
......@@ -52,6 +49,7 @@ public:
vector<Tuple * > breakOnAND( string input );
void printCompiledQuery( );
string getTestingTree( );
~QueryParser ( );
......@@ -59,7 +57,8 @@ public:
Tuple* queryTree;
string query;
private:
void traverse(deque< Tuple*> queue, deque< int> levels);
void preprocess( );
void traverse(deque< Tuple*> queue, deque< int> levels, string &output);
void delete_children( Tuple* node );
bool MatchOR( string input );
bool MatchAND( string input );
......
......@@ -5,17 +5,142 @@
#include "../QueryParser.h"
#include<iostream>
#include <fstream>
#include <cassert>
using namespace std;
void testAND();
void testOR();
void testSimple();
void testORwithAND();
void testnestedOR();
//void nestedAND();
void testNestedORwithAND();
// Entry point of the query-language test driver: parses and prints one sample
// query, then runs each test function in turn. The tests rely on assert( ).
// NOTE(review): the sample-query lines and the test calls look like two
// revisions of this function merged together — confirm both are wanted.
int main()
{
string query = "apollo moon OR landing";
QueryParser parser;
parser.parse( query );
parser.printCompiledQuery();
cout << "Starting QueryLang tests...\n";
testAND();
testOR();
testSimple ();
testORwithAND ();
testNestedORwithAND();
testnestedOR();
}
/***
 * Checks that queries made only of implicit or explicit ANDs
 * (juxtaposition, "AND", "&", "&&", "and") render as one -AND- node
 * with one child per word.
 */
void testAND()
{
    cout << "Testing AND...\n";

    // Implicit AND: plain juxtaposition of words.
    const string implicitQuery = "Is Lebron james the goat";
    const string implicitExpected = " | -AND- | \n[ 1 ] Is | Lebron | james | the | goat ";
    QueryParser lebronParser;
    lebronParser.parse( implicitQuery );
    assert( implicitExpected == lebronParser.getTestingTree( ) );

    // Every spelling of the AND operator mixed in one query.
    const string operatorQuery = "I like cookies AND cake & pie && icecream and dessert";
    const string operatorExpected = " | -AND- | \n[ 1 ] I | like | cookies | cake | pie | icecream | dessert ";
    QueryParser dessertParser;
    dessertParser.parse( operatorQuery );
    assert( operatorExpected == dessertParser.getTestingTree( ) );

    cout << "All AND tests passed!\n";
}
void testOR()
{
cout << "Testing OR..\n";
string query = "who or what OR when || where | why";
QueryParser Parser;
Parser.parse( query );
string correct = " | -OR- | \n[ 1 ] who | what | when | where | why ";
assert(correct == Parser.getTestingTree( ));
string simple = "left or right";
QueryParser simpleParser;
simpleParser.parse( simple );
string simpleCorrect = " | -OR- | \n[ 1 ] left | right ";
assert( simpleCorrect == simpleParser.getTestingTree( ) );
cout << "All OR tests passes\n";
}
void testSimple()
{
cout << "Testing Simple Case..\n";
string simple = "Zane";
QueryParser Parser;
Parser.parse( simple );
string correct = " | Zane ";
assert(correct == Parser.getTestingTree( ));
cout<<"All simple tests passed!\n";
}
void testORwithAND()
{
cout << "Testing OR with AND\n";
string nasa = "moon mission was a lie OR truth ";
QueryParser Parser;
Parser.parse( nasa );
string correct = " | -OR- | \n"
"[ 1 ] -AND- | truth | \n"
"[ 2 ] moon | mission | was | a | lie ";
assert( correct == Parser.getTestingTree( ) );
string earth = "the earth is || isnt flat or round";
QueryParser earthParser;
earthParser.parse( earth );
correct = " | -OR- | \n"
"[ 1 ] -AND- | -AND- | round | \n"
"[ 2 ] the | earth | is | isnt | flat ";
assert( correct == earthParser.getTestingTree( ) );
cout <<"All OR and AND tests passed!\n";
}
/***
 * Checks that parenthesised OR groups become nested -OR- subtrees, both when
 * the groups are nested inside each other and when they sit side by side.
 */
void testnestedOR()
{
    cout << "Testing nestedOR with AND\n";

    // Groups nested two levels deep.
    const string deepQuery = "karan OR ( chris OR ( kareem or omaury ) ) ";
    const string deepExpected = " | -OR- | \n"
                                "[ 1 ] karan | -OR- | \n"
                                "[ 2 ] chris | -OR- | \n"
                                "[ 3 ] kareem | omaury ";
    QueryParser RBParser;
    RBParser.parse( deepQuery );
    assert( deepExpected == RBParser.getTestingTree( ) );

    // Sibling groups, one of them written without inner padding.
    const string siblingQuery = "( DPJ or Tarik ) or (nico or oliver) or kekoa";
    const string siblingExpected = " | -OR- | \n"
                                   "[ 1 ] -OR- | -OR- | kekoa | \n"
                                   "[ 2 ] DPJ | Tarik | nico | oliver ";
    QueryParser WRParser;
    WRParser.parse( siblingQuery );
    assert( siblingExpected == WRParser.getTestingTree( ) );
}
void testNestedORwithAND()
{
cout << "Testing nestedOR with AND\n";
string nasa = "moon mission was a ( lie OR truth )";
QueryParser Parser;
Parser.parse( nasa );
string correct = " | -AND- | \n"
"[ 1 ] moon | mission | was | a | -OR- | \n"
"[ 2 ] lie | truth ";
assert( correct == Parser.getTestingTree( ) );
string earth = "the earth ( is || isnt)( flat or round )";
QueryParser earthParser;
earthParser.parse( earth );
correct = " | -AND- | \n"
"[ 1 ] the | earth | -OR- | -OR- | \n"
"[ 2 ] is | isnt | flat | round ";
assert( correct == earthParser.getTestingTree( ) );
string query1 = " \"apollo moon\" landing";
QueryParser parser1;
parser1.parse( query1 );
parser1.printCompiledQuery ();
cout <<"All nested OR and AND tests passed!\n";
}
}
\ No newline at end of file
......@@ -4,10 +4,9 @@
// Outline of query language from Prof. Nicole Hamilton, University of Michigan 03/15/2018
//31 lines
#pragma once
#include<string>
#include<vector>
#include "../../parser/Parser.h"
//#include "../../parser/Parser.h"
//#include "../../constraintSolver/ISRAnd.h"
using namespace std;
......@@ -38,6 +37,7 @@ enum TupleType
OrTupleType,
AndTupleType,
NotTupleType,
SearchTupleType,
WordTupleType
};
......@@ -51,6 +51,7 @@ public:
//ISR *Compile( );
Tuple( )
: object( Token() ), Type( AndTupleType ) {}
Tuple( Token input )
: object( input ), Type( AndTupleType )
{
......@@ -76,6 +77,14 @@ public:
}
}
// Builds a word tuple from raw text; forwards to the (text, type)
// constructor with WordTupleType.
Tuple( string inputString )
    : Tuple( inputString, WordTupleType )
    {
    }
// Builds a tuple of the given type whose token is constructed from inputString.
Tuple( string inputString , TupleType type )
: object( Token( inputString ) ), Type( type ){}
int getNumberOfChildren()
{
return Next.size();
......
......@@ -210,7 +210,14 @@ vector< string > splitStr ( string originalText, char delim, bool removeSyms )
string word = "";
while ( begin != delim && i < originalText.size( ) )
{
if ( removeSyms && ( isAlpha( begin ) || isNum( begin ) ) )
if ( removeSyms)
{
if( isAlpha( begin ) || isNum( begin ) )
{
word.push_back( begin );
}
}
else
{
word.push_back( begin );
}
......@@ -254,6 +261,10 @@ vector< string > splitStr ( string originalText, set< char > delims, bool remove
{
word.push_back( begin );
}
else if ( !removeSyms )
{
word.push_back( begin );
}
++i;
begin = originalText[ i ];
}
......
......@@ -39,17 +39,17 @@ int main ( )
"The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here',"
"making it look like readable English. ";
testFindStr( original );
testFindNext( );
testFindPrev( );
// testFindStr( original );
// testFindNext( );
// testFindPrev( );
testSplitStr( original );
testIsStopWord( );
testToLower( );
testStemWord( );
testSubStr( );
testStripStr( );
testIsAlpha( );
testIsNum( );
// testIsStopWord( );
// testToLower( );
// testStemWord( );
// testSubStr( );
// testStripStr( );
// testIsAlpha( );
// testIsNum( );
cout << "\nTests passed for StringProcessing :D" << endl;
......@@ -139,11 +139,16 @@ void testSplitStr ( string original )
vector< string > vec = splitStr( original, ' ', true );
assert( vec.size( ) == 53 );
string word = "hello\ngoodbye";
vec = splitStr( word, '\n', true );
string word = "hello goodbye";
vec = splitStr( word, ' ', true );
assert( vec.size( ) == 2 );
assert( vec[ 0 ] == "hello" && vec[ 1 ] == "goodbye" );
word = "apollo moon OR landing";
vec = splitStr( word, ' ', false );
assert( vec.size( ) == 4 );
assert( vec[ 0 ] == "apollo" && vec[ 1 ] == "moon" && vec[ 2 ] == "OR" && vec[ 3 ] == "landing" );
cout << "testSplitStr passed" << endl << endl;
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment