From a041c8ffe5aac0bb4457db89a8a38be13ebb0e01 Mon Sep 17 00:00:00 2001
From: vcday <vcday@umich.edu>
Date: Sun, 4 Mar 2018 19:12:52 -0500
Subject: [PATCH] parser tests pass

---
 parser/Parser.h             | 18 ++++++++++++------
 parser/tests/parserTest.cpp | 31 ++++++++++---------------------
 util/stringProcessing.h     |  2 +-
 3 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/parser/Parser.h b/parser/Parser.h
index 6435fae..4f7203a 100644
--- a/parser/Parser.h
+++ b/parser/Parser.h
@@ -59,12 +59,17 @@ private:
 
 		string tokenizerInput = "";
 		string currentTerm = "";
-		for ( int i = 0; i < html.size ( ); ++i )
+		size_t index = 0;  // unsigned, so the comparisons with html.size ( ) are not mixed-sign
+		while ( index < html.size ( ) )  // '<' rather than '!=': stepping past an unterminated last line must still end the loop
 			{
-			while ( html.at( i ) != '\n' )
+			currentTerm = "";
+			while ( index < html.size ( ) && html[ index ] != '\n' )  // bounded: the final line may lack a trailing '\n'
 				{
-				currentTerm += html[ i ];
+				currentTerm += html[ index ];
+				++index;
 				}
+			// Step over the '\n' that ended this line (or past the end when there was none).
+			++index;
 
 			string url = extract_url ( currentTerm );
 			if (url != "")
@@ -121,14 +126,15 @@ private:
 	string extract_title ( string & word )
 		{
 		string title = "";
+		const char end = '<';  // the first '<' after the opening tag is treated as the start of the closing tag
 		auto pos = findStr ( "<title>", word );
 		if ( *pos != '\0')
 			{
-			pos += 6;
-			while ( *pos != *findStr ( "</title>", word ) )
+			pos += 7;  // skip the 7 characters of "<title>"
+			while ( *pos != end && *pos != '\0' )  // same '\0' sentinel as the check above: stop if the line has no closing tag
 				{
-				++pos;
 				title += *pos;
+				++pos;
 				}
 			}
 		return title;
diff --git a/parser/tests/parserTest.cpp b/parser/tests/parserTest.cpp
index bc5248b..d160c4c 100644
--- a/parser/tests/parserTest.cpp
+++ b/parser/tests/parserTest.cpp
@@ -15,30 +15,19 @@ int main ( )
 	{
 	cout << "Testing Parser ... " << endl << endl;
-	ProducerConsumerQueue < string > * urlFrontierTest;
+	ProducerConsumerQueue < string > * urlFrontierTest = nullptr;  // initialised: passing an indeterminate pointer to Parser is undefined behaviour
-	Document document ( "<!DOCTYPE html>\n"
-			                    "<html>\n"
-			                    "<head>\n"
-			                    "<!-- HTML Codes by Quackit.com -->\n"
-			                    "<title>\n"
-			                    "Story of Cat</title>\n"
-			                    "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">\n"
-			                    "<meta name=\"keywords\" content=\"cat story\">\n"
-			                    "<meta name=\"description\" content=\"This is the tale of a cat names joe\">\n"
-			                    "<style>\n"
-			                    "body {background-color:#ffffff;background-repeat:no-repeat;background-position:top left;background-attachment:fixed;}\n"
-			                    "h1{font-family:Arial, sans-serif;color:#000000;background-color:#ffffff;}\n"
-			                    "p {font-family:Georgia, serif;font-size:14px;font-style:normal;font-weight:normal;color:#000000;background-color:#ffffff;}\n"
-			                    "</style>\n"
-			                    "</head>\n"
-			                    "<body>\n"
-			                    "<h1>Joe the cat</h1>\n"
-			                    "<p>On Saturday, joe the cat went to the store. He climbed up a mountain? It was weird. The store was called Food Store</p>\n"
-			                    "</body>\n"
-			                    "</html>" );
+	Document document ( "<title>This Cat Title Cat</title>\n" );
 
 	Parser parser ( urlFrontierTest );
 	auto dictionary = parser.execute ( &document );
-	assert( dictionary != nullptr );
+
+	assert ( dictionary != nullptr );
+	assert ( dictionary->size () == 2);
+	assert ( dictionary->find ( "cat" ) != dictionary->end () );
+	assert ( dictionary->find ( "title" ) != dictionary->end () );
+	assert ( dictionary->find ( "this" ) == dictionary->end () );  // NOTE(review): presumably dropped as a stop word - confirm against stopWords
+	assert ( dictionary->at ( "cat" )[ 0 ] == 0 && dictionary->at ( "cat" )[ 1 ] == 2 );
+	assert ( dictionary->at ( "title" )[ 0 ] == 1 );
+
 	cout << "Parser Tests Passed! :D" << endl;
 
 	}
diff --git a/util/stringProcessing.h b/util/stringProcessing.h
index 0afdee9..ed54713 100644
--- a/util/stringProcessing.h
+++ b/util/stringProcessing.h
@@ -28,7 +28,7 @@ set< string > stopWords = { "a", "all", "an", "and", "any", "are", "as", "at", "
  * @param needle
  * @return
  */
-string::iterator findStr ( string haystack, string needle )
+string::iterator findStr (string needle, string haystack )
 	{
 
 	auto beginNeedle = needle.begin ( );
-- 
GitLab