From 46f1301c3d302c997d37614b1ad303a07213d902 Mon Sep 17 00:00:00 2001
From: vcday <vcday@umich.edu>
Date: Thu, 22 Mar 2018 15:32:38 -0400
Subject: [PATCH] Make parser HTTP test pass

---
 parser/Parser.cpp           | 11 +++++++++++
 parser/tests/parserTest.cpp | 31 +++++++++++++++++++++++++------
 2 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/parser/Parser.cpp b/parser/Parser.cpp
index ba7f266..9975814 100644
--- a/parser/Parser.cpp
+++ b/parser/Parser.cpp
@@ -138,8 +138,15 @@ string Parser::extractUrl ( string html )
 			{
 			url = "";
 			unsigned long closeTag = findNext( ">", foundHref, html );
+			unsigned long closeQuote = findNext( "\"", foundHref, html );
 			unsigned long closeSpace = findNext( " ", foundHref, html );
 			unsigned long closeUrl = 0;
+
+			// URL ends at a closing double quote (this check is currently commented out)
+//			if ( closeQuote < html.size( ) && closeTag < html.size( ) && closeQuote < closeTag && closeQuote < closeSpace )
+//				{
+//				closeUrl = closeQuote;
+//				}
 			// end == ' >'
 			if ( closeSpace < html.size( ) && closeTag < html.size( ) && closeSpace < closeTag )
 				{
@@ -167,6 +174,10 @@ string Parser::extractUrl ( string html )
 			}
 		}
 
+	while ( !url.empty( ) && ( url.back( ) == '\"' || url.back( ) == ';' ) )
+		{
+		url.pop_back( );
+		}
 	return url;
 	}
 
diff --git a/parser/tests/parserTest.cpp b/parser/tests/parserTest.cpp
index 6cf84d2..be53342 100644
--- a/parser/tests/parserTest.cpp
+++ b/parser/tests/parserTest.cpp
@@ -16,7 +16,7 @@
 using namespace std;
 
 void testSimple( );
-void testComplex( );
+void testHttp( );
 void testURL( );
 void testBody ( );
 void testExtractBody ( );
@@ -28,7 +28,7 @@ int main ( )
 	{
 	cout << "Testing Parser ... " << endl << endl;
 	testSimple( );
-//	testComplex( );
+	testHttp( );
 	testURL( );
 	testBody ( );
 	testExtractBody ( );
@@ -90,14 +90,14 @@ void testSimple ( )
 	cout << "Simple Test Passed!" << endl << endl;
 	}
 
-void testComplex( )
+void testHttp( )
 	{
 	cout << "Testing Complex: " << endl;
 	ProducerConsumerQueue< ParsedUrl > urlFrontierTest;
 	Parser parser( &urlFrontierTest );
-	ParsedUrl httpURL = ParsedUrl( "www.veronicacday.com" );
-	HttpReader reader( httpURL );
+	ParsedUrl httpURL = ParsedUrl( "http://veronicacday.com/" );
 
+	HttpReader reader( httpURL );
 	auto success = reader.request( );
 	if ( !success )
 		{
@@ -107,10 +107,29 @@ void testComplex( )
 
 	auto dictionary = parser.execute( &reader );
 	printDictionary( *dictionary );
+
+	urlFrontierTest.Pop( );
+	assert( urlFrontierTest.Pop( ).getCompleteUrl( ) == "https://trove.com/" );
+	assert( urlFrontierTest.Pop( ).getCompleteUrl( ) == "http://arcinnovations.xyz/" );
+	assert( urlFrontierTest.Pop( ).getCompleteUrl( ) == "https://gwydion.co/" );
+	assert( urlFrontierTest.Pop( ).getCompleteUrl( ) == "https://madeatmichigan.umich.edu/ventures/venture/gwydion/" );
+
+	assert ( dictionary != nullptr );
+	assert ( dictionary->size( ) == 67 );
+
+	assert ( dictionary->at( "=veronicacday.com/" ).size( ) == 1 && dictionary->at( "=veronicacday.com/" )[ 0 ] == 0 );
+	assert ( dictionary->at( "%serena" ).size( ) == 2 && dictionary->at( "%serena" )[ 1 ] == 24 );
+	assert ( dictionary->at( "#veronica" ).size( ) == 1 && dictionary->at( "#veronica" )[ 0 ] == 2 );
+	assert ( dictionary->at( "#dai" ).size( ) == 1 && dictionary->at( "#dai" )[ 0 ] == 3 );
+	assert ( dictionary->at( "%educ" ).size( ) == 1 && dictionary->at( "%educ" )[ 0 ] == 13 );
+	assert ( dictionary->at( "%surgeri" ).size( ) == 1 && dictionary->at( "%surgeri" )[ 0 ] == 72 );
+
+
+
 	delete dictionary;
 	dictionary = nullptr;
 
-	cout << "Complex Test Passed! " << endl;
+	cout << "Complex Test Passed! " << endl << endl;
 	}
 
 
-- 
GitLab