diff --git a/parser/Parser.cpp b/parser/Parser.cpp index ba7f26610cee7ba7a69c033a90c6f7f718df5abb..9975814a9b4d83a9968644fb78f45a379da2cd30 100644 --- a/parser/Parser.cpp +++ b/parser/Parser.cpp @@ -138,8 +138,15 @@ string Parser::extractUrl ( string html ) { url = ""; unsigned long closeTag = findNext( ">", foundHref, html ); + unsigned long closeQuote = findNext( "\"", foundHref, html ); unsigned long closeSpace = findNext( " ", foundHref, html ); unsigned long closeUrl = 0; + + // ends in " +// if ( closeQuote < html.size( ) && closeTag < html.size( ) && closeQuote < closeTag && closeQuote < closeSpace ) +// { +// closeUrl = closeQuote; +// } // end == ' >' if ( closeSpace < html.size( ) && closeTag < html.size( ) && closeSpace < closeTag ) { @@ -167,6 +174,10 @@ string Parser::extractUrl ( string html ) } } + while ( !url.empty( ) && ( url.back( ) == '\"' || url.back( ) == ';' ) ) + { + url.pop_back( ); + } return url; } diff --git a/parser/tests/parserTest.cpp b/parser/tests/parserTest.cpp index 6cf84d29c16b01454dde51c3abfe6e74447b1bb9..be53342c472741aa7d0e2e0ea2b4ffbc56fc3b2c 100644 --- a/parser/tests/parserTest.cpp +++ b/parser/tests/parserTest.cpp @@ -16,7 +16,7 @@ using namespace std; void testSimple( ); -void testComplex( ); +void testHttp( ); void testURL( ); void testBody ( ); void testExtractBody ( ); @@ -28,7 +28,7 @@ int main ( ) { cout << "Testing Parser ... " << endl << endl; testSimple( ); -// testComplex( ); + testHttp( ); testURL( ); testBody ( ); testExtractBody ( ); @@ -90,14 +90,14 @@ void testSimple ( ) cout << "Simple Test Passed!" << endl << endl; } -void testComplex( ) +void testHttp( ) { cout << "Testing Complex: " << endl; ProducerConsumerQueue< ParsedUrl > urlFrontierTest; Parser parser( &urlFrontierTest ); - ParsedUrl httpURL = ParsedUrl( "www.veronicacday.com" ); - HttpReader reader( httpURL ); + ParsedUrl httpURL = ParsedUrl( "http://veronicacday.com/" ); + HttpReader reader( httpURL ); auto success = reader.request( ); if ( !success ) { @@ -107,10 +107,29 @@ void testComplex( ) auto dictionary = parser.execute( &reader ); printDictionary( *dictionary ); + + urlFrontierTest.Pop( ); + assert( urlFrontierTest.Pop( ).getCompleteUrl( ) == "https://trove.com/" ); + assert( urlFrontierTest.Pop( ).getCompleteUrl( ) == "http://arcinnovations.xyz/" ); + assert( urlFrontierTest.Pop( ).getCompleteUrl( ) == "https://gwydion.co/" ); + assert( urlFrontierTest.Pop( ).getCompleteUrl( ) == "https://madeatmichigan.umich.edu/ventures/venture/gwydion/" ); + + assert ( dictionary != nullptr ); + assert ( dictionary->size( ) == 67 ); + + assert ( dictionary->at( "=veronicacday.com/" ).size( ) == 1 && dictionary->at( "=veronicacday.com/" )[ 0 ] == 0 ); + assert ( dictionary->at( "%serena" ).size( ) == 2 && dictionary->at( "%serena" )[ 1 ] == 24 ); + assert ( dictionary->at( "#veronica" ).size( ) == 1 && dictionary->at( "#veronica" )[ 0 ] == 2 ); + assert ( dictionary->at( "#dai" ).size( ) == 1 && dictionary->at( "#dai" )[ 0 ] == 3 ); + assert ( dictionary->at( "%educ" ).size( ) == 1 && dictionary->at( "%educ" )[ 0 ] == 13 ); + assert ( dictionary->at( "%surgeri" ).size( ) == 1 && dictionary->at( "%surgeri" )[ 0 ] == 72 ); + + + delete dictionary; dictionary = nullptr; - cout << "Complex Test Passed! " << endl; + cout << "Complex Test Passed! " << endl << endl; }