Skip to content
Snippets Groups Projects
Commit 2970f790 authored by vcday's avatar vcday
Browse files

added more parser tests

parent d2900242
Branches
No related tags found
No related merge requests found
......@@ -20,8 +20,7 @@ add_executable(Parser2Test
crawler/Readers/StreamReader.h
crawler/Readers/LocalReader.cpp
crawler/Readers/HttpReader.cpp
crawler/Readers/HttpsReader.cpp
)
crawler/Readers/HttpsReader.cpp)
add_executable(crawler-parser-test
main.cpp
......
......@@ -18,6 +18,9 @@ using namespace std;
void testSimple( );
void testComplex( );
void testURL( );
void testBody ( );
void testExtractBody ( );
void printDictionary ( unordered_map< string, vector< unsigned long > > dictionary );
......@@ -25,8 +28,10 @@ int main ( )
{
cout << "Testing Parser ... " << endl << endl;
testSimple( );
testComplex( );
// testComplex( );
testURL( );
testBody ( );
testExtractBody ( );
cout << "Parser Tests Passed! :D" << endl;
}
......@@ -102,6 +107,10 @@ void testComplex( )
auto dictionary = parser.execute( &reader );
printDictionary( *dictionary );
delete dictionary;
dictionary = nullptr;
cout << "Complex Test Passed! " << endl;
}
......@@ -136,4 +145,95 @@ void testURL ( )
delete dictionary;
dictionary = nullptr;
cout << "URL Test Passed!" << endl << endl;
}
/**
 * Parser test over the local fixture tests/testParserBody.html.
 *
 * The fixture is read through a LocalReader whose URL is set to
 * http://www.testingBody.edu, handed to Parser::execute, and the returned
 * dictionary is checked entry by entry: 18 keys in total, each mapped to the
 * exact list of offsets asserted below.
 *
 * NOTE(review): the key prefixes look like '=' full URL, '$' URL token,
 * '#' title token and '%' body token, and the words appear to be stemmed
 * ("stori", "saturdai") -- confirm against the Parser implementation.
 *
 * Terminates the process with exit code 1 when the fixture cannot be opened.
 */
void testBody ( )
{
    cout << "Testing Body: " << endl;
    ProducerConsumerQueue< ParsedUrl > urlFrontierTest;
    Parser parser( &urlFrontierTest );
    ParsedUrl fake_url = ParsedUrl( "http://www.testingBody.edu" );
    string filepath = util::GetCurrentWorkingDir( ) + "/tests/testParserBody.html";

    LocalReader reader( filepath );
    reader.setUrl( fake_url );
    auto success = reader.request( );
    if ( !success )
    {
        cerr << "Couldn't open file\n";
        exit( 1 );
    }

    auto dictionary = parser.execute( &reader );
    // Validate the pointer BEFORE the first dereference; previously the
    // dictionary was printed (and thus dereferenced) ahead of this check.
    assert ( dictionary != nullptr );
    printDictionary( *dictionary );

    //TODO fix "n"
    assert ( dictionary->size( ) == 18 );

    // URL-derived entries.
    assert ( dictionary->at( "=www.testingBody.edu/" ).size( ) == 1 && dictionary->at( "=www.testingBody.edu/" )[ 0 ] == 0 );
    assert ( dictionary->at( "$edu" ).size( ) == 1 && dictionary->at( "$edu" )[ 0 ] == 1 );
    assert ( dictionary->at( "$testingbodi" ).size( ) == 1 && dictionary->at( "$testingbodi" )[ 0 ] == 0 );

    // '#'-prefixed entries.
    assert ( dictionary->at( "#stori" ).size( ) == 1 && dictionary->at( "#stori" )[ 0 ] == 3 );
    assert ( dictionary->at( "#cat" ).size( ) == 1 && dictionary->at( "#cat" )[ 0 ] == 4 );

    // '%'-prefixed entries, listed in ascending offset order.
    assert ( dictionary->at( "%saturdai" ).size( ) == 1 && dictionary->at( "%saturdai" )[ 0 ] == 6 );
    assert ( dictionary->at( "%joe" ).size( ) == 1 && dictionary->at( "%joe" )[ 0 ] == 7 );
    assert ( dictionary->at( "%cat" ).size( ) == 1 && dictionary->at( "%cat" )[ 0 ] == 8 );
    assert ( dictionary->at( "%went" ).size( ) == 1 && dictionary->at( "%went" )[ 0 ] == 9 );
    // "%store" is the only key expected to carry more than one offset.
    assert ( dictionary->at( "%store" ).size( ) == 3 && dictionary->at( "%store" )[ 0 ] == 10 &&
             dictionary->at( "%store" )[ 1 ] == 15 && dictionary->at( "%store" )[ 2 ] == 18 );
    assert ( dictionary->at( "%climb" ).size( ) == 1 && dictionary->at( "%climb" )[ 0 ] == 11 );
    assert ( dictionary->at( "%up" ).size( ) == 1 && dictionary->at( "%up" )[ 0 ] == 12 );
    assert ( dictionary->at( "%mountain" ).size( ) == 1 && dictionary->at( "%mountain" )[ 0 ] == 13 );
    assert ( dictionary->at( "%weird" ).size( ) == 1 && dictionary->at( "%weird" )[ 0 ] == 14 );
    assert ( dictionary->at( "%call" ).size( ) == 1 && dictionary->at( "%call" )[ 0 ] == 16 );
    assert ( dictionary->at( "%food" ).size( ) == 1 && dictionary->at( "%food" )[ 0 ] == 17 );

    // The test deletes the dictionary returned by execute() once all checks pass.
    delete dictionary;
    dictionary = nullptr;
    cout << "Body Test Passed!" << endl;
}
/**
 * Parser test over the local fixture tests/testExtractBodyTest.html.
 *
 * The fixture is read through a LocalReader whose URL is set to
 * https://developer.mozilla.org/en-US/docs/Learn, handed to Parser::execute,
 * and the returned dictionary is checked: 15 keys in total, plus the first
 * offset of a handful of selected keys.
 *
 * NOTE(review): the key prefixes look like '=' full URL, '$' URL token,
 * '#' title token and '%' body token -- confirm against the Parser
 * implementation.
 *
 * Terminates the process with exit code 1 when the fixture cannot be opened.
 */
void testExtractBody ( )
{
    cout << "Testing ExtractBody: " << endl;
    ProducerConsumerQueue< ParsedUrl > urlFrontierTest;
    Parser parser( &urlFrontierTest );
    ParsedUrl fake_url = ParsedUrl( "https://developer.mozilla.org/en-US/docs/Learn" );
    string filepath = util::GetCurrentWorkingDir( ) + "/tests/testExtractBodyTest.html";

    LocalReader reader( filepath );
    reader.setUrl( fake_url );
    auto success = reader.request( );
    if ( !success )
    {
        cerr << "Couldn't open file\n";
        exit( 1 );
    }

    auto dictionary = parser.execute( &reader );
    // Guard the dereferences below, matching the check done in testBody.
    assert( dictionary != nullptr );
    printDictionary( *dictionary );

    assert( dictionary->size( ) == 15 );
    // assert( dictionary->at( "%paragraph" )[ 0 ] == 0 );
    // assert( dictionary->at( "%bodi" )[ 1 ] == 5 );

    // URL-derived entries.
    assert( dictionary->at( "=developer.mozilla.org/en-US/docs/Learn" )[ 0 ] == 0 );
    assert( dictionary->at( "$develop" )[ 0 ] == 0 );
    assert( dictionary->at( "$mozilla" )[ 0 ] == 1);
    assert( dictionary->at( "$org" )[ 0 ] == 2 );

    // Document-derived entries (the "%word" check was previously duplicated).
    assert( dictionary->at( "#specif" )[ 0 ] == 6 );
    assert( dictionary->at( "%word" )[ 0 ] == 14 );

    // The test deletes the dictionary returned by execute() once all checks pass.
    delete dictionary;
    dictionary = nullptr;
    cout << "Extract Body Test Passed!" << endl;
}
\ No newline at end of file
......@@ -9,11 +9,9 @@
//
//using namespace std;
//
//void testSimple ( );
//
//void testComplex ( );
//
//void testURL ( );
//
//void testExtractBody ( );
//
......@@ -42,92 +40,7 @@ int main ( )
//\
//
//
//void testBody ( )
// {
// ProducerConsumerQueue< ParsedUrl > urlFrontierTest;
// ParsedUrl url = ParsedUrl( "http://www.testurl.com" );
// char docString[1024];
// strcpy( docString, "<!DOCTYPE html>\n"
// "<html>\n"
// "<head>\n"
// "<!-- HTML Codes by Quackit.com -->\n"
// "<title>\n"
// "Story of Cat</title>\n"
// "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">\n"
// "<meta name=\"keywords\" content=\"cat story\">\n"
// "<meta name=\"description\" content=\"This is the tale of a cat names joe\">\n"
// "<style>\n"
// "body {background-color:#ffffff;background-repeat:no-repeat;background-position:top left;background-attachment:fixed;}\n"
// "h1{font-family:Arial, sans-serif;color:#000000;background-color:#ffffff;}\n"
// "p {font-family:Georgia, serif;font-size:14px;font-style:normal;font-weight:normal;color:#000000;background-color:#ffffff;}\n"
// "</style>\n"
// "</head>\n"
// "<body>\n"
// "<h1>Joe the cat</h1>\n"
// "<p>On Saturday, joe the cat went to the store. He climbed up a mountain? It was weird. The store was called Food Store</p>\n"
// "</body>\n"
// "</html>" );
// Document document( url, docString );
//
// Parser parser( &urlFrontierTest );
// auto dictionary = parser.execute( &document );
// cout << dictionary->size( ) << endl;
// //assert( dictionary->size( ) == 4);
// for ( auto it = dictionary->begin( ); it != dictionary->end( ); it++ )
// {
// cout << it->first << ':';
// for ( int i = 0; i < it->second.size( ); ++i )
// {
// cout << it->second[ i ] << " ";
// }
// cout << std::endl;
// }
// }
//
//void testExtractBody ( )
// {
// ProducerConsumerQueue< ParsedUrl > urlFrontierTest;
// ParsedUrl url = ParsedUrl( "http://www.testurl.com" );
// char docString[1024];
// strcpy( docString, "<title>Paragraph body text hello</title>" );
// Document document( url, docString );
//
// Parser parser( &urlFrontierTest );
// auto dictionary = parser.execute( &document );
// cout << dictionary->size( ) << endl;
// for ( auto it = dictionary->begin( ); it != dictionary->end( ); it++ )
// {
// cout << it->first << ':';
// for ( int i = 0; i < it->second.size( ); ++i )
// {
// cout << it->second[ i ] << " ";
// }
// cout << std::endl;
// }
// cout << endl << endl;
// assert( dictionary->size( ) == 6 );
//
// char docString2[1024];
// strcpy( docString2, "<p>Paragraph body text hello <title>Specific title</title> more body words</p>" );
// Document document2( url, docString2 );
// Parser parser2( &urlFrontierTest );
// dictionary = parser.execute( &document2 );
// cout << "Dictionary 2 size " << dictionary->size( ) << endl;
// for ( auto it = dictionary->begin( ); it != dictionary->end( ); it++ )
// {
// cout << it->first << ':';
// for ( int i = 0; i < it->second.size( ); ++i )
// {
// cout << it->second[ i ] << " ";
// }
// cout << std::endl;
// }
// assert( dictionary->size( ) == 10 );
// assert( dictionary->at( "#specif" )[ 0 ] == 0 );
// assert( dictionary->at( "%paragraph" )[ 0 ] == 0 );
// assert( dictionary->at( "%bodi" )[ 1 ] == 5 );
//
//
// }
//
//
"<p>Paragraph body text hello <title>Specific title</title> more body words</p>"
\ No newline at end of file
"<!DOCTYPE html>\n"
"<html>\n"
"<head>\n"
"<!-- HTML Codes by Quackit.com -->\n"
"<title>\n"
"Story of Cat</title>\n"
"<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">\n"
"<meta name=\"keywords\" content=\"cat story\">\n"
"<meta name=\"description\" content=\"This is the tale of a cat names joe\">\n"
"<style>\n"
"body {background-color:#ffffff;background-repeat:no-repeat;background-position:top left;background-attachment:fixed;}\n"
"h1{font-family:Arial, sans-serif;color:#000000;background-color:#ffffff;}\n"
"p {font-family:Georgia, serif;font-size:14px;font-style:normal;font-weight:normal;color:#000000;background-color:#ffffff;}\n"
"</style>\n"
"</head>\n"
"<body>\n"
"<h1>Joe the cat</h1>\n"
"<p>On Saturday, joe the cat went to the store. He climbed up a mountain? It was weird. The store was called Food Store</p>\n"
"</body>\n"
"</html>"
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment