Skip to content
Snippets Groups Projects
Commit a041c8ff authored by vcday
Browse files

parser tests pass

parent 2ceeed1b
No related branches found
No related tags found
No related merge requests found
......@@ -59,12 +59,17 @@ private:
string tokenizerInput = "";
string currentTerm = "";
for ( int i = 0; i < html.size ( ); ++i )
int index = 0;
while (index != html.size())
{
while ( html.at( i ) != '\n' )
currentTerm = "";
while ( html.at( index ) != '\n' )
{
currentTerm += html[ i ];
currentTerm += html[ index ];
++index;
}
++index;
string url = extract_url ( currentTerm );
if (url != "")
......@@ -121,14 +126,15 @@ private:
/**
 * Extracts the page title from a chunk of HTML text.
 *
 * Finds the first "<title>" tag in word and returns every character that
 * follows it up to (but not including) the next '<'. The search runs on
 * word itself with std::string::find, so no iterator into a temporary copy
 * of the text is ever dereferenced, and the scan is bounded by the end of
 * the string.
 *
 * @param word  text to scan; it is not modified
 * @return the title text, or "" when word contains no "<title>" tag
 */
string extract_title ( string & word )
	{
	const string openTag = "<title>";

	const auto tagPos = word.find ( openTag );
	if ( tagPos == string::npos )
		return "";

	// The title starts right after the full 7-character opening tag.
	const auto titleBegin = tagPos + openTag.size ( );

	// Stop at the next tag; with no closing tag, take the rest of the text
	// rather than reading past the end of the string.
	const auto titleEnd = word.find ( '<', titleBegin );
	const auto titleLength = ( titleEnd == string::npos ) ? string::npos : titleEnd - titleBegin;

	return word.substr ( titleBegin, titleLength );
	}
......
......@@ -15,30 +15,19 @@ int main ( )
{
cout << "Testing Parser ... " << endl << endl;
ProducerConsumerQueue < string > * urlFrontierTest;
Document document ( "<!DOCTYPE html>\n"
"<html>\n"
"<head>\n"
"<!-- HTML Codes by Quackit.com -->\n"
"<title>\n"
"Story of Cat</title>\n"
"<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">\n"
"<meta name=\"keywords\" content=\"cat story\">\n"
"<meta name=\"description\" content=\"This is the tale of a cat names joe\">\n"
"<style>\n"
"body {background-color:#ffffff;background-repeat:no-repeat;background-position:top left;background-attachment:fixed;}\n"
"h1{font-family:Arial, sans-serif;color:#000000;background-color:#ffffff;}\n"
"p {font-family:Georgia, serif;font-size:14px;font-style:normal;font-weight:normal;color:#000000;background-color:#ffffff;}\n"
"</style>\n"
"</head>\n"
"<body>\n"
"<h1>Joe the cat</h1>\n"
"<p>On Saturday, joe the cat went to the store. He climbed up a mountain? It was weird. The store was called Food Store</p>\n"
"</body>\n"
"</html>" );
Document document ( "<title>This Cat Title Cat</title>\n" );
Parser parser ( urlFrontierTest );
auto dictionary = parser.execute ( &document );
assert( dictionary != nullptr );
assert ( dictionary != nullptr );
assert ( dictionary->size () == 2);
assert ( dictionary->find ( "cat" ) != dictionary->end () );
assert ( dictionary->find ( "title" ) != dictionary->end () );
assert ( dictionary->find ( "this" ) == dictionary->end () );
assert ( dictionary->at ( "cat" )[ 0 ] == 0 && dictionary->at ( "cat" )[ 1 ] == 2 );
assert ( dictionary->at ( "title" )[ 0 ] == 1 );
cout << "Parser Tests Passed! :D" << endl;
}
......
......@@ -28,7 +28,7 @@ set< string > stopWords = { "a", "all", "an", "and", "any", "are", "as", "at", "
* @param needle
* @return
*/
string::iterator findStr ( string haystack, string needle )
string::iterator findStr (string needle, string haystack )
{
auto beginNeedle = needle.begin ( );
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment