Skip to content
Snippets Groups Projects
Commit a041c8ff authored by vcday
Browse files

parser tests pass

parent 2ceeed1b
No related branches found
No related tags found
No related merge requests found
...@@ -59,12 +59,17 @@ private: ...@@ -59,12 +59,17 @@ private:
string tokenizerInput = ""; string tokenizerInput = "";
string currentTerm = ""; string currentTerm = "";
for ( int i = 0; i < html.size ( ); ++i ) int index = 0;
while (index != html.size())
{ {
while ( html.at( i ) != '\n' ) currentTerm = "";
while ( html.at( index ) != '\n' )
{ {
currentTerm += html[ i ]; currentTerm += html[ index ];
++index;
} }
++index;
string url = extract_url ( currentTerm ); string url = extract_url ( currentTerm );
if (url != "") if (url != "")
...@@ -121,14 +126,15 @@ private: ...@@ -121,14 +126,15 @@ private:
string extract_title ( string & word ) string extract_title ( string & word )
{ {
string title = ""; string title = "";
char end = '<';
auto pos = findStr ( "<title>", word ); auto pos = findStr ( "<title>", word );
if ( *pos != '\0') if ( *pos != '\0')
{ {
pos += 6; pos += 7;
while ( *pos != *findStr ( "</title>", word ) ) while ( *pos != end )
{ {
++pos;
title += *pos; title += *pos;
++pos;
} }
} }
return title; return title;
......
...@@ -15,30 +15,19 @@ int main ( ) ...@@ -15,30 +15,19 @@ int main ( )
{ {
cout << "Testing Parser ... " << endl << endl; cout << "Testing Parser ... " << endl << endl;
ProducerConsumerQueue < string > * urlFrontierTest; ProducerConsumerQueue < string > * urlFrontierTest;
Document document ( "<!DOCTYPE html>\n" Document document ( "<title>This Cat Title Cat</title>\n" );
"<html>\n"
"<head>\n"
"<!-- HTML Codes by Quackit.com -->\n"
"<title>\n"
"Story of Cat</title>\n"
"<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">\n"
"<meta name=\"keywords\" content=\"cat story\">\n"
"<meta name=\"description\" content=\"This is the tale of a cat names joe\">\n"
"<style>\n"
"body {background-color:#ffffff;background-repeat:no-repeat;background-position:top left;background-attachment:fixed;}\n"
"h1{font-family:Arial, sans-serif;color:#000000;background-color:#ffffff;}\n"
"p {font-family:Georgia, serif;font-size:14px;font-style:normal;font-weight:normal;color:#000000;background-color:#ffffff;}\n"
"</style>\n"
"</head>\n"
"<body>\n"
"<h1>Joe the cat</h1>\n"
"<p>On Saturday, joe the cat went to the store. He climbed up a mountain? It was weird. The store was called Food Store</p>\n"
"</body>\n"
"</html>" );
Parser parser ( urlFrontierTest ); Parser parser ( urlFrontierTest );
auto dictionary = parser.execute ( &document ); auto dictionary = parser.execute ( &document );
assert( dictionary != nullptr );
assert ( dictionary != nullptr );
assert ( dictionary->size () == 2);
assert ( dictionary->find ( "cat" ) != dictionary->end () );
assert ( dictionary->find ( "title" ) != dictionary->end () );
assert ( dictionary->find ( "this" ) == dictionary->end () );
assert ( dictionary->at ( "cat" )[ 0 ] == 0 && dictionary->at ( "cat" )[ 1 ] == 2 );
assert ( dictionary->at ( "title" )[ 0 ] == 1 );
cout << "Parser Tests Passed! :D" << endl; cout << "Parser Tests Passed! :D" << endl;
} }
......
...@@ -28,7 +28,7 @@ set< string > stopWords = { "a", "all", "an", "and", "any", "are", "as", "at", " ...@@ -28,7 +28,7 @@ set< string > stopWords = { "a", "all", "an", "and", "any", "are", "as", "at", "
* @param needle * @param needle
* @return * @return
*/ */
string::iterator findStr ( string haystack, string needle ) string::iterator findStr (string needle, string haystack )
{ {
auto beginNeedle = needle.begin ( ); auto beginNeedle = needle.begin ( );
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment