diff --git a/parser/Parser.cpp b/parser/Parser.cpp index 1df6081b473ab5a24bbfb1019ecf6bec958430b5..99b46d46c1bc481cc270bdeb9e03735e127b5eee 100644 --- a/parser/Parser.cpp +++ b/parser/Parser.cpp @@ -49,7 +49,6 @@ void Parser::parse ( string html, Tokenizer *tokenizer ) if ( url != "" ) { urlFrontier->Push( url ); - cout << url << endl; } // check if line is title else diff --git a/parser/tests/parserTest.cpp b/parser/tests/parserTest.cpp index 48ec8c50031e517e988f684d93aaf0502149372b..7cd5f2b97dffabafb16b3a1bb7e531f3f61aec74 100644 --- a/parser/tests/parserTest.cpp +++ b/parser/tests/parserTest.cpp @@ -12,34 +12,14 @@ void testSimple ( ); void testComplex ( ); +void testURL ( ); int main ( ) { cout << "Testing Parser ... " << endl << endl; - - const char * line = "<li><span class=\"official-website\"><span class=\"url\"><a rel=\"nofollow\" class=\"external text\" href=\"http://www.bafta.org/\">Official website</a></span></span></li>"; - - ProducerConsumerQueue< string > urlFrontierTest; - ParsedUrl url = ParsedUrl( "testurl.com" ); - char docString[10240]; - strcpy( docString, line ); - Document document( url, docString ); - - Parser parser( &urlFrontierTest ); - auto dict = parser.execute( &document ); - - for ( auto it = dict->begin( ); it != dict->end( ); it++ ) - { - cout << it->first << ':'; - for ( int i = 0; i < it->second.size( ); ++i ) - { - cout << it->second[ i ] << " "; - } - cout << std::endl; - } - -// testSimple( ); -// testComplex( ); + testURL ( ); + testSimple( ); + testComplex( ); cout << "Parser Tests Passed! :D" << endl; } @@ -48,7 +28,7 @@ void testSimple ( ) { ProducerConsumerQueue< string > urlFrontierTest; - ParsedUrl url = ParsedUrl( "testurl.com" ); + ParsedUrl url = ParsedUrl( "http://www.testurl.com" ); char docString[10240]; strcpy( docString, "<title>This Cat Title Cat</title>" ); Document document( url, docString ); @@ -105,3 +85,19 @@ void testComplex ( ) delete[] writable; } + +void testURL ( ) + { + const char *line = "<li><span class=\"official-website\"><span class=\"url\"><a rel=\"nofollow\" class=\"external text\" href=\"http://www.bafta.org/\">Official website</a></span></span></li>"; + + ProducerConsumerQueue< string > urlFrontierTest; + ParsedUrl url = ParsedUrl( "testurl.com" ); + char docString[10240]; + strcpy( docString, line ); + Document document( url, docString ); + + Parser parser( &urlFrontierTest ); + auto dict = parser.execute( &document ); + assert ( urlFrontierTest.Pop( ) == "http://www.bafta.org/"); + delete dict; + } \ No newline at end of file diff --git a/util/Stemmer.cpp b/util/Stemmer.cpp index 52f47c1bb15b2c6d64705779a044f289e4249768..952445fae8360c83c1709b6cd1ccded5d9185c1f 100644 --- a/util/Stemmer.cpp +++ b/util/Stemmer.cpp @@ -323,7 +323,7 @@ std::string Stemmer::step1b ( std::string word ) else if ( *substrING != '\0' && isVowelPresent( word.begin( ), substrING, word ) ) { wordStem = subStr( word.begin( ), substrING ); - if ( addE( wordStem ) || ( m == 1 && endCVC( wordStem + 'e' ) ) ) + if ( addE( wordStem ) || ( measure ( wordStem ) == 1 && endCVC( wordStem + 'e' ) ) ) { wordStem += 'e'; } diff --git a/util/tests/tokenizerTest.cpp b/util/tests/tokenizerTest.cpp index 4755059f67629df14ec8f1a6eb164d067fc908d0..a89e22d6fcbde1978eecee2b32fee5c6fb6f8e2d 100644 --- a/util/tests/tokenizerTest.cpp +++ b/util/tests/tokenizerTest.cpp @@ -41,5 +41,6 @@ void testExecute ( string original ) } cout << std::endl; } + delete dict; }