Skip to content
Snippets Groups Projects
Commit b9a95bef authored by vcday's avatar vcday
Browse files

fixed stemmer

parent d69dc520
No related branches found
No related tags found
No related merge requests found
...@@ -49,7 +49,6 @@ void Parser::parse ( string html, Tokenizer *tokenizer ) ...@@ -49,7 +49,6 @@ void Parser::parse ( string html, Tokenizer *tokenizer )
if ( url != "" ) if ( url != "" )
{ {
urlFrontier->Push( url ); urlFrontier->Push( url );
cout << url << endl;
} }
// check if line is title // check if line is title
else else
......
...@@ -12,34 +12,14 @@ void testSimple ( ); ...@@ -12,34 +12,14 @@ void testSimple ( );
void testComplex ( ); void testComplex ( );
void testURL ( );
int main ( ) int main ( )
{ {
cout << "Testing Parser ... " << endl << endl; cout << "Testing Parser ... " << endl << endl;
testURL ( );
const char * line = "<li><span class=\"official-website\"><span class=\"url\"><a rel=\"nofollow\" class=\"external text\" href=\"http://www.bafta.org/\">Official website</a></span></span></li>"; testSimple( );
testComplex( );
ProducerConsumerQueue< string > urlFrontierTest;
ParsedUrl url = ParsedUrl( "testurl.com" );
char docString[10240];
strcpy( docString, line );
Document document( url, docString );
Parser parser( &urlFrontierTest );
auto dict = parser.execute( &document );
for ( auto it = dict->begin( ); it != dict->end( ); it++ )
{
cout << it->first << ':';
for ( int i = 0; i < it->second.size( ); ++i )
{
cout << it->second[ i ] << " ";
}
cout << std::endl;
}
// testSimple( );
// testComplex( );
cout << "Parser Tests Passed! :D" << endl; cout << "Parser Tests Passed! :D" << endl;
} }
...@@ -48,7 +28,7 @@ void testSimple ( ) ...@@ -48,7 +28,7 @@ void testSimple ( )
{ {
ProducerConsumerQueue< string > urlFrontierTest; ProducerConsumerQueue< string > urlFrontierTest;
ParsedUrl url = ParsedUrl( "testurl.com" ); ParsedUrl url = ParsedUrl( "http://www.testurl.com" );
char docString[10240]; char docString[10240];
strcpy( docString, "<title>This Cat Title Cat</title>" ); strcpy( docString, "<title>This Cat Title Cat</title>" );
Document document( url, docString ); Document document( url, docString );
...@@ -105,3 +85,19 @@ void testComplex ( ) ...@@ -105,3 +85,19 @@ void testComplex ( )
delete[] writable; delete[] writable;
} }
// Checks that parsing a single line of HTML containing an anchor tag makes
// the link's href available on the URL frontier queue handed to the Parser.
void testURL ( )
{
    // One list item wrapping an <a> whose href is the URL we expect back.
    const char *line = "<li><span class=\"official-website\"><span class=\"url\"><a rel=\"nofollow\" class=\"external text\" href=\"http://www.bafta.org/\">Official website</a></span></span></li>";

    ProducerConsumerQueue< string > urlFrontierTest;
    ParsedUrl url = ParsedUrl( "testurl.com" );

    // Document is constructed from a mutable char buffer, so copy the literal
    // into one; the literal is far shorter than the buffer.
    char docString[10240];
    strcpy( docString, line );
    Document document( url, docString );

    Parser parser( &urlFrontierTest );
    auto dict = parser.execute( &document );

    // Pop outside of assert( ): assert's argument is not evaluated when NDEBUG
    // is defined, so a call with side effects must not live inside it.
    const auto extractedUrl = urlFrontierTest.Pop( );
    assert ( extractedUrl == "http://www.bafta.org/" );

    // The object returned by execute( ) is released by the caller here.
    delete dict;
}
\ No newline at end of file
...@@ -323,7 +323,7 @@ std::string Stemmer::step1b ( std::string word ) ...@@ -323,7 +323,7 @@ std::string Stemmer::step1b ( std::string word )
else if ( *substrING != '\0' && isVowelPresent( word.begin( ), substrING, word ) ) else if ( *substrING != '\0' && isVowelPresent( word.begin( ), substrING, word ) )
{ {
wordStem = subStr( word.begin( ), substrING ); wordStem = subStr( word.begin( ), substrING );
if ( addE( wordStem ) || ( m == 1 && endCVC( wordStem + 'e' ) ) ) if ( addE( wordStem ) || ( measure ( wordStem ) == 1 && endCVC( wordStem + 'e' ) ) )
{ {
wordStem += 'e'; wordStem += 'e';
} }
......
...@@ -41,5 +41,6 @@ void testExecute ( string original ) ...@@ -41,5 +41,6 @@ void testExecute ( string original )
} }
cout << std::endl; cout << std::endl;
} }
delete dict;
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment