Skip to content
Snippets Groups Projects
Commit b9a95bef authored by vcday's avatar vcday
Browse files

fixed stemmer

parent d69dc520
Branches url-parsing
No related tags found
No related merge requests found
......@@ -49,7 +49,6 @@ void Parser::parse ( string html, Tokenizer *tokenizer )
if ( url != "" )
{
urlFrontier->Push( url );
cout << url << endl;
}
// check if line is title
else
......
......@@ -12,34 +12,14 @@ void testSimple ( );
void testComplex ( );
void testURL ( );
int main ( )
{
cout << "Testing Parser ... " << endl << endl;
const char * line = "<li><span class=\"official-website\"><span class=\"url\"><a rel=\"nofollow\" class=\"external text\" href=\"http://www.bafta.org/\">Official website</a></span></span></li>";
ProducerConsumerQueue< string > urlFrontierTest;
ParsedUrl url = ParsedUrl( "testurl.com" );
char docString[10240];
strcpy( docString, line );
Document document( url, docString );
Parser parser( &urlFrontierTest );
auto dict = parser.execute( &document );
for ( auto it = dict->begin( ); it != dict->end( ); it++ )
{
cout << it->first << ':';
for ( int i = 0; i < it->second.size( ); ++i )
{
cout << it->second[ i ] << " ";
}
cout << std::endl;
}
// testSimple( );
// testComplex( );
testURL ( );
testSimple( );
testComplex( );
cout << "Parser Tests Passed! :D" << endl;
}
......@@ -48,7 +28,7 @@ void testSimple ( )
{
ProducerConsumerQueue< string > urlFrontierTest;
ParsedUrl url = ParsedUrl( "testurl.com" );
ParsedUrl url = ParsedUrl( "http://www.testurl.com" );
char docString[10240];
strcpy( docString, "<title>This Cat Title Cat</title>" );
Document document( url, docString );
......@@ -105,3 +85,19 @@ void testComplex ( )
delete[] writable;
}
// Checks URL extraction: parses a single list item containing an anchor tag and
// expects the anchor's href value to be the next entry popped from the URL
// frontier queue handed to the parser.
void testURL ( )
{
    const char *line = "<li><span class=\"official-website\"><span class=\"url\"><a rel=\"nofollow\" class=\"external text\" href=\"http://www.bafta.org/\">Official website</a></span></span></li>";
    ProducerConsumerQueue< string > urlFrontierTest;
    ParsedUrl url = ParsedUrl( "testurl.com" );
    char docString[10240];
    strcpy( docString, line );
    Document document( url, docString );
    Parser parser( &urlFrontierTest );
    auto dict = parser.execute( &document );

    // Pop outside of assert: the argument of assert is not evaluated when
    // NDEBUG is defined, so a call with side effects must not live inside it.
    const auto extractedUrl = urlFrontierTest.Pop( );
    assert ( extractedUrl == "http://www.bafta.org/" );
    static_cast< void >( extractedUrl );  // keeps NDEBUG builds free of unused-variable warnings

    delete dict;
}
\ No newline at end of file
......@@ -323,7 +323,7 @@ std::string Stemmer::step1b ( std::string word )
else if ( *substrING != '\0' && isVowelPresent( word.begin( ), substrING, word ) )
{
wordStem = subStr( word.begin( ), substrING );
if ( addE( wordStem ) || ( m == 1 && endCVC( wordStem + 'e' ) ) )
if ( addE( wordStem ) || ( measure ( wordStem ) == 1 && endCVC( wordStem + 'e' ) ) )
{
wordStem += 'e';
}
......
......@@ -41,5 +41,6 @@ void testExecute ( string original )
}
cout << std::endl;
}
delete dict;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment