From b9a95befbaddb23f60359d739caf9a1ee0930c1d Mon Sep 17 00:00:00 2001
From: vcday <vcday@umich.edu>
Date: Thu, 15 Mar 2018 23:53:32 -0400
Subject: [PATCH] Fix stemmer step1b measure check; clean up parser and tokenizer tests

---
 parser/Parser.cpp            |  1 -
 parser/tests/parserTest.cpp  | 46 ++++++++++++++++--------------------
 util/Stemmer.cpp             |  2 +-
 util/tests/tokenizerTest.cpp |  1 +
 4 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/parser/Parser.cpp b/parser/Parser.cpp
index 1df6081..99b46d4 100644
--- a/parser/Parser.cpp
+++ b/parser/Parser.cpp
@@ -49,7 +49,6 @@ void Parser::parse ( string html, Tokenizer *tokenizer )
 			if ( url != "" )
 				{
 				urlFrontier->Push( url );
-				cout << url << endl;
 				}
 				// check if line is title
 			else
diff --git a/parser/tests/parserTest.cpp b/parser/tests/parserTest.cpp
index 48ec8c5..7cd5f2b 100644
--- a/parser/tests/parserTest.cpp
+++ b/parser/tests/parserTest.cpp
@@ -12,34 +12,14 @@ void testSimple ( );
 
 void testComplex ( );
 
+void testURL ( );
 
 int main ( )
 	{
 	cout << "Testing Parser ... " << endl << endl;
-
-	const char * line = "<li><span class=\"official-website\"><span class=\"url\"><a rel=\"nofollow\" class=\"external text\" href=\"http://www.bafta.org/\">Official website</a></span></span></li>";
-
-	ProducerConsumerQueue< string > urlFrontierTest;
-	ParsedUrl url = ParsedUrl( "testurl.com" );
-	char docString[10240];
-	strcpy( docString, line );
-	Document document( url, docString );
-
-	Parser parser( &urlFrontierTest );
-	auto dict = parser.execute( &document );
-
-	for ( auto it = dict->begin( ); it != dict->end( ); it++ )
-		{
-		cout << it->first << ':';
-		for ( int i = 0; i < it->second.size( ); ++i )
-			{
-			cout << it->second[ i ] << " ";
-			}
-		cout << std::endl;
-		}
-
-//	testSimple( );
-//	testComplex( );
+	testURL ( );
+	testSimple( );
+	testComplex( );
 	cout << "Parser Tests Passed! :D" << endl;
 
 	}
@@ -48,7 +28,7 @@ void testSimple ( )
 	{
 
 	ProducerConsumerQueue< string > urlFrontierTest;
-	ParsedUrl url = ParsedUrl( "testurl.com" );
+	ParsedUrl url = ParsedUrl( "http://www.testurl.com" );
 	char docString[10240];
 	strcpy( docString, "<title>This Cat Title Cat</title>" );
 	Document document( url, docString );
@@ -105,3 +85,19 @@ void testComplex ( )
 	delete[] writable;
 
 	}
+
+void testURL ( )  // Checks that parsing a line containing an <a href=...> leaves that href on the URL frontier queue.
+	{
+	const char *line = "<li><span class=\"official-website\"><span class=\"url\"><a rel=\"nofollow\" class=\"external text\" href=\"http://www.bafta.org/\">Official website</a></span></span></li>";
+
+	ProducerConsumerQueue< string > urlFrontierTest;  // queue handed to the Parser; inspected by the assert below
+	ParsedUrl url = ParsedUrl( "testurl.com" );  // NOTE(review): testSimple now passes "http://www.testurl.com"; confirm a scheme-less URL is intended here
+	char docString[10240];
+	strcpy( docString, line );  // Document is constructed from a mutable char buffer, so copy the literal into one
+	Document document( url, docString );
+
+	Parser parser( &urlFrontierTest );
+	auto dict = parser.execute( &document );  // result is only needed so it can be freed; the check is on the queue
+	assert ( urlFrontierTest.Pop( ) == "http://www.bafta.org/");  // NOTE: Pop( ) sits inside assert, so it is not evaluated when NDEBUG is defined
+	delete dict;  // presumably execute( ) returns a heap object owned by the caller — TODO confirm
+	}
\ No newline at end of file
diff --git a/util/Stemmer.cpp b/util/Stemmer.cpp
index 52f47c1..952445f 100644
--- a/util/Stemmer.cpp
+++ b/util/Stemmer.cpp
@@ -323,7 +323,7 @@ std::string Stemmer::step1b ( std::string word )
 	else if ( *substrING != '\0' && isVowelPresent( word.begin( ), substrING, word ) )
 		{
 		wordStem = subStr( word.begin( ), substrING );
-		if ( addE( wordStem ) || ( m == 1 && endCVC( wordStem + 'e' ) ) )
+		if ( addE( wordStem ) || ( measure ( wordStem ) == 1 && endCVC( wordStem + 'e' ) ) )
 			{
 			wordStem += 'e';
 			}
diff --git a/util/tests/tokenizerTest.cpp b/util/tests/tokenizerTest.cpp
index 4755059..a89e22d 100644
--- a/util/tests/tokenizerTest.cpp
+++ b/util/tests/tokenizerTest.cpp
@@ -41,5 +41,6 @@ void testExecute ( string original )
 			}
 		cout << std::endl;
 		}
+	delete dict;
 
 	}
-- 
GitLab