From 21ea70137b89eda46179057f0d0cc0c284579a47 Mon Sep 17 00:00:00 2001
From: vcday <vcday@umich.edu>
Date: Wed, 21 Mar 2018 20:43:11 -0400
Subject: [PATCH] Guard Stemmer against empty input; update parser test fixture

---
 crawler/Readers/LocalReader.cpp |  2 +-
 tests/plaintext.txt             |  8 +++--
 util/Stemmer.cpp                | 59 +++++++++++++++++++++++++++++++--
 util/tests/stemmerTest.cpp      |  2 ++
 4 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/crawler/Readers/LocalReader.cpp b/crawler/Readers/LocalReader.cpp
index 0810591..cef70ba 100644
--- a/crawler/Readers/LocalReader.cpp
+++ b/crawler/Readers/LocalReader.cpp
@@ -31,7 +31,7 @@ string LocalReader::PageToString ( )
 
 ParsedUrl LocalReader::getUrl ( )
 	{
-	ParsedUrl url(fileName); //Fixme
+	ParsedUrl url(test_url);
 	return url;
 	}
 
diff --git a/tests/plaintext.txt b/tests/plaintext.txt
index b570f9c..5a34b1d 100644
--- a/tests/plaintext.txt
+++ b/tests/plaintext.txt
@@ -2,7 +2,11 @@
 <title>
 Apple Ardvark Anteater Alligator
 </title>
-<body>
+<p class="text-muted">
 Basement Battle Bridge Bottle
-</body>
+</p>
+<p class="text-muted">
+Hello Goodbye <a href="http://veronicaday.com/" class="btn btn-yes">
+Cat Cradle
+</p>
 </html>
\ No newline at end of file
diff --git a/util/Stemmer.cpp b/util/Stemmer.cpp
index 7b17f21..674a93b 100644
--- a/util/Stemmer.cpp
+++ b/util/Stemmer.cpp
@@ -49,6 +49,12 @@ int Stemmer::measure ( std::string word )
 	int m = 0;
 	unsigned long wordIt = 0;
 	unsigned long wordEnd = word.size( ) - 1;
+
+	if ( word.empty( ) )
+		{
+		return 0;
+		}
+
 	// Looking for CVC pattern
 	while ( wordIt <= wordEnd )
 		{
@@ -108,6 +114,12 @@ int Stemmer::measure ( std::string word )
  */
 bool Stemmer::isVowelPresent ( unsigned long wordBeg, unsigned long wordEnd, string word )
 	{
+
+	if ( word.empty( ) )
+		{
+		return false;
+		}
+
 	while ( wordBeg != wordEnd && wordBeg < word.size( ) )
 		{
 		if ( !isConsonant( wordBeg, word ) )
@@ -159,6 +171,11 @@ bool Stemmer::isConsonant ( unsigned long wordIt, string word )
  */
 bool Stemmer::addE ( string word )
 	{
+	if ( word.empty( ) )
+		{
+		return false;
+		}
+
 	// AT -> ATE
 	// BL -> BLE
 	// IZ -> IZE
@@ -182,6 +199,11 @@ bool Stemmer::addE ( string word )
  */
 bool Stemmer::doubleCon ( string word )
 	{
+	if ( word.empty( ) )
+		{
+		return false;
+		}
+
 	unsigned long endWord = word.size( ) - 1;
 
 	if ( word.size( ) > 2 && word[ endWord ] == word[ endWord - 1 ] )
@@ -209,6 +231,12 @@ bool Stemmer::doubleCon ( string word )
  */
 bool Stemmer::endCVC ( std::string word )
 	{
+
+	if ( word.empty( ) )
+		{
+		return false;
+		}
+
 	unsigned long endWord = word.size( ) - 1;
 
 	if ( word.size( ) > 2 )
@@ -236,6 +264,11 @@ bool Stemmer::endCVC ( std::string word )
 std::string Stemmer::step1a ( std::string word )
 	{
 
+	if ( word.empty( ) )
+		{
+		return word;
+		}
+
 	// check S at end
 	if ( word[ word.size( ) - 1 ] == 's' && word.size() != 1)
 		{
@@ -286,6 +319,12 @@ std::string Stemmer::step1a ( std::string word )
  */
 std::string Stemmer::step1b ( std::string word )
 	{
+
+	if ( word.empty( ) )
+		{
+		return word;
+		}
+
 	unsigned long end = word.size( ) - 1;
 	auto begPtr = word.begin( );
 	auto endPtr = begPtr + end;
@@ -349,6 +388,10 @@ std::string Stemmer::step1b ( std::string word )
 string Stemmer::step1c ( string word )
 	{
 
+	if ( word.empty( ) )
+		{
+		return word;
+		}
 	// Y -> I
 	// happy -> happi
 	// sky -> sky
@@ -371,7 +414,7 @@ string Stemmer::step1c ( string word )
  */
 string Stemmer::step2 ( std::string word )
 	{
-	if ( measure( word ) == 0 )
+	if ( measure( word ) == 0 || word.empty( ) )
 		{
 		return word;
 		}
@@ -528,7 +571,7 @@ string Stemmer::step2 ( std::string word )
 std::string Stemmer::step3 ( std::string word )
 	{
 
-	if ( measure( word ) == 0 )
+	if ( measure( word ) == 0 || word.empty( ) )
 		{
 		return word;
 		}
@@ -592,7 +635,7 @@ std::string Stemmer::step3 ( std::string word )
  */
 std::string Stemmer::step4 ( std::string word )
 	{
-	if ( measure( word ) <= 2 )
+	if ( measure( word ) <= 2 || word.empty( ) )
 		{
 		return word;
 		}
@@ -730,6 +773,11 @@ std::string Stemmer::step4 ( std::string word )
  */
 std::string Stemmer::step5a ( std::string word )
 	{
+	if ( word.empty( ) )
+		{
+		return word;
+		}
+
 	auto m = measure( word );
 	// E ->
 	// probabte -> probat
@@ -757,6 +805,11 @@ std::string Stemmer::step5a ( std::string word )
  */
 std::string Stemmer::step5b ( std::string word )
 	{
+	if ( word.empty( ) )
+		{
+		return word;
+		}
+
 	if ( word.size( ) > 2 && measure( word ) > 1 && word[ word.size( ) - 1 ] == 'l' && word[ word.size( ) - 2 ] == 'l' )
 		{
 		word = subStr( word, 0, word.size( ) - 1 );
diff --git a/util/tests/stemmerTest.cpp b/util/tests/stemmerTest.cpp
index c49415b..79fabfa 100644
--- a/util/tests/stemmerTest.cpp
+++ b/util/tests/stemmerTest.cpp
@@ -110,6 +110,8 @@ int main ( )
 	assert ( stem.execute( "are" ) == "ar" );
 	assert ( stem.execute( "terrible" ) == "terribl" );
 
+	assert ( stem.execute( "" ) == "" );
+	assert ( stem.execute( "s" ) == "s" );
 
 	cout << "\nTests passed for Stemmer :D" << endl;
 
-- 
GitLab