// Stemmer.h
#pragma once
#include <string>
#include "stringProcessing.h"
/**
 * Word stemmer modeled after the Porter Stemmer algorithm:
 * http://snowball.tartarus.org/algorithms/porter/stemmer.html
 *
 * The only public operation is execute(); the private members are the
 * predicates and per-step rewrites named after the steps of that algorithm.
 *
 * All string types are spelled std::string so this header does not depend
 * on a using-directive leaking in from another include.
 */
class Stemmer
    {
public:
    /**
     * Stemmer constructor.
     */
    Stemmer ( );

    /**
     * Returns the stem of a word.
     *
     * @param word The word to stem (taken by value).
     * @return The stem of `word`.
     */
    std::string execute ( std::string word );

private:
    /**
     * Computes the measure of a word: the number of vowel-consonant (vc)
     * sequences it contains, ignoring an optional leading consonant run [c]
     * and an optional trailing vowel run [v].
     *
     *   [c][v]       -> 0
     *   [c]vc[v]     -> 1
     *   [c]vcvc[v]   -> 2
     *   [c]vcvcvc[v] -> 3
     *
     * @param word The word (or stem) to measure.
     * @return The measure, as in the table above.
     */
    int measure ( std::string word );

    /**
     * Checks whether a vowel is present in the stem.
     *
     * NOTE(review): `word` is passed by value, so wordBeg / wordEnd cannot
     * point into that copy; they necessarily refer to a string owned by the
     * caller. Confirm in the implementation that they are never compared
     * against iterators of the local `word`.
     *
     * @param wordBeg Iterator to the start of the range to inspect.
     * @param wordEnd Iterator to the end of the range to inspect
     *                (presumably one-past-the-end -- confirm).
     * @param word    The word the range belongs to.
     * @return true if a vowel is present in the stem, false otherwise.
     */
    bool isVowelPresent ( std::string::iterator wordBeg, std::string::iterator wordEnd, std::string word );

    /**
     * Returns true if wordIt points to a consonant.
     *
     * @param wordIt    Iterator to the character being classified.
     * @param wordBegin Iterator to the first character of the same word
     *                  (presumably used to look at the preceding letter when
     *                  classifying 'y' -- confirm against the implementation).
     * @return true if the character at wordIt is a consonant.
     */
    bool isConsonant ( std::string::iterator wordIt, std::string::iterator wordBegin );

    /**
     * Returns true if an 'e' should be added to the end of the word.
     *
     * @param word The word (or stem) to test.
     * @return true if 'e' should be appended.
     */
    bool addE ( std::string word );

    /**
     * Returns true if the word ends in a double consonant,
     * excluding LL, SS and ZZ.
     *
     * @param word The word (or stem) to test.
     * @return true if the word ends in a double consonant other than
     *         LL, SS or ZZ.
     */
    bool doubleCon ( std::string word );

    /**
     * Returns true if the word ends in a
     * consonant, vowel, consonant pattern,
     * except when the second consonant is W, X or Y.
     *
     * @param word The word (or stem) to test.
     * @return true if the word ends in such a pattern.
     */
    bool endCVC ( std::string word );

    /**
     * Step 1a: stems plural words.
     *
     * @param word The word to process.
     * @return The word after step 1a.
     */
    std::string step1a ( std::string word );

    /**
     * Step 1b: stems ED and ING endings.
     *
     * @param word The word to process.
     * @return The word after step 1b.
     */
    std::string step1b ( std::string word );

    /**
     * Step 1c: checks for the Y -> I replacement.
     *
     * @param word The word to process.
     * @return The word after step 1c.
     */
    std::string step1c ( std::string word );

    /**
     * Step 2 (suffix rules are defined by the algorithm referenced in the
     * class comment; the implementation is not part of this header).
     *
     * @param word The word to process.
     * @return The word after step 2.
     */
    std::string step2 ( std::string word );

    /**
     * Step 3 (suffix rules are defined by the algorithm referenced in the
     * class comment; the implementation is not part of this header).
     *
     * @param word The word to process.
     * @return The word after step 3.
     */
    std::string step3 ( std::string word );

    /**
     * Step 4 (suffix rules are defined by the algorithm referenced in the
     * class comment; the implementation is not part of this header).
     *
     * @param word The word to process.
     * @return The word after step 4.
     */
    std::string step4 ( std::string word );

    /**
     * Step 5a (rules are defined by the algorithm referenced in the
     * class comment; the implementation is not part of this header).
     *
     * @param word The word to process.
     * @return The word after step 5a.
     */
    std::string step5a ( std::string word );

    /**
     * Step 5b (rules are defined by the algorithm referenced in the
     * class comment; the implementation is not part of this header).
     *
     * @param word The word to process.
     * @return The word after step 5b.
     */
    std::string step5b ( std::string word );
    };