Newer
Older
//
// Created by anvia on 1/31/2018.
//
#ifndef EECS398_SEARCH_STRINGPROCESSING_H
#define EECS398_SEARCH_STRINGPROCESSING_H
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
/**
 * Set of stopwords.
 *
 * Entries are lowercase and are matched exactly, so each literal must be
 * spelled without surrounding whitespace.
 */
std::set< std::string > stopWords = {
    "a", "all", "an", "and", "any", "are", "as", "at", "be", "been", "but", "by", "few", "from",
    "for", "have", "he", "her", "here", "him", "his", "how",
    "i", "in", "is", "it", "its", "many", "me", "my", "none", "of", "on", "or", "our", "she",
    "some", "the", "their", "them", "there", "they", "that",
    "this", "to", "us", "was", "what", "when", "where", "which", "who", "why", "will", "with",
    "you", "your" };
/**
* Finds the needle in the haystack
auto beginNeedle = needle.begin ( );
auto beginHaystack = haystack.begin();
{
/* want to keep the original iterator where it is so it
can return the beginning of the matched word if found */
auto temp = beginHaystack;
while ( *temp == *beginNeedle )
++beginNeedle;
//if it hits the end of the needleing, it signifies an exact match
if ( *beginNeedle == '\0' )
}
}
//need to reset because still has to search rest of the string for a match
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/**
 * Finds the next occurrence of @needle at or after @haystackPointer.
 *
 * The length of the haystack is not passed in, so the scan stops at the first
 * '\0' it reads. The iterator therefore has to point into a null-terminated
 * character sequence.
 *
 * @param needle text to search for; only the part before its first '\0' is used
 * @param haystackPointer position in the haystack at which the search starts
 * @return iterator to the first character of the match, or an iterator to the
 *         haystack's terminating '\0' when there is no match (an empty needle
 *         never matches)
 */
std::string::iterator findNext ( std::string needle, std::string::iterator haystackPointer )
{
    // c_str ( ) is guaranteed to be null-terminated, so walking the needle
    // through a pointer never dereferences past its end.
    const char *const needleBegin = needle.c_str ( );

    auto candidate = haystackPointer;
    while ( *candidate != '\0' )
    {
        // Compare from the candidate position without moving it, so the start
        // of the match can be returned.
        auto haystackCursor = candidate;
        const char *needleCursor = needleBegin;
        while ( *needleCursor != '\0' && *haystackCursor == *needleCursor )
        {
            ++haystackCursor;
            ++needleCursor;
        }

        // The whole (non-empty) needle was consumed: exact match.
        if ( *needleCursor == '\0' && needleCursor != needleBegin )
        {
            return candidate;
        }

        // Advance by exactly one character. Jumping to the mismatch position
        // would skip matches that overlap the failed prefix, e.g. "aab" in "aaab".
        ++candidate;
    }
    return candidate;
}
/**
 * Returns a vector of strings from @originalText, split by @delim.
 *
 * Adjacent delimiters and a leading delimiter produce empty strings; a single
 * trailing delimiter does not produce a trailing empty string. An empty input
 * yields an empty vector.
 *
 * @param originalText text to split
 * @param delim character that separates the pieces
 * @return the pieces of @originalText in order, without the delimiters
 */
std::vector< std::string > splitStr ( std::string originalText, char delim )
{
    std::vector< std::string > splitWords;
    const auto textEnd = originalText.end ( );
    auto cursor = originalText.begin ( );

    while ( cursor != textEnd )
    {
        // Find the end of the current piece.
        auto wordEnd = cursor;
        while ( wordEnd != textEnd && *wordEnd != delim )
        {
            ++wordEnd;
        }
        splitWords.emplace_back ( cursor, wordEnd );

        // Step over the delimiter only if one is actually there; stepping
        // unconditionally would move past the end after the last piece.
        cursor = wordEnd;
        if ( cursor != textEnd )
        {
            ++cursor;
        }
    }
    return splitWords;
}
/**
 * Tells whether @word is one of the entries of the stopWords set.
 * The lookup is an exact string comparison.
 * @param word candidate word
 * @return true when @word is present in stopWords, false otherwise
 */
bool isStopWord ( std::string word )
{
    const bool present = stopWords.count ( word ) > 0;
    return present;
}
/**
 * Returns lowercase @word.
 *
 * Only the ASCII letters 'A'-'Z' are converted; every other character is
 * copied through unchanged. The whole string is processed, including any
 * characters that follow an embedded '\0'.
 *
 * @param word text to convert
 * @return copy of @word with ASCII uppercase letters replaced by lowercase
 */
std::string toLower ( std::string word )
{
    // Distance between an uppercase ASCII letter and its lowercase form.
    const char caseOffset = 'a' - 'A';

    // @word is already a private copy, so it can be converted in place.
    for ( char &letter : word )
    {
        if ( letter >= 'A' && letter <= 'Z' )
        {
            letter += caseOffset;
        }
    }
    return word;
}
//TODO: stemming is not implemented yet
/**
 * Placeholder for word stemming.
 * Currently ignores @word and always returns an empty string.
 * @param word word to stem (unused for now)
 * @return empty string
 */
std::string stemWord ( std::string word )
{
    return std::string ( );
}