Skip to content
Snippets Groups Projects
stringProcessing.h 4.26 KiB
Newer Older
  • Learn to ignore specific revisions
  • aanvi's avatar
    aanvi committed
    //
    // Created by anvia on 1/31/2018.
    //
    
    #ifndef EECS398_SEARCH_STRINGPROCESSING_H
    #define EECS398_SEARCH_STRINGPROCESSING_H
    
    #include <string>
    #include <unordered_map>
    #include <vector>
    
    vcday's avatar
    vcday committed
    #include <set>
    
    aanvi's avatar
    aanvi committed
    #include <iterator>
    
    aanvi's avatar
    aanvi committed
    
    using namespace std;
    
    
    vcday's avatar
    vcday committed
    /**
     * Set of stopwords: common English words that isStopWord ( ) reports as
     * stopwords. Lookups are exact string comparisons, so every entry must be
     * lowercase and free of surrounding whitespace.
     *
     * NOTE(review): this is a non-inline variable definition inside a header, so
     * including the header from more than one translation unit defines the set
     * more than once -- confirm the header is only included from a single source
     * file.
     */
    set< string > stopWords = { "a", "all", "an", "and", "any", "are", "as", "at", "be", "been", "but", "by", "few", "from",
                                "for", "have", "he", "her", "here", "him", "his", "how",
                                "i", "in", "is", "it", "its", "many", "me", "my", "none", "of", "on", "or", "our", "she",
                                "some", "the", "their", "them", "there", "they", "that",
                                "this", "to", "us", "was", "what", "when", "where", "which", "who", "why", "will", "with",
                                "you", "your" };
    /**
     * Finds the first occurrence of needle in haystack.
     *
     * WARNING: haystack is received by value, so the returned iterator refers to
     * this function's private copy of the string, not to the caller's string. It
     * is only usable while that copy is still alive; use findNext ( ) with an
     * iterator into the caller's own string when the position must outlive the
     * call.
     *
     * @param needle   text to search for; an empty needle never matches
     * @param haystack text to search in
     * @return string::iterator to the first character of the first match, or
     *         haystack.end ( ) when there is no match
     */
    string::iterator findStr ( string needle, string haystack )
    	{
    	// An empty needle has always been reported as "not found"; keep that contract.
    	if ( needle.empty ( ) )
    		{
    		return haystack.end ( );
    		}

    	// string::find tries every start position, so occurrences that overlap an
    	// earlier partial match are found, and the scan is bounded by the string's
    	// real length rather than by a '\0' sentinel.
    	const string::size_type matchPos = haystack.find ( needle );
    	if ( matchPos == string::npos )
    		{
    		return haystack.end ( );
    		}

    	return haystack.begin ( ) + static_cast< string::difference_type > ( matchPos );
    	}
    
    
    vcday's avatar
    vcday committed
    /**
     * Finds the next occurrence of needle, starting the search at haystackPointer.
     *
     * The end of the haystack is not passed in, so the search stops at the first
     * '\0' character reached from haystackPointer. haystackPointer must therefore
     * point into (or at the end of) a string whose characters are followed by a
     * '\0' terminator.
     *
     * @param needle          text to search for; an empty needle never matches
     * @param haystackPointer position in the haystack at which to start searching
     * @return string::iterator to the first character of the match, or to the
     *         terminating '\0' position when there is no match
     */
    string::iterator findNext ( string needle, string::iterator haystackPointer )
    	{
    	auto candidate = haystackPointer;

    	// Try every start position in turn. Advancing by exactly one character after
    	// a failed attempt keeps matches that overlap a partial match reachable.
    	for ( ; *candidate != '\0'; ++candidate )
    		{
    		auto haystackPos = candidate;
    		auto needlePos = needle.begin ( );
    		while ( needlePos != needle.end ( ) && *haystackPos != '\0' && *haystackPos == *needlePos )
    			{
    			++haystackPos;
    			++needlePos;
    			}

    		// The whole needle was consumed, so candidate is the start of a match.
    		if ( needlePos == needle.end ( ) && !needle.empty ( ) )
    			{
    			return candidate;
    			}
    		}

    	// No match: candidate now sits on the terminator.
    	return candidate;
    	}
    
    vcday's avatar
    vcday committed
    
    
    vcday's avatar
    vcday committed
    
    /**
     * Returns a vector of strings from @originalText, split by @delim.
     *
     * A leading delimiter or two consecutive delimiters produce an empty token;
     * a trailing delimiter does not add an empty token at the end. An empty
     * input yields an empty vector.
     *
     * @param originalText text to split
     * @param delim        character that separates tokens
     * @return the tokens in the order they appear in originalText
     */
    vector< string > splitStr ( string originalText, char delim )
    	{
    	vector< string > splitWords;
    	auto cursor = originalText.cbegin ( );
    	const auto textEnd = originalText.cend ( );

    	while ( cursor != textEnd )
    		{
    		string word;
    		while ( cursor != textEnd && *cursor != delim )
    			{
    			word += *cursor;
    			++cursor;
    			}

    		splitWords.push_back ( word );

    		// Step over the delimiter only when one is actually there; stepping
    		// unconditionally would move the iterator past the end of the text.
    		if ( cursor != textEnd )
    			{
    			++cursor;
    			}
    		}

    	return splitWords;
    	}
    
    vcday's avatar
    vcday committed
    /**
     * Returns true if @word is a stopword
     * @param word
     * @return
     */
    bool isStopWord ( string word )
    
    vcday's avatar
    vcday committed
    	{
    	return ( stopWords.find ( word ) != stopWords.end ( ) );
    
    	}
    
    vcday's avatar
    vcday committed
    /**
     * Returns a copy of @word with the ASCII letters 'A'-'Z' converted to
     * lowercase. Every other character, including an embedded '\0', is copied
     * unchanged.
     * @param word text to convert
     * @return lowercase copy of @word, the same length as the input
     */
    string toLower ( string word )
    	{
    	string lowerWord;
    	lowerWord.reserve ( word.size ( ) );

    	// Iterate over the string's real length instead of scanning for a '\0'.
    	for ( const char letter : word )
    		{
    		if ( letter >= 'A' && letter <= 'Z' )
    			{
    			// 'a' - 'A' is the fixed distance between the two ASCII letter cases.
    			lowerWord += static_cast< char > ( letter + ( 'a' - 'A' ) );
    			}
    		else
    			{
    			lowerWord += letter;
    			}
    		}

    	return lowerWord;
    	}
    
    aanvi's avatar
    aanvi committed
    
    
    vcday's avatar
    vcday committed
    //TODO: stemming is not implemented yet
    /**
     * Placeholder for word stemming. The argument is currently ignored and an
     * empty string is returned for every input.
     * @param word word to stem (unused for now)
     * @return always the empty string
     */
    string stemWord(string word)
    	{
    	const string notStemmed = "";
    	return notStemmed;
    	}
    
    
    aanvi's avatar
    aanvi committed
    #endif //EECS398_SEARCH_STRINGPROCESSING_H