Newer
Older
//
// Created by Ben Bergkamp on 3/6/18.
//
#include "stringProcessing.h"
/**
* Finds the needle in the haystack
* returns position of first match
*
* @param needle
unsigned long findStr ( string needle, string haystack )
unsigned long needleIt = 0;
unsigned long haystackIt = 0;
if ( haystack[ haystackIt ] != needle[ needleIt ] )
else if ( haystack[ haystackIt ] == needle[ needleIt ] )
/* want to keep the original iterator where it is so it
can return the beginning of the matched word if found */
unsigned long temp = haystackIt;
while ( haystack[ temp ] == needle[ needleIt ] )
//if it hits the end of the needle, it signifies an exact match
//need to reset because still has to search rest of the string for a match
//sets the original text pointer to where the last search left off
/**
* Finds the next position of the needle in the string
*
* @param needle
* @param haystackIt
* @param haystack
* @return
unsigned long findNext ( string needle, unsigned long haystackIt, string haystack )
unsigned long needleIt = 0;
while ( haystackIt < haystack.size( ) )
if ( haystack[ haystackIt ] != needle[ needleIt ] )
else if ( haystack[ haystackIt ] == needle[ needleIt ] )
/* want to keep the original iterator where it is so it
can return the beginning of the matched word if found */
if ( needle.size( ) == 1 )
{
return haystackIt;
}
unsigned long temp = haystackIt;
while ( haystack[ temp ] == needle[ needleIt ] )
//if it hits the end of the needle, it signifies an exact match
if ( needleIt == needle.size( ) - 1 && haystack[ temp ] == needle[ needleIt ] )
//need to reset because still has to search rest of the string for a match
//sets the original text pointer to where the last search left off
/**
* Finds the previous position of the needle in the string
*
unsigned long findPrev ( string needle, unsigned long haystackIt, string haystack )
if ( needle == "" )
{
return haystack.size( );
}
unsigned long needleIt = needle.size( ) - 1;
if ( haystack[ haystackIt ] != needle[ needleIt ] )
if ( haystackIt == 0 )
{
return haystack.size( );
}
--haystackIt;
else if ( haystack[ haystackIt ] == needle[ needleIt ] )
{
/* want to keep the original iterator where it is so it
can return the beginning of the matched word if found */
unsigned long temp = haystackIt;
while ( haystack[ temp ] == needle[ needleIt ] )
//if it hits the end of the needle, it signifies an exact match
if ( needleIt == 0 && haystack[ temp ] == needle[ needleIt ] )
//this is pointing at the beginning of the match
return temp;
}
if ( temp == 0 )
{
return haystack.size( );
}
//need to reset because still has to search rest of the string for a match
//sets the original text pointer to where the last search left off
/**
* Returns a vector of strings from @originalText, split by @delim
vector< string > splitStr ( string originalText, char delim, bool removeSyms )
char begin;
int i = 0;
while ( i < originalText.size( ) )
while ( begin != delim && i < originalText.size( ) )
if ( removeSyms)
{
if( isAlpha( begin ) || isNum( begin ) )
{
word.push_back( begin );
}
}
else
if ( word != "" && word != " " && word[ 0 ] != delim )
/**
* Splits string by multiple delimiters
*
* @param originalText
* @param delims
* @param removeSyms
* @return
*/
vector< string > splitStr ( string originalText, set< char > delims, bool removeSyms )
for ( int i = 0; i < originalText.size( ); ++i )
while ( delims.find( begin ) == delims.end( ) && i < originalText.size( ) )
if ( removeSyms && ( isAlpha( begin ) || isNum( begin ) ) )
else if ( !removeSyms )
{
word.push_back( begin );
}
{
splitWords.push_back( word );
}
}
return splitWords;
}
/**
* Returns true if @word is a stopword
*
* @param word
if ( word[ wordIt ] >= 'A' && word[ wordIt ] <= 'Z' )
/**
 * Returns the substring of @word covering positions [ pos, pos + len )
 *
 * The range is clamped to the end of the word, so asking for more
 * characters than are available returns only what exists. A start
 * position at or past the end of the word yields an empty string.
 *
 * @param word string to copy from
 * @param pos index of the first character to copy
 * @param len maximum number of characters to copy
 * @return string the extracted characters ( possibly empty )
 */
string subStr ( string word, unsigned long pos, unsigned long len )
    {
    string substr = "";

    // nothing to copy when the start lies beyond the last character
    if ( pos >= word.size( ) )
        {
        return substr;
        }

    // never read past the end of the word
    const unsigned long available = word.size( ) - pos;
    const unsigned long count = len < available ? len : available;

    substr.assign( word, pos, count );
    return substr;
    }
/**
* Removes the chars in vector from word
*
* @param word
* @param chars
* @return string
*/
string stripStr ( string word, vector< char > chars )
}
return wordStripped;
}
/**
* Removes all chars from word
* Assumes word is lowercase
*
* @param word
* @param chars
* @return string
*/
if ( isAlpha( word[ i ] ) || isNum( word[ i ] ) )
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
}
return wordStripped;
}
/**
 * Reports whether a character is an ASCII letter
 *
 * Only the ranges 'A'..'Z' and 'a'..'z' are accepted; digits,
 * punctuation and whitespace all yield false.
 *
 * @param ch character to classify
 * @return bool true when ch is an upper- or lowercase letter
 */
bool isAlpha ( char ch )
    {
    const bool isUpper = ( 'A' <= ch && ch <= 'Z' );
    const bool isLower = ( 'a' <= ch && ch <= 'z' );
    return isUpper || isLower;
    }
/**
 * Reports whether a character is a decimal digit
 *
 * @param ch character to classify
 * @return bool true when ch lies in the range '0'..'9'
 */
bool isNum ( char ch )
    {
    // a digit is anything that is not below '0' and not above '9'
    return !( ch < '0' || ch > '9' );
    }
/**
* Returns last n characters in string
* @param input
* @param n
* @return
*/
string lastN ( string input, int n )
{
unsigned long inputSize = input.size( );
return ( n > 0 && inputSize > n ) ? input.substr( inputSize - n ) : "";