// // Created by anvia on 1/31/2018. // #include <string> #include <unordered_map> #include <vector> #include "stringProcessing.h" using namespace std; class Tokenizer { public: Tokenizer ( ) { doc_index = new unordered_map< string, vector< int>>; } unordered_map< string, vector< int>> *get ( ) const { return doc_index; } void execute ( string originalText ) { int offset = 0; vector< string > splitText = splitStr ( originalText, ' ' ); string lowerString = ""; for ( int i = 0; i < splitText.size ( ); ++i ) { lowerString = toLower ( splitText[ i ] ); if ( !isStopWord ( lowerString ) ) { ( *doc_index )[ lowerString ].push_back ( offset ); ++offset; } } } private: unordered_map< string, vector< int>> *doc_index; };