From 2f0739e40d8d8f5156ef4c53cfd356bc01033199 Mon Sep 17 00:00:00 2001 From: vcday <vcday@umich.edu> Date: Thu, 22 Feb 2018 17:33:17 -0500 Subject: [PATCH] changed parser function --- crawler/spider.cpp | 2 +- parser/Parser.h | 139 +++++++----------- parser/tests/ParserTest_endtoend.cpp | 20 --- parser/tests/ParserTest_unit.cpp | 4 - parser/tests/parserTest.cpp | 45 ++++++ shared/Document.h | 129 ++++++++++++++++ shared/documentMap.h | 121 --------------- util/Stemmer.h | 15 ++ {parser => util}/Tokenizer.h | 2 +- util/stringProcessing.h | 92 ++++++++---- util/tests/stemmerTest.cpp | 4 + .../tests/stringProcessingTest.cpp | 82 ++++------- .../tests/tokenizerTest.cpp | 12 +- 13 files changed, 349 insertions(+), 318 deletions(-) delete mode 100644 parser/tests/ParserTest_endtoend.cpp delete mode 100644 parser/tests/ParserTest_unit.cpp create mode 100644 parser/tests/parserTest.cpp create mode 100644 shared/Document.h delete mode 100644 shared/documentMap.h create mode 100644 util/Stemmer.h rename {parser => util}/Tokenizer.h (95%) create mode 100644 util/tests/stemmerTest.cpp rename parser/tests/StringProcessing_unit.cpp => util/tests/stringProcessingTest.cpp (56%) rename parser/tests/TokenizerTest_unit.cpp => util/tests/tokenizerTest.cpp (81%) diff --git a/crawler/spider.cpp b/crawler/spider.cpp index ebd6327..af06926 100644 --- a/crawler/spider.cpp +++ b/crawler/spider.cpp @@ -16,7 +16,7 @@ #include "LocalReader.h" #include "SocketReader.h" -#include "../shared/documentMap.h" +#include "../shared/Document.h" string Spider::getUrl() diff --git a/parser/Parser.h b/parser/Parser.h index 52e4cbe..423d164 100644 --- a/parser/Parser.h +++ b/parser/Parser.h @@ -12,153 +12,126 @@ #include <queue> #include <iostream> #include <fstream> -#include "Tokenizer.h" +#include "../util/Tokenizer.h" #include "../util/stringProcessing.h" +#include "../shared/Document.h" +#include "../shared/ProducerConsumerQueue.h" using namespace std; -// Doc Id -std::priority_queue< int > DOCID_PQ; -std::priority_queue< string > URL_PQ; -string PATH = "/doc"; - -//TEMP - remove once getting actual crawler input - - -//TODO -// get doc id from DocIDqueue (sent from crawler) -// go to disk and get the HTML file -// parse the html file -// if find url; send to crawler -// if find title send string to tokenizer +/** + * This class uses the Doc object from the Crawler to parse the text + * Returns a pointer to a dictionary that contains the tokenized input + */ class Parser { public: - struct raw_data + Parser ( ProducerConsumerQueue < string > * urlFrontierIn) { - string url; - string html_data; - - raw_data ( string u, string h ) : url ( u ), html_data ( h ) - { } - }; + urlFrontier = urlFrontierIn; + } /** * Parser * @return */ - // input: object with char* and URL string - // - const unordered_map< string, vector< int>> execute ( ) + const unordered_map< string, vector< int>> * execute ( Document* document) { Tokenizer tokenizer; - //TEMP - until we get real input from crawler - raw_data data ( "url", "html" ); - parse ( data.html_data, &tokenizer ); + parse ( document->DocToString (), &tokenizer ); return tokenizer.get ( ); } private: + ProducerConsumerQueue < string >* urlFrontier; /** * Parses file * @param inFile * @return */ - - string parse ( string & html_data, Tokenizer *tokenizer ) + void parse ( string html, Tokenizer *tokenizer ) { - //figure out file handle syntax - pointer to file + string tokenizerInput = ""; string currentTerm = ""; - for ( int i = 0; i < html_data.size ( ); ++i ) + for ( int i = 0; i < html.size ( ); ++i ) { - while ( html_data[ i ] != ' ' ) + while ( html.at( i ) != '\n' ) { - currentTerm += html_data[ i ]; + currentTerm += html[ i ]; } - //one method that directly adds urls onto frontier instead of checking for them - add_urls ( currentTerm ); - check_title ( currentTerm ); - tokenizerInput += currentTerm; - } + string url = extract_url ( currentTerm ); + if (url != "") + { + urlFrontier->Push (url); + } + else + { + string title = extract_title ( currentTerm ); + if (title != "") + { + tokenizerInput += title; + } + } + } tokenizer->execute ( tokenizerInput ); + } - /* - * Uses findStr function in stringProcessing.h: STILL HAVE TO TEST - * Instead of bool, just directly adds on to url queue + /** + * Returns a url, or "" if none + * @param word + * @return */ - void add_urls ( string & word ) + string extract_url ( string word ) { - string a_tag = "<a"; - string http_start = "href=http"; - string http_end_tag = ">"; - - auto word_iter = word.begin ( ); string url = ""; - word_iter = findStr ( word_iter, a_tag ); - if ( word_iter != nullptr ) + + if ( findStr ( word, "<a" ) != '\0' ) { - auto found_http = findStr ( word_iter, http_start ); - if ( found_http != nullptr ) + auto foundHttp = findStr ( word, "href=http" ); + if ( foundHttp != '\0' ) { url = "http"; - found_http += 9; - auto end_http = findStr ( word_iter, http_end_tag ); - while ( found_http != end_http ) + foundHttp += 9; + + while ( foundHttp != findStr ( word, "\">" ) ) { - url += *found_http; - ++found_http; + url += *foundHttp; + ++foundHttp; } } } - else - { - return; - } - - if ( url != "" ) - { - URL_PQ.push ( url ); - } - - + return url; } /** - * <title >AJF</title> + * Returns a title, or "" if none * @param word + * @return */ - - bool check_title ( string & word ) + string extract_title ( string & word ) { - if ( char *pos = strstr ( "<title>", word ) ) + string title = ""; + auto pos = findStr ( "<title>", word ); + if ( pos != '\0') { pos += 6; - auto end_pos = strstr ( "</title>", word ); - string title = ""; - while ( pos != end_pos ) + while ( pos != findStr ( "</title>", word ) ) { ++pos; title += *pos; - } - - return title; } - -// string begin_title = "<title>"; -// auto word_begin = word.begin(); -// auto word_iter = findStr(word_begin, begin_title); - + return title; } }; diff --git a/parser/tests/ParserTest_endtoend.cpp b/parser/tests/ParserTest_endtoend.cpp deleted file mode 100644 index 7664468..0000000 --- a/parser/tests/ParserTest_endtoend.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Created by anvia on 2/6/2018. -// - -#include <string> -#include "../../util/stringProcessing.h" -#include <iostream> -using namespace std; - -int main() -{ - string original = "It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout." - "The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here'," - "making it look like readable English. "; - - string subStr = "readable"; - auto iter = findStr(subStr, original); - cout << *iter << endl; -} - diff --git a/parser/tests/ParserTest_unit.cpp b/parser/tests/ParserTest_unit.cpp deleted file mode 100644 index 136907e..0000000 --- a/parser/tests/ParserTest_unit.cpp +++ /dev/null @@ -1,4 +0,0 @@ -// -// Created by Veronica Day on 2/13/18. -// - diff --git a/parser/tests/parserTest.cpp b/parser/tests/parserTest.cpp new file mode 100644 index 0000000..bc5248b --- /dev/null +++ b/parser/tests/parserTest.cpp @@ -0,0 +1,45 @@ +// +// Created by anvia on 2/6/2018. +// + +#include <string> +#include <cassert> +#include <iostream> +#include "../Parser.h" +#include "../../shared/Document.h" +#include "../../shared/ProducerConsumerQueue.h" + +using namespace std; + +int main ( ) + { + cout << "Testing Parser ... " << endl << endl; + ProducerConsumerQueue < string > * urlFrontierTest; + Document document ( "<!DOCTYPE html>\n" + "<html>\n" + "<head>\n" + "<!-- HTML Codes by Quackit.com -->\n" + "<title>\n" + "Story of Cat</title>\n" + "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">\n" + "<meta name=\"keywords\" content=\"cat story\">\n" + "<meta name=\"description\" content=\"This is the tale of a cat names joe\">\n" + "<style>\n" + "body {background-color:#ffffff;background-repeat:no-repeat;background-position:top left;background-attachment:fixed;}\n" + "h1{font-family:Arial, sans-serif;color:#000000;background-color:#ffffff;}\n" + "p {font-family:Georgia, serif;font-size:14px;font-style:normal;font-weight:normal;color:#000000;background-color:#ffffff;}\n" + "</style>\n" + "</head>\n" + "<body>\n" + "<h1>Joe the cat</h1>\n" + "<p>On Saturday, joe the cat went to the store. He climbed up a mountain? It was weird. The store was called Food Store</p>\n" + "</body>\n" + "</html>" ); + + Parser parser ( urlFrontierTest ); + auto dictionary = parser.execute ( &document ); + assert( dictionary != nullptr ); + cout << "Parser Tests Passed! :D" << endl; + + } + diff --git a/shared/Document.h b/shared/Document.h new file mode 100644 index 0000000..5aca64d --- /dev/null +++ b/shared/Document.h @@ -0,0 +1,129 @@ +// +// Created by Jake Close on 2/8/18. +// + +#pragma once + +#include "url.h" +#include <string> +#include <vector> +#include <pthread.h> + +using namespace std; + +namespace filepath + { + const char *DOC_MAP = "/docMap.txt"; + } + + +pthread_mutex_t docMap_mutex = PTHREAD_MUTEX_INITIALIZER; + +class Document + { +private: + ParsedUrl url; + long docID; + bool lastCrawlStatus; + int lastCrawlDate; + int lastCrawlPageCount; + + //add more info fields here + +public: + Document ( string url_in ) : url ( ParsedUrl ( url_in ) ) + { } + + string DocToString ( ) + { + return string ( url.CompleteUrl, strlen ( url.CompleteUrl ) ) + "\n"; + } + + int WriteToDocMap ( ) + { + + pthread_mutex_lock ( &docMap_mutex ); + + //for now just write url + + string loc = util::GetCurrentWorkingDir ( ) + filepath::DOC_MAP; + int file = util::getFileDescriptor ( loc.c_str ( ), "W" ); + off_t resultPosition = 0; + + try + { + //check if its available + if ( file == -1 ) + { + throw ( "error opening docMap" ); + } + else + { + //get the current size of the docMap + size_t seekPosition = util::FileSize ( file ); + //seek to the end of the file + resultPosition = lseek ( file, seekPosition, SEEK_SET ); + + if ( resultPosition == -1 ) + { + throw ( "Could not seek" ); + } + cout << "Current docMap position on disk" << endl; + cout << resultPosition << endl; + + size_t success = write ( file, this->DocToString ( ).c_str ( ), + strlen ( this->DocToString ( ).c_str ( ) ) ); + if ( success == -1 ) + { + throw ( "Error writing document object to document map" ); + } + } + } + catch ( const char *str ) + { + cerr << str << endl; + close ( file ); + pthread_mutex_unlock ( &docMap_mutex ); + return -1; + } + close ( file ); + pthread_mutex_unlock ( &docMap_mutex ); + return resultPosition; + } + + + static void PrintDocMap ( string url, int location ) + { + pthread_mutex_lock ( &docMap_mutex ); + + std::cout << url << " is " << location; + + string loc = util::GetCurrentWorkingDir ( ) + filepath::DOC_MAP; + int file = util::getFileDescriptor ( loc.c_str ( ), "R" ); + + + //check if its available + if ( file ) + { + off_t resultPosition = lseek ( file, ( size_t ) location, SEEK_SET ); + int bytes = 14; + if ( bytes > 0 ) + { + char *buffer = new char[bytes]; + ssize_t bytesRead; + if ( bytesRead = read ( file, buffer, bytes ) ) + write ( 1, buffer, bytesRead ); + else + { + cerr << "Could not read " << bytes << " bytes at position " << + resultPosition << ", error = " << errno; + pthread_mutex_unlock ( &docMap_mutex ); + return; + } + } + + } + pthread_mutex_unlock ( &docMap_mutex ); + return; + } + }; \ No newline at end of file diff --git a/shared/documentMap.h b/shared/documentMap.h deleted file mode 100644 index 720854e..0000000 --- a/shared/documentMap.h +++ /dev/null @@ -1,121 +0,0 @@ -// -// Created by Jake Close on 2/8/18. -// - -#pragma once - -#include "url.h" -#include <string> -#include <vector> -#include <pthread.h> - -using namespace std; - -namespace filepath - { - const char* DOC_MAP = "/docMap.txt"; - } - - - pthread_mutex_t docMap_mutex = PTHREAD_MUTEX_INITIALIZER; - -class Document - { - private: - ParsedUrl url; - long docID; - bool lastCrawlStatus; - int lastCrawlDate; - int lastCrawlPageCount; - - //add more info fields here - - public: - Document(string url_in) : url(ParsedUrl(url_in)) {} - - string DocToString() - { - return string(url.CompleteUrl, strlen(url.CompleteUrl)) + "\n"; - } - - int WriteToDocMap() - { - - pthread_mutex_lock(&docMap_mutex); - - //for now just write url - - string loc = util::GetCurrentWorkingDir() + filepath::DOC_MAP; - int file = util::getFileDescriptor(loc.c_str(), "W"); - off_t resultPosition = 0; - - try { - //check if its available - if (file == -1) { - throw("error opening docMap"); - } else { - //get the current size of the docMap - size_t seekPosition = util::FileSize(file); - //seek to the end of the file - resultPosition = lseek(file, seekPosition, SEEK_SET); - - if (resultPosition == -1) { - throw("Could not seek"); - } - cout << "Current docMap position on disk" << endl; - cout << resultPosition << endl; - - size_t success = write(file, this->DocToString().c_str(), strlen(this->DocToString().c_str())); - if (success == -1) { - throw("Error writing document object to document map"); - } - } - } - catch(const char* str){ - cerr << str << endl; - close(file); - pthread_mutex_unlock(&docMap_mutex); - return -1; - } - close( file ); - pthread_mutex_unlock(&docMap_mutex); - return resultPosition; - } - - - - static void PrintDocMap(string url, int location) - { - pthread_mutex_lock(&docMap_mutex); - - std::cout << url << " is " << location; - - string loc = util::GetCurrentWorkingDir() + filepath::DOC_MAP; - int file = util::getFileDescriptor( loc.c_str(), "R" ); - - - //check if its available - if ( file ) - { - off_t resultPosition = lseek( file, (size_t)location, SEEK_SET ); - int bytes = 14; - if ( bytes > 0 ) - { - char *buffer = new char[bytes]; - ssize_t bytesRead; - if ( bytesRead = read( file, buffer, bytes )) - write( 1, buffer, bytesRead ); - else - { - cerr << "Could not read " << bytes << " bytes at position " << - resultPosition << ", error = " << errno; - pthread_mutex_unlock(&docMap_mutex); - return; - } - } - - } - pthread_mutex_unlock(&docMap_mutex); - return; - } - }; \ No newline at end of file diff --git a/util/Stemmer.h b/util/Stemmer.h new file mode 100644 index 0000000..84e1990 --- /dev/null +++ b/util/Stemmer.h @@ -0,0 +1,15 @@ +// +// Created by Veronica Day on 2/22/18. +// + +#ifndef EECS398_SEARCH_STEMMER_H +#define EECS398_SEARCH_STEMMER_H + + +class Stemmer + { + + }; + + +#endif //EECS398_SEARCH_STEMMER_H diff --git a/parser/Tokenizer.h b/util/Tokenizer.h similarity index 95% rename from parser/Tokenizer.h rename to util/Tokenizer.h index a3443fe..3e28002 100644 --- a/parser/Tokenizer.h +++ b/util/Tokenizer.h @@ -5,7 +5,7 @@ #include <string> #include <unordered_map> #include <vector> -#include "../util/stringProcessing.h" +#include "stringProcessing.h" using namespace std; diff --git a/util/stringProcessing.h b/util/stringProcessing.h index 8c746f4..0afdee9 100644 --- a/util/stringProcessing.h +++ b/util/stringProcessing.h @@ -13,46 +13,56 @@ using namespace std; - -/* - * Takes in an iterator to the original text and a substring: specifically for a parser functionality - * Potentially make one that takes in two strings? Is this needed? +/** + * Set of stopwords + */ +set< string > stopWords = { "a", "all", "an", "and", "any", "are", "as", "at", "be", "been", "but", "by", "few", "from", + "for", "have", "he", "her", "here", "him", "his", "how", + "i", "in", "is", "it", "its", "many ", "me", "my", "none", "of", "on", "or", "our", "she", + "some", "the", "their", "them", "there", "they", "that", + "this", "to", "us", "was", "what", "when", "where", "which", "who", "why", "will", "with", + "you", "your" }; +/** + * Finds the needle in the haystack + * @param haystack + * @param needle + * @return */ -string::iterator findStr ( string::iterator originalText, string & subStr ) +string::iterator findStr ( string haystack, string needle ) { - auto begin_sub = subStr.begin ( ); - auto begin_original = originalText; + auto beginNeedle = needle.begin ( ); + auto beginHaystack = haystack.begin(); - while ( *begin_original != '\0' ) //*(forward++) != '\0' + while ( *beginHaystack != '\0' ) { //keep looking for instance of a match - if ( *begin_original != *begin_sub ) + if ( *beginHaystack != *beginNeedle ) { - ++begin_original; + ++beginHaystack; } - else if ( *begin_original == *begin_sub ) + else if ( *beginHaystack == *beginNeedle ) { /* want to keep the original iterator where it is so it can return the beginning of the matched word if found */ - auto temp = begin_original; - while ( *temp == *begin_sub ) + auto temp = beginHaystack; + while ( *temp == *beginNeedle ) { ++temp; - ++begin_sub; - //if it hits the end of the substring, it signifies an exact match - if ( *begin_sub == '\0' ) + ++beginNeedle; + //if it hits the end of the needleing, it signifies an exact match + if ( *beginNeedle == '\0' ) { //this is pointing at the beginning of the match - return begin_original; + return beginHaystack; } } //need to reset because still has to search rest of the string for a match - begin_sub = subStr.begin ( ); + beginNeedle = needle.begin ( ); //sets the original text pointer to where the last search left off - begin_original = temp; + beginHaystack = temp; } else @@ -61,18 +71,19 @@ string::iterator findStr ( string::iterator originalText, string & subStr ) } } - return begin_original; + return beginHaystack; } -set< string > stopWords = { "a", "all", "an", "and", "any", "are", "as", "at", "be", "been", "but", "by", "few", "from", - "for", "have", "he", "her", "here", "him", "his", "how", - "i", "in", "is", "it", "its", "many ", "me", "my", "none", "of", "on", "or", "our", "she", - "some", "the", "their", "them", "there", "they", "that", - "this", "to", "us", "was", "what", "when", "where", "which", "who", "why", "will", "with", - "you", "your" }; -vector< string > splitStr ( string & originalText, char delim ) + +/** + * Returns a vector of strings from @originalText, split by @delim + * @param originalText + * @param delim + * @return + */ +vector< string > splitStr ( string originalText, char delim ) { vector< string > splitWords; auto begin = originalText.begin ( ); @@ -93,14 +104,22 @@ vector< string > splitStr ( string & originalText, char delim ) return splitWords; } - -bool isStopWord ( string & word ) +/** + * Returns true if @word is a stopword + * @param word + * @return + */ +bool isStopWord ( string word ) { return ( stopWords.find ( word ) != stopWords.end ( ) ); } - -string toLower ( string & word ) +/** + * Returns lowercase @word + * @param word + * @return + */ +string toLower ( string word ) { auto iter = word.begin ( ); string lowerWord = ""; @@ -121,4 +140,15 @@ string toLower ( string & word ) return lowerWord; } +//TODO +/** + * Returns stemmed @word + * @param word + * @return + */ +string stemWord(string word) + { + return ""; + } + #endif //EECS398_SEARCH_STRINGPROCESSING_H diff --git a/util/tests/stemmerTest.cpp b/util/tests/stemmerTest.cpp new file mode 100644 index 0000000..f942e1a --- /dev/null +++ b/util/tests/stemmerTest.cpp @@ -0,0 +1,4 @@ +// +// Created by Veronica Day on 2/22/18. +// + diff --git a/parser/tests/StringProcessing_unit.cpp b/util/tests/stringProcessingTest.cpp similarity index 56% rename from parser/tests/StringProcessing_unit.cpp rename to util/tests/stringProcessingTest.cpp index 3643119..3562e92 100644 --- a/parser/tests/StringProcessing_unit.cpp +++ b/util/tests/stringProcessingTest.cpp @@ -4,19 +4,19 @@ #include <string> #include <vector> -#include "../../util/stringProcessing.h" +#include "../stringProcessing.h" #include <iostream> #include <cassert> using namespace std; -void test_findStr ( string original ); +void testFindStr ( string original ); -void test_splitStr ( string original ); +void testSplitStr ( string original ); -void test_toLower ( ); +void testToLower ( ); -void test_isStopWord ( ); +void testIsStopWord ( ); int main ( ) { @@ -27,66 +27,46 @@ int main ( ) "The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here'," "making it look like readable English. "; - test_findStr ( original ); - test_splitStr ( original ); - test_toLower ( ); - test_isStopWord ( ); + testFindStr ( original ); + testSplitStr ( original ); + testToLower ( ); + testIsStopWord ( ); cout << "\nTests passed for StringProcessing_unit :D" << endl; } -void test_findStr ( string original ) +void testFindStr ( string original ) { cout << "Testing findStr..." << endl; - - string find = "established"; - auto word = findStr ( original.begin ( ), find ); - assert( *word == 'e' ); - - find = "Lorem Ipsum"; - auto word2 = findStr ( original.begin ( ), find ); - assert( *word2 == 'L' ); + assert( *findStr ( original, "established" ) == 'e' ); + assert( *findStr ( original, "Lorem Ipsum" ) == 'L' ); string title = "<title> This is a test </title>"; - find = "<title>"; - auto word3 = findStr ( title.begin ( ), find ); - assert( *word3 == '<' ); + auto word = findStr ( title, "<title>" ); + assert( *word == '<' ); auto titleIt = title.begin ( ); - while ( word3 != title.end ( ) && titleIt != title.end ( ) ) + while ( word != title.end ( ) && titleIt != title.end ( ) ) { - assert( *word3 == *titleIt ); - ++word3; + assert( *word == *titleIt ); + ++word; ++titleIt; } - find = "</title>"; - auto word4 = findStr ( title.begin ( ), find ); - assert( *word4 == '<' && *( word4 + 1 ) == '/' ); - - auto word0 = findStr ( original.begin ( ), find ); - assert( *word0 == '\0' ); - - find = "orange"; - auto word5 = findStr ( original.begin ( ), find ); - assert( *word5 == '\0' ); - - find = "orange"; - string test = "apple"; - auto word7 = findStr ( test.begin ( ), find ); - assert( *word7 == '\0' ); - - find = "bird"; - test = "bigbird"; - auto word6 = findStr ( test.begin ( ), find ); - assert( *word6 == 'b' && *( word6 + 1 ) == 'i' && *( word6 + 2 ) == 'r' ); + auto word1 = findStr ( title, "</title>" ); + assert( *word1 == '<' && *( word1 + 1 ) == '/' ); + assert( *findStr ( original, "</title>" ) == '\0' ); + assert( *findStr ( original, "orange" ) == '\0' ); + assert( *findStr ( "apple", "orange" ) == '\0' ); + auto word2 = findStr ( "bigbird", "bird" ); + assert( *word2 == 'b' && *( word2 + 1 ) == 'i' && *( word2 + 2 ) == 'r' ); - cout << "test_findStr passed" << endl; + cout << "testFindStr passed" << endl; } -void test_splitStr ( string original ) +void testSplitStr ( string original ) { cout << "Testing splitStr..." << endl; @@ -98,12 +78,12 @@ void test_splitStr ( string original ) assert( vec.size ( ) == 2 ); assert( vec[ 0 ] == "hello" && vec[ 1 ] == "goodbye" ); - cout << "test_splitStr passed" << endl; + cout << "testSplitStr passed" << endl; } -void test_toLower ( ) +void testToLower ( ) { cout << "Testing toLower..." << endl; @@ -126,11 +106,11 @@ void test_toLower ( ) assert ( test4 == "" ); assert ( test5 == " " ); - cout << "test_toLower passed" << endl; + cout << "testToLower passed" << endl; } -void test_isStopWord ( ) +void testIsStopWord ( ) { cout << "Testing isStopWord..." << endl; @@ -146,6 +126,6 @@ void test_isStopWord ( ) assert ( !isStopWord ( blank ) ); assert ( !isStopWord ( blank2 ) ); - cout << "test_isStopWord passed" << endl; + cout << "testIsStopWord passed" << endl; } \ No newline at end of file diff --git a/parser/tests/TokenizerTest_unit.cpp b/util/tests/tokenizerTest.cpp similarity index 81% rename from parser/tests/TokenizerTest_unit.cpp rename to util/tests/tokenizerTest.cpp index bc189c5..0ccb13b 100644 --- a/parser/tests/TokenizerTest_unit.cpp +++ b/util/tests/tokenizerTest.cpp @@ -10,7 +10,7 @@ using namespace std; -void test_execute ( string original ); +void testExecute ( string original ); int main ( ) @@ -22,18 +22,18 @@ int main ( ) "The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here'," "making it look like readable English. "; - test_execute ( original ); + testExecute ( original ); cout << "\nTests passed for TokenizerTest_unit :D" << endl; } -void test_execute ( string original ) +void testExecute ( string original ) { - Tokenizer my_tokenizer; - my_tokenizer.execute ( original ); + Tokenizer myTokenizer; + myTokenizer.execute ( original ); - auto dict = my_tokenizer.get ( ); + auto dict = myTokenizer.get ( ); for ( auto it = dict->begin ( ); it != dict->end ( ); it++ ) { -- GitLab