From 2f0739e40d8d8f5156ef4c53cfd356bc01033199 Mon Sep 17 00:00:00 2001
From: vcday <vcday@umich.edu>
Date: Thu, 22 Feb 2018 17:33:17 -0500
Subject: [PATCH] changed parser function

---
 crawler/spider.cpp                            |   2 +-
 parser/Parser.h                               | 139 +++++++-----------
 parser/tests/ParserTest_endtoend.cpp          |  20 ---
 parser/tests/ParserTest_unit.cpp              |   4 -
 parser/tests/parserTest.cpp                   |  45 ++++++
 shared/Document.h                             | 129 ++++++++++++++++
 shared/documentMap.h                          | 121 ---------------
 util/Stemmer.h                                |  15 ++
 {parser => util}/Tokenizer.h                  |   2 +-
 util/stringProcessing.h                       |  92 ++++++++----
 util/tests/stemmerTest.cpp                    |   4 +
 .../tests/stringProcessingTest.cpp            |  82 ++++-------
 .../tests/tokenizerTest.cpp                   |  12 +-
 13 files changed, 349 insertions(+), 318 deletions(-)
 delete mode 100644 parser/tests/ParserTest_endtoend.cpp
 delete mode 100644 parser/tests/ParserTest_unit.cpp
 create mode 100644 parser/tests/parserTest.cpp
 create mode 100644 shared/Document.h
 delete mode 100644 shared/documentMap.h
 create mode 100644 util/Stemmer.h
 rename {parser => util}/Tokenizer.h (95%)
 create mode 100644 util/tests/stemmerTest.cpp
 rename parser/tests/StringProcessing_unit.cpp => util/tests/stringProcessingTest.cpp (56%)
 rename parser/tests/TokenizerTest_unit.cpp => util/tests/tokenizerTest.cpp (81%)

diff --git a/crawler/spider.cpp b/crawler/spider.cpp
index ebd6327..af06926 100644
--- a/crawler/spider.cpp
+++ b/crawler/spider.cpp
@@ -16,7 +16,7 @@
 
 #include "LocalReader.h"
 #include "SocketReader.h"
-#include "../shared/documentMap.h"
+#include "../shared/Document.h"
 
 
 string Spider::getUrl()
diff --git a/parser/Parser.h b/parser/Parser.h
index 52e4cbe..423d164 100644
--- a/parser/Parser.h
+++ b/parser/Parser.h
@@ -12,153 +12,126 @@
 #include <queue>
 #include <iostream>
 #include <fstream>
-#include "Tokenizer.h"
+#include "../util/Tokenizer.h"
 #include "../util/stringProcessing.h"
+#include "../shared/Document.h"
+#include "../shared/ProducerConsumerQueue.h"
 
 using namespace std;
 
-// Doc Id
-std::priority_queue< int > DOCID_PQ;
-std::priority_queue< string > URL_PQ;
-string PATH = "/doc";
-
-//TEMP - remove once getting actual crawler input
-
-
-//TODO
-// get doc id from DocIDqueue (sent from crawler)
-// go to disk and get the HTML file
-// parse the html file
-// if find url; send to crawler
-// if find title send string to tokenizer
+/**
+ * This class uses the Doc object from the Crawler to parse the text
+ * Returns a pointer to a dictionary that contains the tokenized input
+ */
 class Parser
 	{
 
 public:
 
-	struct raw_data
+	Parser ( ProducerConsumerQueue < string > * urlFrontierIn)
 		{
-		string url;
-		string html_data;
-
-		raw_data ( string u, string h ) : url ( u ), html_data ( h )
-			{ }
-		};
+		urlFrontier = urlFrontierIn;
+		}
 
 
 	/**
 	 * Parser
 	 * @return
 	 */
-	// input: object with char*  and URL string
-	//
-	const unordered_map< string, vector< int>> execute ( )
+	const unordered_map< string, vector< int>> * execute ( Document* document)
 		{
 		Tokenizer tokenizer;
-		//TEMP - until we get real input from crawler
-		raw_data data ( "url", "html" );
-		parse ( data.html_data, &tokenizer );
+		parse ( document->DocToString (), &tokenizer );
 		return tokenizer.get ( );
 		}
 
 
 private:
+	ProducerConsumerQueue < string >* urlFrontier;
 
 	/**
-	 * Parses file
-	 * @param inFile
-	 * @return
+	 * Scans @html one line at a time: urls are pushed to urlFrontier, titles are tokenized
+	 * @param html document text, lines separated by '\n'
+	 * @param tokenizer receives the concatenated titles through execute ( )
 	 */
-
-	string parse ( string & html_data, Tokenizer *tokenizer )
+	void parse ( string html, Tokenizer *tokenizer )
 		{
-		//figure out file handle syntax - pointer to file
+
 		string tokenizerInput = "";
 		string currentTerm = "";
-		for ( int i = 0; i < html_data.size ( ); ++i )
+		for ( size_t i = 0; i < html.size ( ); ++i )
 			{
-			while ( html_data[ i ] != ' ' )
+			while ( i < html.size ( ) && html[ i ] != '\n' )
 				{
-				currentTerm += html_data[ i ];
+				currentTerm += html[ i++ ]; // must advance, or this loop never ends
 				}
 
-			//one method that directly adds urls onto frontier instead of checking for them
-			add_urls ( currentTerm );
-			check_title ( currentTerm );
-			tokenizerInput += currentTerm;
-			}
+			string url = extract_url ( currentTerm );
+			if (url != "")
+				{
+				urlFrontier->Push (url);
+				}
+			else
+				{
+				string title = extract_title ( currentTerm );
+				if (title != "")
+					{
+					tokenizerInput += title;
+					}
+				}
+			currentTerm.clear ( ); // each line is examined on its own
+			}
 		tokenizer->execute ( tokenizerInput );
+
 		}
 
-	/*
-	 * Uses findStr function in stringProcessing.h: STILL HAVE TO TEST
-	 * Instead of bool, just directly adds on to url queue
+	/**
+	 * Returns the url of the first `<a ... href=http...">` link in @word, or "" if none
+	 * @param word text to scan (one line of html)
+	 * @return url starting with "http", up to but excluding the closing `">`; "" if no complete link
 	 */
-	void add_urls ( string & word )
+	string extract_url ( string word )
 		{
-		string a_tag = "<a";
-		string http_start = "href=http";
-		string http_end_tag = ">";
-
-		auto word_iter = word.begin ( );
 		string url = "";
-		word_iter = findStr ( word_iter, a_tag );
-		if ( word_iter != nullptr )
+		const size_t anchor = word.find ( "<a" ); // work with indices into @word itself
+		if ( anchor != string::npos )
 			{
-			auto found_http = findStr ( word_iter, http_start );
-			if ( found_http != nullptr )
+			const size_t href = word.find ( "href=http", anchor );
+			if ( href != string::npos && word.find ( "\">", href ) != string::npos )
 				{
 				url = "http";
-				found_http += 9;
-				auto end_http = findStr ( word_iter, http_end_tag );
-				while ( found_http != end_http )
+				size_t pos = href + 9; // skip "href=http"; the "http" part is already in url
+				const size_t end = word.find ( "\">", href );
+				while ( pos < end )
 					{
-					url += *found_http;
-					++found_http;
+					url += word[ pos ];
+					++pos;
 					}
 				}
 			}
 
-		else
-			{
-			return;
-			}
-
-		if ( url != "" )
-			{
-			URL_PQ.push ( url );
-			}
-
-
+		return url;
 		}
 
 	/**
-	 * <title >AJF</title>
+	 * Returns the text between <title> and </title> in @word, or "" if either tag is missing
 	 * @param word
+	 * @return the title text, or ""
 	 */
-
-	bool check_title ( string & word )
+	string extract_title ( string & word )
 		{
-		if ( char *pos = strstr ( "<title>", word ) )
+		string title = "";
+		const size_t open = word.find ( "<title>" );
+		if ( open != string::npos )
 			{
-			pos += 6;
-			auto end_pos = strstr ( "</title>", word );
-			string title = "";
-			while ( pos != end_pos )
+			const size_t start = open + 7; // first character after "<title>"
+			const size_t close = word.find ( "</title>", start );
+			if ( close != string::npos )
 				{
-				++pos;
-				title += *pos;
-
+				title = word.substr ( start, close - start );
 				}
-
-			return title;
 			}
-
-//        string begin_title = "<title>";
-//        auto word_begin = word.begin();
-//        auto word_iter = findStr(word_begin, begin_title);
-
+		return title;
 		}
 
 	};
diff --git a/parser/tests/ParserTest_endtoend.cpp b/parser/tests/ParserTest_endtoend.cpp
deleted file mode 100644
index 7664468..0000000
--- a/parser/tests/ParserTest_endtoend.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-//
-// Created by anvia on 2/6/2018.
-//
-
-#include <string>
-#include "../../util/stringProcessing.h"
-#include <iostream>
-using namespace std;
-
-int main()
-{
-    string original = "It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout."
-            "The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here',"
-            "making it look like readable English. ";
-
-    string subStr = "readable";
-    auto iter = findStr(subStr, original);
-    cout << *iter << endl;
-}
-
diff --git a/parser/tests/ParserTest_unit.cpp b/parser/tests/ParserTest_unit.cpp
deleted file mode 100644
index 136907e..0000000
--- a/parser/tests/ParserTest_unit.cpp
+++ /dev/null
@@ -1,4 +0,0 @@
-//
-// Created by Veronica Day on 2/13/18.
-//
-
diff --git a/parser/tests/parserTest.cpp b/parser/tests/parserTest.cpp
new file mode 100644
index 0000000..bc5248b
--- /dev/null
+++ b/parser/tests/parserTest.cpp
@@ -0,0 +1,45 @@
+//
+// Created by anvia on 2/6/2018.
+//
+
+#include <string>
+#include <cassert>
+#include <iostream>
+#include "../Parser.h"
+#include "../../shared/Document.h"
+#include "../../shared/ProducerConsumerQueue.h"
+
+using namespace std;
+
+int main ( )
+	{
+	cout << "Testing Parser ... " << endl << endl;
+	ProducerConsumerQueue < string > urlFrontierTest; // a real queue: the parser pushes found urls into it
+	Document document ( "<!DOCTYPE html>\n"
+			                    "<html>\n"
+			                    "<head>\n"
+			                    "<!-- HTML Codes by Quackit.com -->\n"
+			                    "<title>\n"
+			                    "Story of Cat</title>\n"
+			                    "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">\n"
+			                    "<meta name=\"keywords\" content=\"cat story\">\n"
+			                    "<meta name=\"description\" content=\"This is the tale of a cat names joe\">\n"
+			                    "<style>\n"
+			                    "body {background-color:#ffffff;background-repeat:no-repeat;background-position:top left;background-attachment:fixed;}\n"
+			                    "h1{font-family:Arial, sans-serif;color:#000000;background-color:#ffffff;}\n"
+			                    "p {font-family:Georgia, serif;font-size:14px;font-style:normal;font-weight:normal;color:#000000;background-color:#ffffff;}\n"
+			                    "</style>\n"
+			                    "</head>\n"
+			                    "<body>\n"
+			                    "<h1>Joe the cat</h1>\n"
+			                    "<p>On Saturday, joe the cat went to the store. He climbed up a mountain? It was weird. The store was called Food Store</p>\n"
+			                    "</body>\n"
+			                    "</html>" );
+
+	Parser parser ( &urlFrontierTest ); // the queue outlives the parser within this scope
+	auto dictionary = parser.execute ( &document );
+	assert( dictionary != nullptr );
+	cout << "Parser Tests Passed! :D" << endl;
+
+	}
+
diff --git a/shared/Document.h b/shared/Document.h
new file mode 100644
index 0000000..5aca64d
--- /dev/null
+++ b/shared/Document.h
@@ -0,0 +1,129 @@
+//
+// Created by Jake Close on 2/8/18.
+//
+
+#pragma once
+
+#include "url.h"
+#include <string>
+#include <vector>
+#include <pthread.h>
+
+using namespace std;
+
+namespace filepath
+	{
+		const char *DOC_MAP = "/docMap.txt";
+	}
+
+
+pthread_mutex_t docMap_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+class Document
+	{
+private:
+	ParsedUrl url;
+	long docID;
+	bool lastCrawlStatus;
+	int lastCrawlDate;
+	int lastCrawlPageCount;
+
+	//add more info fields here
+
+public:
+	Document ( string url_in ) : url ( ParsedUrl ( url_in ) )
+		{ }
+
+	string DocToString ( )
+		{
+		return string ( url.CompleteUrl, strlen ( url.CompleteUrl ) ) + "\n";
+		}
+
+	int WriteToDocMap ( )
+		{
+
+		pthread_mutex_lock ( &docMap_mutex );
+
+		//for now just write url
+
+		string loc = util::GetCurrentWorkingDir ( ) + filepath::DOC_MAP;
+		int file = util::getFileDescriptor ( loc.c_str ( ), "W" );
+		off_t resultPosition = 0;
+
+		try
+			{
+			//check if its available
+			if ( file == -1 )
+				{
+				throw ( "error opening docMap" );
+				}
+			else
+				{
+				//get the current size of the docMap
+				size_t seekPosition = util::FileSize ( file );
+				//seek to the end of the file
+				resultPosition = lseek ( file, seekPosition, SEEK_SET );
+
+				if ( resultPosition == -1 )
+					{
+					throw ( "Could not seek" );
+					}
+				cout << "Current docMap position on disk" << endl;
+				cout << resultPosition << endl;
+
+				size_t success = write ( file, this->DocToString ( ).c_str ( ),
+				                         strlen ( this->DocToString ( ).c_str ( ) ) );
+				if ( success == -1 )
+					{
+					throw ( "Error writing document object to document map" );
+					}
+				}
+			}
+		catch ( const char *str )
+			{
+			cerr << str << endl;
+			close ( file );
+			pthread_mutex_unlock ( &docMap_mutex );
+			return -1;
+			}
+		close ( file );
+		pthread_mutex_unlock ( &docMap_mutex );
+		return resultPosition;
+		}
+
+
+	// Prints "<url> is <location>" followed by the 14 bytes stored at byte offset @location of the docMap.
+	static void PrintDocMap ( string url, int location )
+		{
+		pthread_mutex_lock ( &docMap_mutex );
+
+		std::cout << url << " is " << location;
+
+		string loc = util::GetCurrentWorkingDir ( ) + filepath::DOC_MAP;
+		int file = util::getFileDescriptor ( loc.c_str ( ), "R" );
+
+		// -1 is the failure value (same check as WriteToDocMap); 0 would be a valid descriptor
+		if ( file != -1 )
+			{
+			off_t resultPosition = lseek ( file, ( size_t ) location, SEEK_SET );
+			const int bytes = 14;
+			char buffer[ bytes ]; // stack buffer: nothing to free on any path
+			ssize_t bytesRead = -1;
+			if ( resultPosition != -1 )
+				{
+				bytesRead = read ( file, buffer, bytes );
+				}
+			if ( bytesRead > 0 )
+				{
+				write ( 1, buffer, bytesRead );
+				}
+			else
+				{
+				cerr << "Could not read " << bytes << " bytes at position " <<
+				     resultPosition << ", error = " << errno;
+				}
+			close ( file );
+			}
+		pthread_mutex_unlock ( &docMap_mutex );
+		}
+	};
\ No newline at end of file
diff --git a/shared/documentMap.h b/shared/documentMap.h
deleted file mode 100644
index 720854e..0000000
--- a/shared/documentMap.h
+++ /dev/null
@@ -1,121 +0,0 @@
-//
-// Created by Jake Close on 2/8/18.
-//
-
-#pragma once
-
-#include "url.h"
-#include <string>
-#include <vector>
-#include <pthread.h>
-
-using namespace std;
-
-namespace filepath
-	{
-		const char* DOC_MAP = "/docMap.txt";
-	}
-
-
-	pthread_mutex_t docMap_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-class Document
-	{
-	private:
-		ParsedUrl url;
-		long docID;
-		bool lastCrawlStatus;
-		int lastCrawlDate;
-		int lastCrawlPageCount;
-
-		//add more info fields here
-
-	public:
-		Document(string url_in) : url(ParsedUrl(url_in)) {}
-
-		string DocToString()
-			{
-			return string(url.CompleteUrl, strlen(url.CompleteUrl)) + "\n";
-			}
-
-		int WriteToDocMap()
-			{
-
-			pthread_mutex_lock(&docMap_mutex);
-
-			//for now just write url
-
-			string loc = util::GetCurrentWorkingDir() + filepath::DOC_MAP;
-			int file = util::getFileDescriptor(loc.c_str(), "W");
-			off_t resultPosition = 0;
-
-			try {
-				//check if its available
-				if (file == -1) {
-					throw("error opening docMap");
-				} else {
-					//get the current size of the docMap
-					size_t seekPosition = util::FileSize(file);
-					//seek to the end of the file
-					resultPosition = lseek(file, seekPosition, SEEK_SET);
-
-					if (resultPosition == -1) {
-						throw("Could not seek");
-					}
-					cout << "Current docMap position on disk" << endl;
-					cout << resultPosition << endl;
-
-					size_t success = write(file, this->DocToString().c_str(), strlen(this->DocToString().c_str()));
-					if (success == -1) {
-						throw("Error writing document object to document map");
-					}
-				}
-			}
-			catch(const char* str){
-				cerr << str << endl;
-				close(file);
-				pthread_mutex_unlock(&docMap_mutex);
-				return -1;
-			}
-			close( file );
-			pthread_mutex_unlock(&docMap_mutex);
-			return resultPosition;
-			}
-
-
-
-		static void PrintDocMap(string url, int location)
-			{
-			pthread_mutex_lock(&docMap_mutex);
-
-			std::cout << url << " is " << location;
-
-			string loc = util::GetCurrentWorkingDir() + filepath::DOC_MAP;
-			int file = util::getFileDescriptor( loc.c_str(), "R" );
-
-
-			//check if its available
-			if ( file )
-			{
-				off_t resultPosition = lseek( file, (size_t)location, SEEK_SET );
-				int bytes = 14;
-				if ( bytes > 0 )
-				{
-					char *buffer = new char[bytes];
-					ssize_t bytesRead;
-					if ( bytesRead = read( file, buffer, bytes ))
-						write( 1, buffer, bytesRead );
-					else
-					{
-						cerr << "Could not read " << bytes << " bytes at position " <<
-							 resultPosition << ", error = " << errno;
-						pthread_mutex_unlock(&docMap_mutex);
-						return;
-					}
-				}
-
-			}
-			pthread_mutex_unlock(&docMap_mutex);
-			return;
-			}
-	};
\ No newline at end of file
diff --git a/util/Stemmer.h b/util/Stemmer.h
new file mode 100644
index 0000000..84e1990
--- /dev/null
+++ b/util/Stemmer.h
@@ -0,0 +1,15 @@
+//
+// Created by Veronica Day on 2/22/18.
+//
+
+#ifndef EECS398_SEARCH_STEMMER_H
+#define EECS398_SEARCH_STEMMER_H
+
+
+class Stemmer
+	{
+
+	};
+
+
+#endif //EECS398_SEARCH_STEMMER_H
diff --git a/parser/Tokenizer.h b/util/Tokenizer.h
similarity index 95%
rename from parser/Tokenizer.h
rename to util/Tokenizer.h
index a3443fe..3e28002 100644
--- a/parser/Tokenizer.h
+++ b/util/Tokenizer.h
@@ -5,7 +5,7 @@
 #include <string>
 #include <unordered_map>
 #include <vector>
-#include "../util/stringProcessing.h"
+#include "stringProcessing.h"
 
 using namespace std;
 
diff --git a/util/stringProcessing.h b/util/stringProcessing.h
index 8c746f4..0afdee9 100644
--- a/util/stringProcessing.h
+++ b/util/stringProcessing.h
@@ -13,46 +13,56 @@
 
 using namespace std;
 
-
-/*
- * Takes in an iterator to the original text and a substring: specifically for a parser functionality
- * Potentially make one that takes in two strings? Is this needed?
+/**
+ * Set of lowercase stopwords looked up by isStopWord
+ */
+set< string > stopWords = { "a", "all", "an", "and", "any", "are", "as", "at", "be", "been", "but", "by", "few", "from",
+                            "for", "have", "he", "her", "here", "him", "his", "how",
+                            "i", "in", "is", "it", "its", "many", "me", "my", "none", "of", "on", "or", "our", "she",
+                            "some", "the", "their", "them", "there", "they", "that",
+                            "this", "to", "us", "was", "what", "when", "where", "which", "who", "why", "will", "with",
+                            "you", "your" };
+/**
+ * Finds the needle in the haystack
+ * @param haystack
+ * @param needle
+ * @return
  */
-string::iterator findStr ( string::iterator originalText, string & subStr )
+string::iterator findStr ( string haystack, string needle )
 	{
 
-	auto begin_sub = subStr.begin ( );
-	auto begin_original = originalText;
+	auto beginNeedle = needle.begin ( );
+	auto beginHaystack = haystack.begin();
 
-	while ( *begin_original != '\0' ) //*(forward++) != '\0'
+	while ( *beginHaystack != '\0' )
 		{
 		//keep looking for instance of a match
-		if ( *begin_original != *begin_sub )
+		if ( *beginHaystack != *beginNeedle )
 			{
-			++begin_original;
+			++beginHaystack;
 			}
 
-		else if ( *begin_original == *begin_sub )
+		else if ( *beginHaystack == *beginNeedle )
 			{
 			/* want to keep the original iterator where it is so it
 				can return the beginning of the matched word if found */
-			auto temp = begin_original;
-			while ( *temp == *begin_sub )
+			auto temp = beginHaystack;
+			while ( *temp == *beginNeedle )
 				{
 				++temp;
-				++begin_sub;
-				//if it hits the end of the substring, it signifies an exact match
-				if ( *begin_sub == '\0' )
+				++beginNeedle;
+				//if it hits the end of the needle, it signifies an exact match
+				if ( *beginNeedle == '\0' )
 					{
 					//this is pointing at the beginning of the match
-					return begin_original;
+					return beginHaystack;
 					}
 
 				}
 			//need to reset because still has to search rest of the string for a match
-			begin_sub = subStr.begin ( );
+			beginNeedle = needle.begin ( );
 			//sets the original text pointer to where the last search left off
-			begin_original = temp;
+			beginHaystack = temp;
 			}
 
 		else
@@ -61,18 +71,19 @@ string::iterator findStr ( string::iterator originalText, string & subStr )
 			}
 		}
 
-	return begin_original;
+	return beginHaystack;
 
 	}
 
-set< string > stopWords = { "a", "all", "an", "and", "any", "are", "as", "at", "be", "been", "but", "by", "few", "from",
-                            "for", "have", "he", "her", "here", "him", "his", "how",
-                            "i", "in", "is", "it", "its", "many ", "me", "my", "none", "of", "on", "or", "our", "she",
-                            "some", "the", "their", "them", "there", "they", "that",
-                            "this", "to", "us", "was", "what", "when", "where", "which", "who", "why", "will", "with",
-                            "you", "your" };
 
-vector< string > splitStr ( string & originalText, char delim )
+
+/**
+ * Returns a vector of strings from @originalText, split by @delim
+ * @param originalText
+ * @param delim
+ * @return
+ */
+vector< string > splitStr ( string originalText, char delim )
 	{
 	vector< string > splitWords;
 	auto begin = originalText.begin ( );
@@ -93,14 +104,22 @@ vector< string > splitStr ( string & originalText, char delim )
 	return splitWords;
 
 	}
-
-bool isStopWord ( string & word )
+/**
+ * Returns true if @word is a stopword
+ * @param word looked up as-is (no case folding or trimming is done here)
+ * @return true when @word is an element of stopWords
+ */
+bool isStopWord ( const string & word )
 	{
 	return ( stopWords.find ( word ) != stopWords.end ( ) );
 
 	}
-
-string toLower ( string & word )
+/**
+ * Returns lowercase @word
+ * @param word
+ * @return
+ */
+string toLower ( string word )
 	{
 	auto iter = word.begin ( );
 	string lowerWord = "";
@@ -121,4 +140,15 @@ string toLower ( string & word )
 	return lowerWord;
 	}
 
+//TODO
+/**
+ * Returns stemmed @word
+ * @param word
+ * @return
+ */
+string stemWord(string word)
+	{
+	return "";
+	}
+
 #endif //EECS398_SEARCH_STRINGPROCESSING_H
diff --git a/util/tests/stemmerTest.cpp b/util/tests/stemmerTest.cpp
new file mode 100644
index 0000000..f942e1a
--- /dev/null
+++ b/util/tests/stemmerTest.cpp
@@ -0,0 +1,4 @@
+//
+// Created by Veronica Day on 2/22/18.
+//
+
diff --git a/parser/tests/StringProcessing_unit.cpp b/util/tests/stringProcessingTest.cpp
similarity index 56%
rename from parser/tests/StringProcessing_unit.cpp
rename to util/tests/stringProcessingTest.cpp
index 3643119..3562e92 100644
--- a/parser/tests/StringProcessing_unit.cpp
+++ b/util/tests/stringProcessingTest.cpp
@@ -4,19 +4,19 @@
 
 #include <string>
 #include <vector>
-#include "../../util/stringProcessing.h"
+#include "../stringProcessing.h"
 #include <iostream>
 #include <cassert>
 
 using namespace std;
 
-void test_findStr ( string original );
+void testFindStr ( string original );
 
-void test_splitStr ( string original );
+void testSplitStr ( string original );
 
-void test_toLower ( );
+void testToLower ( );
 
-void test_isStopWord ( );
+void testIsStopWord ( );
 
 int main ( )
 	{
@@ -27,66 +27,46 @@ int main ( )
 			"The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here',"
 			"making it look like readable English. ";
 
-	test_findStr ( original );
-	test_splitStr ( original );
-	test_toLower ( );
-	test_isStopWord ( );
+	testFindStr ( original );
+	testSplitStr ( original );
+	testToLower ( );
+	testIsStopWord ( );
 
 	cout << "\nTests passed for StringProcessing_unit :D" << endl;
 
 	}
 
-void test_findStr ( string original )
+void testFindStr ( string original )
 	{
 	cout << "Testing findStr..." << endl;
-
-	string find = "established";
-	auto word = findStr ( original.begin ( ), find );
-	assert( *word == 'e' );
-
-	find = "Lorem Ipsum";
-	auto word2 = findStr ( original.begin ( ), find );
-	assert( *word2 == 'L' );
+	assert( *findStr ( original, "established" ) == 'e' );
+	assert( *findStr ( original, "Lorem Ipsum" ) == 'L' );
 
 	string title = "<title> This is a test </title>";
-	find = "<title>";
-	auto word3 = findStr ( title.begin ( ), find );
-	assert( *word3 == '<' );
+	auto word = findStr ( title, "<title>" );
+	assert( *word == '<' );
 	auto titleIt = title.begin ( );
-	while ( word3 != title.end ( ) && titleIt != title.end ( ) )
+	while ( word != title.end ( ) && titleIt != title.end ( ) )
 		{
-		assert( *word3 == *titleIt );
-		++word3;
+		assert( *word == *titleIt );
+		++word;
 		++titleIt;
 		}
 
-	find = "</title>";
-	auto word4 = findStr ( title.begin ( ), find );
-	assert( *word4 == '<' && *( word4 + 1 ) == '/' );
-
-	auto word0 = findStr ( original.begin ( ), find );
-	assert( *word0 == '\0' );
-
-	find = "orange";
-	auto word5 = findStr ( original.begin ( ), find );
-	assert( *word5 == '\0' );
-
-	find = "orange";
-	string test = "apple";
-	auto word7 = findStr ( test.begin ( ), find );
-	assert( *word7 == '\0' );
-
-	find = "bird";
-	test = "bigbird";
-	auto word6 = findStr ( test.begin ( ), find );
-	assert( *word6 == 'b' && *( word6 + 1 ) == 'i' && *( word6 + 2 ) == 'r' );
+	auto word1 = findStr ( title, "</title>" );
+	assert( *word1 == '<' && *( word1 + 1 ) == '/' );
+	assert( *findStr ( original, "</title>" ) == '\0' );
+	assert( *findStr ( original, "orange" ) == '\0' );
+	assert( *findStr ( "apple", "orange" ) == '\0' );
+	auto word2 = findStr ( "bigbird", "bird" );
+	assert( *word2 == 'b' && *( word2 + 1 ) == 'i' && *( word2 + 2 ) == 'r' );
 
-	cout << "test_findStr passed" << endl;
+	cout << "testFindStr passed" << endl;
 
 	}
 
 
-void test_splitStr ( string original )
+void testSplitStr ( string original )
 	{
 	cout << "Testing splitStr..." << endl;
 
@@ -98,12 +78,12 @@ void test_splitStr ( string original )
 	assert( vec.size ( ) == 2 );
 	assert( vec[ 0 ] == "hello" && vec[ 1 ] == "goodbye" );
 
-	cout << "test_splitStr passed" << endl;
+	cout << "testSplitStr passed" << endl;
 
 	}
 
 
-void test_toLower ( )
+void testToLower ( )
 	{
 	cout << "Testing toLower..." << endl;
 
@@ -126,11 +106,11 @@ void test_toLower ( )
 	assert ( test4 == "" );
 	assert ( test5 == " " );
 
-	cout << "test_toLower passed" << endl;
+	cout << "testToLower passed" << endl;
 	}
 
 
-void test_isStopWord ( )
+void testIsStopWord ( )
 	{
 	cout << "Testing isStopWord..." << endl;
 
@@ -146,6 +126,6 @@ void test_isStopWord ( )
 	assert ( !isStopWord ( blank ) );
 	assert ( !isStopWord ( blank2 ) );
 
-	cout << "test_isStopWord passed" << endl;
+	cout << "testIsStopWord passed" << endl;
 
 	}
\ No newline at end of file
diff --git a/parser/tests/TokenizerTest_unit.cpp b/util/tests/tokenizerTest.cpp
similarity index 81%
rename from parser/tests/TokenizerTest_unit.cpp
rename to util/tests/tokenizerTest.cpp
index bc189c5..0ccb13b 100644
--- a/parser/tests/TokenizerTest_unit.cpp
+++ b/util/tests/tokenizerTest.cpp
@@ -10,7 +10,7 @@
 
 using namespace std;
 
-void test_execute ( string original );
+void testExecute ( string original );
 
 
 int main ( )
@@ -22,18 +22,18 @@ int main ( )
 			"The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here',"
 			"making it look like readable English. ";
 
-	test_execute ( original );
+	testExecute ( original );
 
 	cout << "\nTests passed for TokenizerTest_unit :D" << endl;
 
 	}
 
-void test_execute ( string original )
+void testExecute ( string original )
 	{
-	Tokenizer my_tokenizer;
-	my_tokenizer.execute ( original );
+	Tokenizer myTokenizer;
+	myTokenizer.execute ( original );
 
-	auto dict = my_tokenizer.get ( );
+	auto dict = myTokenizer.get ( );
 
 	for ( auto it = dict->begin ( ); it != dict->end ( ); it++ )
 		{
-- 
GitLab