diff --git a/.DS_Store b/.DS_Store index b13db4a0192f72a9f55abaca384c4156fb1366c4..407ff4f142413934bf7b250f292da7433a0250ce 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/CMakeLists.txt b/CMakeLists.txt index 721b631ce623a1a75a8d9f2e6eec338dd9b6b1c7..9016caf60a3fa8ccb51a8b35a1c8373d8b5933cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,8 +16,8 @@ add_executable(crawler-parser-test crawler/LocalReader.h shared/Document.cpp parser/Parser.cpp - util/Tokenizer.cpp util/Stemmer.cpp + util/Tokenizer.cpp util/stringProcessing.cpp) @@ -53,7 +53,7 @@ add_executable(URLTEST shared/url.h shared/urlTest.cpp) add_executable(search-engine search.cpp query/Query.cpp) -add_executable(ISRWord-tests constraintSolver/tests/ISRWordTests.cpp) +add_executable(ISRWord-tests util/util.cpp constraintSolver/ISRWord.cpp constraintSolver/tests/ISRWordTests.cpp ) find_package(OpenSSL REQUIRED) diff --git a/StringProcessingTest b/StringProcessingTest deleted file mode 100755 index 0707c11d67c865df0c9d81b982de47d211cb2088..0000000000000000000000000000000000000000 Binary files a/StringProcessingTest and /dev/null differ diff --git a/URLTEST b/URLTEST deleted file mode 100755 index e4798c6edbdcb8295d737208bf37be67f1373d99..0000000000000000000000000000000000000000 Binary files a/URLTEST and /dev/null differ diff --git a/constraintSolver/ISROr.cpp b/constraintSolver/ISROr.cpp index 9e9ac82e15fd3486aab7d268dba1e1976b0d1399..3bc1215374b31ca59bc949e4c1dd72cb21db654d 100644 --- a/constraintSolver/ISROr.cpp +++ b/constraintSolver/ISROr.cpp @@ -3,3 +3,68 @@ // #include "ISROr.h" + + +Location ISROr::GetStartLocation( ) + { + return nearestStartLocation; + } + + + +Location ISROr::GetEndLocation( ) + { + return nearestEndLocation; + } + + +Location ISROr::Seek( Location target ) + { + + // Seek all the ISRs to the first occurrence beginning at// the target location. Return null if there is no match. + // The document is the document containing the nearest term. + //seek past target locations, + //seek all terms in or past starting location, take the ones that nears + //the document that the nearest term is in is the document ur in + //updates private members + + + + + + + } +/* +Returns the location of the next document that is a match +*/ +ISR* ISROr::Next() + { + Location nearestEnd = this->nearestTerm->GetEndDocument(); + + while(*Terms) + { + Location newSeekLocation = *Terms->Seek(nearestEnd + 1); + if(newSeekLocation < nearestStartLocation) + { + nearestStartLocation = newSeekLocation; + nearestTerm = *Term; + } + *Terms++; + } + + return this->nearestTerm->GetDocumentISR(); + + + + + + + } + +ISR* ISROR::GetCurrentEndDoc( ){ + + return this->nearestTerm->GetDocumentISR(); + + } + + diff --git a/constraintSolver/ISROr.h b/constraintSolver/ISROr.h index fd4f5bf4ddd75e4fa51b9d36cf99f43b09f5f482..7927709f75e56ae7a5623c7f166cd95d8ce41e32 100644 --- a/constraintSolver/ISROr.h +++ b/constraintSolver/ISROr.h @@ -15,23 +15,46 @@ class ISROr : publicISR ISR ** Terms; unsigned NumberOfTerms; - Location GetStartLocation( );//{return nearestStartLocation;} - Location GetEndLocation( );// {return nearestEndLocation;} - Post* Seek( Location target); - // Seek all the ISRs to the first occurrence beginning at// the target location. Return null if there is no match. - // The document is the document containing the nearest term. - //seek past target locations, - //seek all terms in or past starting location, take the ones that nears - //the document that the nearest term is in is the document ur in - //updates private members - Post* Next( ); + Location GetStartLocation( ); + Location GetEndLocation( ); + Location Seek( Location target); + ISR* GetCurrentEndDoc( ); + Location Next( ); //{ Do a next on the nearest term, then return// the new nearest match.} //next on nearest term, return nearest temr - Post* NextDocument( ); + + + Location NextDocument( ); // // { Seek all the ISRs to the first occurrence just past the end of this document.returnSeek( DocumentEnd->GetEndLocation( ) + 1 );} + + + ISROr(ISR ** InputTerms) : Terms(InputTerms) { + + ISR* currentTerm = *InputTerms; + While( *currentTerm ) + { + currentTerm->First(); + Location currentLocation = currentTerm->currentLocation; + if(currentLocation < nearestStartLocation ) + { + nearestTerm = currentTerm; + nearestStartLocation = currentLocation; + + } + if(currentLocation > nearestEndLocation) + { + nearestEndLocation = currentLocation; + } + ++NumberOfTerms; + *currentTerm++; + + } + + } + private: - unsigned nearestTerm; + ISR* nearestTerm; // nearStartLocation and nearestEndLocation are// the start and end of the nearestTerm. Location nearestStartLocation, nearestEndLocation; diff --git a/constraintSolver/ISRWord.cpp b/constraintSolver/ISRWord.cpp index ba1ba7f0e3091ca6411300a170c6f64e67940963..a3497302d8e5c355e91e5fb0a03a2bf887af7e04 100644 --- a/constraintSolver/ISRWord.cpp +++ b/constraintSolver/ISRWord.cpp @@ -4,6 +4,8 @@ #include "ISRWord.h" +using namespace std; + ISRWord::ISRWord(char* word) : term(word) { getChunks(); currentChunk = 0; @@ -15,7 +17,10 @@ vector<size_t> ISRWord::getSeekContents(string fileName) { int file = open(fileName.c_str(), O_RDONLY); ssize_t fileSize = FileSize(file); vector<size_t> contents; + + char* memMap = (char*) mmap(nullptr, fileSize, PROT_READ, MAP_PRIVATE, file, 0); + // char* memMap = util::getFileMap(fileName); string word = ""; bool midWord = false; bool midFind = false; @@ -50,6 +55,7 @@ vector<size_t> ISRWord::getSeekContents(string fileName) { } void ISRWord::getChunks() { + listOfChunks = getSeekContents("index-test-files/twitter/index-master.txt"); // int chunkFile = open("index-test-files/twitter/index-master.txt", O_RDONLY); // ssize_t chunkFileSize = FileSize(chunkFile); diff --git a/constraintSolver/ISRWord.h b/constraintSolver/ISRWord.h index 5a1a6b21437df3049abf41f95515df1805e1bc36..e0eb8c86f7660a412d3d5ba8f39500dd441f705c 100644 --- a/constraintSolver/ISRWord.h +++ b/constraintSolver/ISRWord.h @@ -14,6 +14,9 @@ #include <sys/stat.h> #include <sys/types.h> +//#include "../util/util.h" + + size_t FileSize(int f) { struct stat fileInfo; fstat( f, &fileInfo); @@ -52,6 +55,8 @@ public: //set member variables to all of the chunks that occur, update current chunk void getChunks(); + + private: }; diff --git a/constraintSolver/tests/ISRWordTests.cpp b/constraintSolver/tests/ISRWordTests.cpp index 47d9f9b79679a30b0f00f71d63c646b2d0f861f3..d34c5ea37b28347d43470c73532d048e25f61cf6 100644 --- a/constraintSolver/tests/ISRWordTests.cpp +++ b/constraintSolver/tests/ISRWordTests.cpp @@ -6,10 +6,14 @@ #include "../ISRWord.h" #include "../ISRWord.cpp" + using namespace std; int main() { - ISRWord word = ISRWord("hello"); + char* w = new char [ 10 ]; + strcpy(w, "hello"); + ISRWord word = ISRWord(w); + while(1) { cout << word.next() << endl; } diff --git a/crawler-parser-test b/crawler-parser-test index 96073de455b2c3109363bb47103b3dbd7f53a204..e57fee527da5971f210dfae7dcc06d26661452a1 100755 Binary files a/crawler-parser-test and b/crawler-parser-test differ diff --git a/crawler/crawler.h b/crawler/crawler.h index 671acb2a879c45cdf2f25bdaa65e1932a8a59f1b..9b6c93880c28fa6d8c3575561bd9e2a479404c0f 100644 --- a/crawler/crawler.h +++ b/crawler/crawler.h @@ -16,7 +16,7 @@ class Crawler { public: - Crawler( string mode_in, ProducerConsumerQueue < string > *url_q_in ) + Crawler( string mode_in, ProducerConsumerQueue < ParsedUrl > *url_q_in ) : mode( mode_in ), urlFrontier( url_q_in ) { }; @@ -30,7 +30,7 @@ public: private: vector < Spider * > spiders; - ProducerConsumerQueue < string > *urlFrontier; + ProducerConsumerQueue < ParsedUrl > *urlFrontier; //CrawlerStatistics housekeeper; string mode; diff --git a/crawler/spider.cpp b/crawler/spider.cpp index d377fab22f4ecf9212037b5d95b2a0775c43005c..33333811ed07a68368c6143608a02045429e51c2 100644 --- a/crawler/spider.cpp +++ b/crawler/spider.cpp @@ -30,7 +30,7 @@ size_t Spider::hash(const char * s){ } -string Spider::getUrl() +ParsedUrl Spider::getUrl() { return urlFrontier->Pop( ); } @@ -46,9 +46,9 @@ void Spider::FuncToRun() { - string stringUrl = getUrl( ); //get url from url frontier + // ParsedUrl stringUrl = getUrl( ); //get url from url frontier char *fileMap; - ParsedUrl currentUrl = ParsedUrl(stringUrl); + ParsedUrl currentUrl = getUrl(); //url has not seen before or time since seen is past certain criteria if ( shouldURLbeCrawled( currentUrl )) { diff --git a/crawler/spider.h b/crawler/spider.h index 6b589bfe1127d9245a9790c1f934c767f84b03fd..cb58d793886d557e8dc145ff42aa5b56f32bf77f 100644 --- a/crawler/spider.h +++ b/crawler/spider.h @@ -23,7 +23,7 @@ class Spider : public ThreadClass public: - Spider( string mode_in, ProducerConsumerQueue < string > *url_q_in, + Spider( string mode_in, ProducerConsumerQueue < ParsedUrl > *url_q_in, unordered_map < string, int > *doc_map_lookup_in ) : mode( mode_in ), urlFrontier( url_q_in ), docMapLookup( doc_map_lookup_in ), parser( url_q_in) { @@ -31,7 +31,7 @@ public: //Takes a url off of the url frontier - string getUrl(); + ParsedUrl getUrl(); virtual void FuncToRun(); @@ -51,7 +51,7 @@ public: private: int locationOnDisk; - ProducerConsumerQueue < string > *urlFrontier; + ProducerConsumerQueue < ParsedUrl > *urlFrontier; string mode; unordered_map < string, int > *docMapLookup; Parser parser; diff --git a/main.cpp b/main.cpp index 94a8f9f1312ee31bf475368ab2073649160144f6..29b800a0163d9457e9bf7ebb09dc9474fce10a64 100644 --- a/main.cpp +++ b/main.cpp @@ -89,7 +89,7 @@ int main( int argc, char *argv[] ) bool restoreFromLog; - ProducerConsumerQueue < string > urlFrontier; + ProducerConsumerQueue < ParsedUrl > urlFrontier; cout << "Pushed File\n"; char *seeds; @@ -104,7 +104,8 @@ int main( int argc, char *argv[] ) if ( *seeds == '\n') { cout << "Pushing to Url Frontier..." << endl; - urlFrontier.Push(testFile); + ParsedUrl url = ParsedUrl(testFile); + urlFrontier.Push(url); testFile = ""; } diff --git a/makefile b/makefile index 9b55638f3a526bf4fbf94fcc75485773918350b5..48c2b284b540d8e068b0d9657486380df79e7350 100644 --- a/makefile +++ b/makefile @@ -111,17 +111,17 @@ depend: .PHONY : depend #============================================================================= -# Target rules for targets named URLTEST +# Target rules for targets named ISRWord-tests # Build rule for target. -URLTEST: cmake_check_build_system - $(MAKE) -f CMakeFiles/Makefile2 URLTEST -.PHONY : URLTEST +ISRWord-tests: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 ISRWord-tests +.PHONY : ISRWord-tests # fast build rule for target. -URLTEST/fast: - $(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/build -.PHONY : URLTEST/fast +ISRWord-tests/fast: + $(MAKE) -f CMakeFiles/ISRWord-tests.dir/build.make CMakeFiles/ISRWord-tests.dir/build +.PHONY : ISRWord-tests/fast #============================================================================= # Target rules for targets named StemmerTest @@ -137,17 +137,17 @@ StemmerTest/fast: .PHONY : StemmerTest/fast #============================================================================= -# Target rules for targets named ParserEndToEndTest +# Target rules for targets named StringProcessingTest # Build rule for target. -ParserEndToEndTest: cmake_check_build_system - $(MAKE) -f CMakeFiles/Makefile2 ParserEndToEndTest -.PHONY : ParserEndToEndTest +StringProcessingTest: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 StringProcessingTest +.PHONY : StringProcessingTest # fast build rule for target. -ParserEndToEndTest/fast: - $(MAKE) -f CMakeFiles/ParserEndToEndTest.dir/build.make CMakeFiles/ParserEndToEndTest.dir/build -.PHONY : ParserEndToEndTest/fast +StringProcessingTest/fast: + $(MAKE) -f CMakeFiles/StringProcessingTest.dir/build.make CMakeFiles/StringProcessingTest.dir/build +.PHONY : StringProcessingTest/fast #============================================================================= # Target rules for targets named TokenizerTest @@ -163,17 +163,30 @@ TokenizerTest/fast: .PHONY : TokenizerTest/fast #============================================================================= -# Target rules for targets named StringProcessingTest +# Target rules for targets named URLTEST # Build rule for target. -StringProcessingTest: cmake_check_build_system - $(MAKE) -f CMakeFiles/Makefile2 StringProcessingTest -.PHONY : StringProcessingTest +URLTEST: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 URLTEST +.PHONY : URLTEST # fast build rule for target. -StringProcessingTest/fast: - $(MAKE) -f CMakeFiles/StringProcessingTest.dir/build.make CMakeFiles/StringProcessingTest.dir/build -.PHONY : StringProcessingTest/fast +URLTEST/fast: + $(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/build +.PHONY : URLTEST/fast + +#============================================================================= +# Target rules for targets named ParserTest + +# Build rule for target. +ParserTest: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 ParserTest +.PHONY : ParserTest + +# fast build rule for target. +ParserTest/fast: + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/build +.PHONY : ParserTest/fast #============================================================================= # Target rules for targets named search-engine @@ -201,6 +214,60 @@ crawler-parser-test/fast: $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/build .PHONY : crawler-parser-test/fast +constraintSolver/ISRWord.o: constraintSolver/ISRWord.cpp.o + +.PHONY : constraintSolver/ISRWord.o + +# target to build an object file +constraintSolver/ISRWord.cpp.o: + $(MAKE) -f CMakeFiles/ISRWord-tests.dir/build.make CMakeFiles/ISRWord-tests.dir/constraintSolver/ISRWord.cpp.o +.PHONY : constraintSolver/ISRWord.cpp.o + +constraintSolver/ISRWord.i: constraintSolver/ISRWord.cpp.i + +.PHONY : constraintSolver/ISRWord.i + +# target to preprocess a source file +constraintSolver/ISRWord.cpp.i: + $(MAKE) -f CMakeFiles/ISRWord-tests.dir/build.make CMakeFiles/ISRWord-tests.dir/constraintSolver/ISRWord.cpp.i +.PHONY : constraintSolver/ISRWord.cpp.i + +constraintSolver/ISRWord.s: constraintSolver/ISRWord.cpp.s + +.PHONY : constraintSolver/ISRWord.s + +# target to generate assembly for a file +constraintSolver/ISRWord.cpp.s: + $(MAKE) -f CMakeFiles/ISRWord-tests.dir/build.make CMakeFiles/ISRWord-tests.dir/constraintSolver/ISRWord.cpp.s +.PHONY : constraintSolver/ISRWord.cpp.s + +constraintSolver/tests/ISRWordTests.o: constraintSolver/tests/ISRWordTests.cpp.o + +.PHONY : constraintSolver/tests/ISRWordTests.o + +# target to build an object file +constraintSolver/tests/ISRWordTests.cpp.o: + $(MAKE) -f CMakeFiles/ISRWord-tests.dir/build.make CMakeFiles/ISRWord-tests.dir/constraintSolver/tests/ISRWordTests.cpp.o +.PHONY : constraintSolver/tests/ISRWordTests.cpp.o + +constraintSolver/tests/ISRWordTests.i: constraintSolver/tests/ISRWordTests.cpp.i + +.PHONY : constraintSolver/tests/ISRWordTests.i + +# target to preprocess a source file +constraintSolver/tests/ISRWordTests.cpp.i: + $(MAKE) -f CMakeFiles/ISRWord-tests.dir/build.make CMakeFiles/ISRWord-tests.dir/constraintSolver/tests/ISRWordTests.cpp.i +.PHONY : constraintSolver/tests/ISRWordTests.cpp.i + +constraintSolver/tests/ISRWordTests.s: constraintSolver/tests/ISRWordTests.cpp.s + +.PHONY : constraintSolver/tests/ISRWordTests.s + +# target to generate assembly for a file +constraintSolver/tests/ISRWordTests.cpp.s: + $(MAKE) -f CMakeFiles/ISRWord-tests.dir/build.make CMakeFiles/ISRWord-tests.dir/constraintSolver/tests/ISRWordTests.cpp.s +.PHONY : constraintSolver/tests/ISRWordTests.cpp.s + crawler/SocketReader.o: crawler/SocketReader.cpp.o .PHONY : crawler/SocketReader.o @@ -315,7 +382,7 @@ parser/Parser.o: parser/Parser.cpp.o # target to build an object file parser/Parser.cpp.o: - $(MAKE) -f CMakeFiles/ParserEndToEndTest.dir/build.make CMakeFiles/ParserEndToEndTest.dir/parser/Parser.cpp.o + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/parser/Parser.cpp.o $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/parser/Parser.cpp.o .PHONY : parser/Parser.cpp.o @@ -325,7 +392,7 @@ parser/Parser.i: parser/Parser.cpp.i # target to preprocess a source file parser/Parser.cpp.i: - $(MAKE) -f CMakeFiles/ParserEndToEndTest.dir/build.make CMakeFiles/ParserEndToEndTest.dir/parser/Parser.cpp.i + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/parser/Parser.cpp.i $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/parser/Parser.cpp.i .PHONY : parser/Parser.cpp.i @@ -335,7 +402,7 @@ parser/Parser.s: parser/Parser.cpp.s # target to generate assembly for a file parser/Parser.cpp.s: - $(MAKE) -f CMakeFiles/ParserEndToEndTest.dir/build.make CMakeFiles/ParserEndToEndTest.dir/parser/Parser.cpp.s + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/parser/Parser.cpp.s $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/parser/Parser.cpp.s .PHONY : parser/Parser.cpp.s @@ -345,7 +412,7 @@ parser/tests/parserTest.o: parser/tests/parserTest.cpp.o # target to build an object file parser/tests/parserTest.cpp.o: - $(MAKE) -f CMakeFiles/ParserEndToEndTest.dir/build.make CMakeFiles/ParserEndToEndTest.dir/parser/tests/parserTest.cpp.o + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/parser/tests/parserTest.cpp.o .PHONY : parser/tests/parserTest.cpp.o parser/tests/parserTest.i: parser/tests/parserTest.cpp.i @@ -354,7 +421,7 @@ parser/tests/parserTest.i: parser/tests/parserTest.cpp.i # target to preprocess a source file parser/tests/parserTest.cpp.i: - $(MAKE) -f CMakeFiles/ParserEndToEndTest.dir/build.make CMakeFiles/ParserEndToEndTest.dir/parser/tests/parserTest.cpp.i + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/parser/tests/parserTest.cpp.i .PHONY : parser/tests/parserTest.cpp.i parser/tests/parserTest.s: parser/tests/parserTest.cpp.s @@ -363,7 +430,7 @@ parser/tests/parserTest.s: parser/tests/parserTest.cpp.s # target to generate assembly for a file parser/tests/parserTest.cpp.s: - $(MAKE) -f CMakeFiles/ParserEndToEndTest.dir/build.make CMakeFiles/ParserEndToEndTest.dir/parser/tests/parserTest.cpp.s + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/parser/tests/parserTest.cpp.s .PHONY : parser/tests/parserTest.cpp.s query/Query.o: query/Query.cpp.o @@ -426,7 +493,7 @@ shared/Document.o: shared/Document.cpp.o # target to build an object file shared/Document.cpp.o: - $(MAKE) -f CMakeFiles/ParserEndToEndTest.dir/build.make CMakeFiles/ParserEndToEndTest.dir/shared/Document.cpp.o + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/shared/Document.cpp.o $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/shared/Document.cpp.o .PHONY : shared/Document.cpp.o @@ -436,7 +503,7 @@ shared/Document.i: shared/Document.cpp.i # target to preprocess a source file shared/Document.cpp.i: - $(MAKE) -f CMakeFiles/ParserEndToEndTest.dir/build.make CMakeFiles/ParserEndToEndTest.dir/shared/Document.cpp.i + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/shared/Document.cpp.i $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/shared/Document.cpp.i .PHONY : shared/Document.cpp.i @@ -446,7 +513,7 @@ shared/Document.s: shared/Document.cpp.s # target to generate assembly for a file shared/Document.cpp.s: - $(MAKE) -f CMakeFiles/ParserEndToEndTest.dir/build.make CMakeFiles/ParserEndToEndTest.dir/shared/Document.cpp.s + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/shared/Document.cpp.s $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/shared/Document.cpp.s .PHONY : shared/Document.cpp.s @@ -477,14 +544,88 @@ shared/urlTest.cpp.s: $(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/shared/urlTest.cpp.s .PHONY : shared/urlTest.cpp.s +util/Stemmer.o: util/Stemmer.cpp.o + +.PHONY : util/Stemmer.o + +# target to build an object file +util/Stemmer.cpp.o: + $(MAKE) -f CMakeFiles/StemmerTest.dir/build.make CMakeFiles/StemmerTest.dir/util/Stemmer.cpp.o + $(MAKE) -f CMakeFiles/StringProcessingTest.dir/build.make CMakeFiles/StringProcessingTest.dir/util/Stemmer.cpp.o + $(MAKE) -f CMakeFiles/TokenizerTest.dir/build.make CMakeFiles/TokenizerTest.dir/util/Stemmer.cpp.o + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/util/Stemmer.cpp.o + $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/util/Stemmer.cpp.o +.PHONY : util/Stemmer.cpp.o + +util/Stemmer.i: util/Stemmer.cpp.i + +.PHONY : util/Stemmer.i + +# target to preprocess a source file +util/Stemmer.cpp.i: + $(MAKE) -f CMakeFiles/StemmerTest.dir/build.make CMakeFiles/StemmerTest.dir/util/Stemmer.cpp.i + $(MAKE) -f CMakeFiles/StringProcessingTest.dir/build.make CMakeFiles/StringProcessingTest.dir/util/Stemmer.cpp.i + $(MAKE) -f CMakeFiles/TokenizerTest.dir/build.make CMakeFiles/TokenizerTest.dir/util/Stemmer.cpp.i + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/util/Stemmer.cpp.i + $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/util/Stemmer.cpp.i +.PHONY : util/Stemmer.cpp.i + +util/Stemmer.s: util/Stemmer.cpp.s + +.PHONY : util/Stemmer.s + +# target to generate assembly for a file +util/Stemmer.cpp.s: + $(MAKE) -f CMakeFiles/StemmerTest.dir/build.make CMakeFiles/StemmerTest.dir/util/Stemmer.cpp.s + $(MAKE) -f CMakeFiles/StringProcessingTest.dir/build.make CMakeFiles/StringProcessingTest.dir/util/Stemmer.cpp.s + $(MAKE) -f CMakeFiles/TokenizerTest.dir/build.make CMakeFiles/TokenizerTest.dir/util/Stemmer.cpp.s + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/util/Stemmer.cpp.s + $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/util/Stemmer.cpp.s +.PHONY : util/Stemmer.cpp.s + +util/Tokenizer.o: util/Tokenizer.cpp.o + +.PHONY : util/Tokenizer.o + +# target to build an object file +util/Tokenizer.cpp.o: + $(MAKE) -f CMakeFiles/TokenizerTest.dir/build.make CMakeFiles/TokenizerTest.dir/util/Tokenizer.cpp.o + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/util/Tokenizer.cpp.o + $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/util/Tokenizer.cpp.o +.PHONY : util/Tokenizer.cpp.o + +util/Tokenizer.i: util/Tokenizer.cpp.i + +.PHONY : util/Tokenizer.i + +# target to preprocess a source file +util/Tokenizer.cpp.i: + $(MAKE) -f CMakeFiles/TokenizerTest.dir/build.make CMakeFiles/TokenizerTest.dir/util/Tokenizer.cpp.i + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/util/Tokenizer.cpp.i + $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/util/Tokenizer.cpp.i +.PHONY : util/Tokenizer.cpp.i + +util/Tokenizer.s: util/Tokenizer.cpp.s + +.PHONY : util/Tokenizer.s + +# target to generate assembly for a file +util/Tokenizer.cpp.s: + $(MAKE) -f CMakeFiles/TokenizerTest.dir/build.make CMakeFiles/TokenizerTest.dir/util/Tokenizer.cpp.s + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/util/Tokenizer.cpp.s + $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/util/Tokenizer.cpp.s +.PHONY : util/Tokenizer.cpp.s + util/stringProcessing.o: util/stringProcessing.cpp.o .PHONY : util/stringProcessing.o # target to build an object file util/stringProcessing.cpp.o: - $(MAKE) -f CMakeFiles/ParserEndToEndTest.dir/build.make CMakeFiles/ParserEndToEndTest.dir/util/stringProcessing.cpp.o + $(MAKE) -f CMakeFiles/StemmerTest.dir/build.make CMakeFiles/StemmerTest.dir/util/stringProcessing.cpp.o $(MAKE) -f CMakeFiles/StringProcessingTest.dir/build.make CMakeFiles/StringProcessingTest.dir/util/stringProcessing.cpp.o + $(MAKE) -f CMakeFiles/TokenizerTest.dir/build.make CMakeFiles/TokenizerTest.dir/util/stringProcessing.cpp.o + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/util/stringProcessing.cpp.o $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/util/stringProcessing.cpp.o .PHONY : util/stringProcessing.cpp.o @@ -494,8 +635,10 @@ util/stringProcessing.i: util/stringProcessing.cpp.i # target to preprocess a source file util/stringProcessing.cpp.i: - $(MAKE) -f CMakeFiles/ParserEndToEndTest.dir/build.make CMakeFiles/ParserEndToEndTest.dir/util/stringProcessing.cpp.i + $(MAKE) -f CMakeFiles/StemmerTest.dir/build.make CMakeFiles/StemmerTest.dir/util/stringProcessing.cpp.i $(MAKE) -f CMakeFiles/StringProcessingTest.dir/build.make CMakeFiles/StringProcessingTest.dir/util/stringProcessing.cpp.i + $(MAKE) -f CMakeFiles/TokenizerTest.dir/build.make CMakeFiles/TokenizerTest.dir/util/stringProcessing.cpp.i + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/util/stringProcessing.cpp.i $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/util/stringProcessing.cpp.i .PHONY : util/stringProcessing.cpp.i @@ -505,8 +648,10 @@ util/stringProcessing.s: util/stringProcessing.cpp.s # target to generate assembly for a file util/stringProcessing.cpp.s: - $(MAKE) -f CMakeFiles/ParserEndToEndTest.dir/build.make CMakeFiles/ParserEndToEndTest.dir/util/stringProcessing.cpp.s + $(MAKE) -f CMakeFiles/StemmerTest.dir/build.make CMakeFiles/StemmerTest.dir/util/stringProcessing.cpp.s $(MAKE) -f CMakeFiles/StringProcessingTest.dir/build.make CMakeFiles/StringProcessingTest.dir/util/stringProcessing.cpp.s + $(MAKE) -f CMakeFiles/TokenizerTest.dir/build.make CMakeFiles/TokenizerTest.dir/util/stringProcessing.cpp.s + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/util/stringProcessing.cpp.s $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/util/stringProcessing.cpp.s .PHONY : util/stringProcessing.cpp.s @@ -597,7 +742,8 @@ util/util.o: util/util.cpp.o # target to build an object file util/util.cpp.o: - $(MAKE) -f CMakeFiles/ParserEndToEndTest.dir/build.make CMakeFiles/ParserEndToEndTest.dir/util/util.cpp.o + $(MAKE) -f CMakeFiles/ISRWord-tests.dir/build.make CMakeFiles/ISRWord-tests.dir/util/util.cpp.o + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/util/util.cpp.o $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/util/util.cpp.o .PHONY : util/util.cpp.o @@ -607,7 +753,8 @@ util/util.i: util/util.cpp.i # target to preprocess a source file util/util.cpp.i: - $(MAKE) -f CMakeFiles/ParserEndToEndTest.dir/build.make CMakeFiles/ParserEndToEndTest.dir/util/util.cpp.i + $(MAKE) -f CMakeFiles/ISRWord-tests.dir/build.make CMakeFiles/ISRWord-tests.dir/util/util.cpp.i + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/util/util.cpp.i $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/util/util.cpp.i .PHONY : util/util.cpp.i @@ -617,7 +764,8 @@ util/util.s: util/util.cpp.s # target to generate assembly for a file util/util.cpp.s: - $(MAKE) -f CMakeFiles/ParserEndToEndTest.dir/build.make CMakeFiles/ParserEndToEndTest.dir/util/util.cpp.s + $(MAKE) -f CMakeFiles/ISRWord-tests.dir/build.make CMakeFiles/ISRWord-tests.dir/util/util.cpp.s + $(MAKE) -f CMakeFiles/ParserTest.dir/build.make CMakeFiles/ParserTest.dir/util/util.cpp.s $(MAKE) -f CMakeFiles/crawler-parser-test.dir/build.make CMakeFiles/crawler-parser-test.dir/util/util.cpp.s .PHONY : util/util.cpp.s @@ -628,14 +776,21 @@ help: @echo "... clean" @echo "... depend" @echo "... edit_cache" - @echo "... URLTEST" + @echo "... ISRWord-tests" @echo "... rebuild_cache" @echo "... StemmerTest" - @echo "... ParserEndToEndTest" - @echo "... TokenizerTest" @echo "... StringProcessingTest" + @echo "... TokenizerTest" + @echo "... URLTEST" + @echo "... ParserTest" @echo "... search-engine" @echo "... crawler-parser-test" + @echo "... constraintSolver/ISRWord.o" + @echo "... constraintSolver/ISRWord.i" + @echo "... constraintSolver/ISRWord.s" + @echo "... constraintSolver/tests/ISRWordTests.o" + @echo "... constraintSolver/tests/ISRWordTests.i" + @echo "... constraintSolver/tests/ISRWordTests.s" @echo "... crawler/SocketReader.o" @echo "... crawler/SocketReader.i" @echo "... crawler/SocketReader.s" @@ -666,6 +821,12 @@ help: @echo "... shared/urlTest.o" @echo "... shared/urlTest.i" @echo "... shared/urlTest.s" + @echo "... util/Stemmer.o" + @echo "... util/Stemmer.i" + @echo "... util/Stemmer.s" + @echo "... util/Tokenizer.o" + @echo "... util/Tokenizer.i" + @echo "... util/Tokenizer.s" @echo "... util/stringProcessing.o" @echo "... util/stringProcessing.i" @echo "... util/stringProcessing.s" diff --git a/parser/Parser.cpp b/parser/Parser.cpp index 3a70210d6ef859efe6001e724b37aa5dd5a8d8ca..15e1121aeb811c724409f01e89398b9b9dc96fe4 100644 --- a/parser/Parser.cpp +++ b/parser/Parser.cpp @@ -6,7 +6,7 @@ * Parser Cstor * @param urlFrontierIn */ -Parser::Parser ( ProducerConsumerQueue< string > *urlFrontierIn ) +Parser::Parser ( ProducerConsumerQueue< ParsedUrl > *urlFrontierIn ) { urlFrontier = urlFrontierIn; } @@ -67,7 +67,9 @@ void Parser::parse ( string html, ParsedUrl currentUrl, Tokenizer *tokenizer ) if ( isValid( url ) ) { // TODO ParsedUrl with anchor text - urlFrontier->Push( url ); + + ParsedUrl pUrl = ParsedUrl( url ); + urlFrontier->Push( pUrl ); cout << url << endl; } } diff --git a/parser/Parser.h b/parser/Parser.h index aa7740f62e4a6144d63ba1193ea0d7e50b83c0f9..828d7d1acb07d9b645b3e276445f145e828f9071 100644 --- a/parser/Parser.h +++ b/parser/Parser.h @@ -26,7 +26,7 @@ public: * Parser Cstor * @param urlFrontierIn */ - Parser ( ProducerConsumerQueue < string > * urlFrontierIn); + Parser ( ProducerConsumerQueue < ParsedUrl > * urlFrontierIn); /** @@ -37,7 +37,7 @@ public: private: - ProducerConsumerQueue < string >* urlFrontier; + ProducerConsumerQueue < ParsedUrl >* urlFrontier; /** * Parses file diff --git a/search-engine b/search-engine deleted file mode 100755 index 701ce491587160130b221b49b89471846ea24bdf..0000000000000000000000000000000000000000 Binary files a/search-engine and /dev/null differ diff --git a/util/util.cpp b/util/util.cpp index e31eb581d81179a4bb696caad009239e36030bf9..65cd15356a3f5c9d2d40a745791b43c236e17610 100644 --- a/util/util.cpp +++ b/util/util.cpp @@ -27,6 +27,7 @@ namespace util return open( fileName.c_str( ), O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR ); } + return -1; }