diff --git a/CMakeLists.txt b/CMakeLists.txt index 5dcc7210cb225b59ca195fe34c54ed3a43670595..0519d88497ccf51eb5e5b262a382e68afce37502 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,8 @@ add_executable(ParserEndToEndTest parser/tests/parserTest.cpp) +add_executable(URLTEST shared/url.h shared/urlTest.cpp) + find_package(OpenSSL REQUIRED) diff --git a/URLTEST b/URLTEST new file mode 100755 index 0000000000000000000000000000000000000000..e4798c6edbdcb8295d737208bf37be67f1373d99 Binary files /dev/null and b/URLTEST differ diff --git a/makefile b/makefile index 363ce8c2c5cc0ad19b036816ccb8e05b89135ecd..d807decbf544f097414a8abb92140b7e7f904dcf 100644 --- a/makefile +++ b/makefile @@ -110,6 +110,19 @@ depend: $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 .PHONY : depend +#============================================================================= +# Target rules for targets named URLTEST + +# Build rule for target. +URLTEST: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 URLTEST +.PHONY : URLTEST + +# fast build rule for target. +URLTEST/fast: + $(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/build +.PHONY : URLTEST/fast + #============================================================================= # Target rules for targets named ParserEndToEndTest @@ -432,6 +445,33 @@ shared/ProducerConsumerQueue.cpp.s: $(MAKE) -f CMakeFiles/search.dir/build.make CMakeFiles/search.dir/shared/ProducerConsumerQueue.cpp.s .PHONY : shared/ProducerConsumerQueue.cpp.s +shared/urlTest.o: shared/urlTest.cpp.o + +.PHONY : shared/urlTest.o + +# target to build an object file +shared/urlTest.cpp.o: + $(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/shared/urlTest.cpp.o +.PHONY : shared/urlTest.cpp.o + +shared/urlTest.i: shared/urlTest.cpp.i + +.PHONY : shared/urlTest.i + +# target to preprocess a source file +shared/urlTest.cpp.i: + $(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/shared/urlTest.cpp.i +.PHONY : shared/urlTest.cpp.i + +shared/urlTest.s: shared/urlTest.cpp.s + +.PHONY : shared/urlTest.s + +# target to generate assembly for a file +shared/urlTest.cpp.s: + $(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/shared/urlTest.cpp.s +.PHONY : shared/urlTest.cpp.s + util/tests/stemmerTest.o: util/tests/stemmerTest.cpp.o .PHONY : util/tests/stemmerTest.o @@ -547,6 +587,7 @@ help: @echo "... clean" @echo "... depend" @echo "... edit_cache" + @echo "... URLTEST" @echo "... ParserEndToEndTest" @echo "... rebuild_cache" @echo "... StemmerTest" @@ -579,6 +620,9 @@ help: @echo "... shared/ProducerConsumerQueue.o" @echo "... shared/ProducerConsumerQueue.i" @echo "... shared/ProducerConsumerQueue.s" + @echo "... shared/urlTest.o" + @echo "... shared/urlTest.i" + @echo "... shared/urlTest.s" @echo "... util/tests/stemmerTest.o" @echo "... util/tests/stemmerTest.i" @echo "... util/tests/stemmerTest.s" diff --git a/shared/ProducerConsumerQueue.h b/shared/ProducerConsumerQueue.h index d1ad2dd6585ce1171a914337c57152091b03c6e8..2647d1c6ff6d54d5dfbe362bb1b03b337db17378 100644 --- a/shared/ProducerConsumerQueue.h +++ b/shared/ProducerConsumerQueue.h @@ -20,7 +20,9 @@ private: public: ProducerConsumerQueue() {} - void Push(T obj); + + + void Push(T obj); T Pop(); size_t Size(); diff --git a/shared/url.h b/shared/url.h index 95c42e58bb53d90714fe952af3660650a3995953..44fc016fa6f79f31151e80e056677b83f7eafcd8 100644 --- a/shared/url.h +++ b/shared/url.h @@ -7,10 +7,18 @@ #include <string> #include <iostream> #include "../util/util.h" +#include <math.h> //#include "../crawler/SocketReader.h" using namespace std; +#define GOV ".gov" +#define COM ".com" +#define EDU ".edu" +#define ORG ".org" +#define NET ".net" +#define MIL ".mil" +#define INT ".int" @@ -25,6 +33,7 @@ public: *Host, *Domain, *Path; + double Score; ParsedUrl( string input_url ) { @@ -69,7 +78,12 @@ public: //char * domainBuffer = new char[ 20 ]; //get the domain: - for(int i = strlen(Host); Host[i] != Period; i--){ + char *i = Host; + for(; *i; i++){ + + if(*i == Period) + Domain = i; + } @@ -90,6 +104,8 @@ public: } else Host = Path = p; + + setScore(); } void printUrl() @@ -97,12 +113,32 @@ public: cout << "Complete URL: " << CompleteUrl << endl; cout << "Service: " << Service << endl; cout << "Host: " << Host << endl; + cout << "Domain: " << Domain << endl; cout << "Path: " << Path << endl; + cout << "Score: " << Score << endl; } - + void setScore(){ + double lengthOfUrl = strlen(CompleteUrl); + Score += 4 * 1/ log( lengthOfUrl ); + + if ( strcmp ( Domain , ORG ) ) + Score += 5; + else if ( strcmp ( Domain , EDU ) ) + Score += 4; + else if ( strcmp ( Domain , GOV ) ) + Score += 3; + else if ( strcmp ( Domain , COM ) ) + Score += 2; + else if ( strcmp ( Domain , NET ) ) + Score += 1; + else if ( strcmp ( Domain , INT ) ) + Score += 1; + else if ( strcmp ( Domain , MIL ) ) + Score += .5; + } ~ParsedUrl( ) { diff --git a/shared/urlTest.cpp b/shared/urlTest.cpp index 7fda4f8ed3deb028e7ecbac0632160956b44b683..feab256807611e3fb9fd00f05ff1359b14e594ba 100644 --- a/shared/urlTest.cpp +++ b/shared/urlTest.cpp @@ -22,20 +22,21 @@ int main(int argc, const char * argv[]) ParsedUrl fragmentTest = ParsedUrl("http://www.example.com/path/to/myfile.html?key1=value1&key2=value2#SomewhereInTheDocument"); - //fragmentTest.printUrl(); + fragmentTest.printUrl(); //assert( strcmp(fragmentTest.Service, "http")); //assert( strcmp(fragmentTest.Host, "example.com")); + ParsedUrl gov = ParsedUrl("http://www.goverment.gov/path/to/myfile.html"); + gov.printUrl(); + //ParsedUrl relativeURLTest = ParsedUrl("/wiki/List_of_sheep_breeds"); + //relativeURLTest.printUrl(); - ParsedUrl relativeURLTest = ParsedUrl("/wiki/List_of_sheep_breeds"); - relativeURLTest.printUrl(); + //ParsedUrl pointToFragment = ParsedUrl("#topOfPage"); - ParsedUrl pointToFragment = ParsedUrl("#topOfPage"); - - ParsedUrl mailToTest = ParsedUrl("mailto:someone@example.com?cc=someoneelse@example.com&bcc=andsomeoneelse@example.com\n" - "&subject=Summer%20Party&body=You%20are%20invited%20to%20a%20big%20summer%20party!\""); - mailToTest.printUrl(); - std::cout << "URL TEST PASSED" << std::endl; + //ParsedUrl mailToTest = ParsedUrl("mailto:someone@example.com?cc=someoneelse@example.com&bcc=andsomeoneelse@example.com\n" + // "&subject=Summer%20Party&body=You%20are%20invited%20to%20a%20big%20summer%20party!\""); + //mailToTest.printUrl(); + //std::cout << "URL TEST PASSED" << std::endl; } \ No newline at end of file