Skip to content
Snippets Groups Projects
Commit af2b73d2 authored by jsclose's avatar jsclose
Browse files

Implemented a score element for URLs, based on the length of the URL and its domain

parent f96179eb
No related branches found
No related tags found
No related merge requests found
...@@ -40,6 +40,8 @@ add_executable(ParserEndToEndTest ...@@ -40,6 +40,8 @@ add_executable(ParserEndToEndTest
parser/tests/parserTest.cpp) parser/tests/parserTest.cpp)
add_executable(URLTEST shared/url.h shared/urlTest.cpp)
find_package(OpenSSL REQUIRED) find_package(OpenSSL REQUIRED)
......
URLTEST 0 → 100755
File added
...@@ -110,6 +110,19 @@ depend: ...@@ -110,6 +110,19 @@ depend:
$(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
.PHONY : depend .PHONY : depend
#=============================================================================
# Target rules for targets named URLTEST
# Build rule for target.
URLTEST: cmake_check_build_system
$(MAKE) -f CMakeFiles/Makefile2 URLTEST
.PHONY : URLTEST
# fast build rule for target.
URLTEST/fast:
$(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/build
.PHONY : URLTEST/fast
#============================================================================= #=============================================================================
# Target rules for targets named ParserEndToEndTest # Target rules for targets named ParserEndToEndTest
...@@ -432,6 +445,33 @@ shared/ProducerConsumerQueue.cpp.s: ...@@ -432,6 +445,33 @@ shared/ProducerConsumerQueue.cpp.s:
$(MAKE) -f CMakeFiles/search.dir/build.make CMakeFiles/search.dir/shared/ProducerConsumerQueue.cpp.s $(MAKE) -f CMakeFiles/search.dir/build.make CMakeFiles/search.dir/shared/ProducerConsumerQueue.cpp.s
.PHONY : shared/ProducerConsumerQueue.cpp.s .PHONY : shared/ProducerConsumerQueue.cpp.s
shared/urlTest.o: shared/urlTest.cpp.o
.PHONY : shared/urlTest.o
# target to build an object file
shared/urlTest.cpp.o:
$(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/shared/urlTest.cpp.o
.PHONY : shared/urlTest.cpp.o
shared/urlTest.i: shared/urlTest.cpp.i
.PHONY : shared/urlTest.i
# target to preprocess a source file
shared/urlTest.cpp.i:
$(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/shared/urlTest.cpp.i
.PHONY : shared/urlTest.cpp.i
shared/urlTest.s: shared/urlTest.cpp.s
.PHONY : shared/urlTest.s
# target to generate assembly for a file
shared/urlTest.cpp.s:
$(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/shared/urlTest.cpp.s
.PHONY : shared/urlTest.cpp.s
util/tests/stemmerTest.o: util/tests/stemmerTest.cpp.o util/tests/stemmerTest.o: util/tests/stemmerTest.cpp.o
.PHONY : util/tests/stemmerTest.o .PHONY : util/tests/stemmerTest.o
...@@ -547,6 +587,7 @@ help: ...@@ -547,6 +587,7 @@ help:
@echo "... clean" @echo "... clean"
@echo "... depend" @echo "... depend"
@echo "... edit_cache" @echo "... edit_cache"
@echo "... URLTEST"
@echo "... ParserEndToEndTest" @echo "... ParserEndToEndTest"
@echo "... rebuild_cache" @echo "... rebuild_cache"
@echo "... StemmerTest" @echo "... StemmerTest"
...@@ -579,6 +620,9 @@ help: ...@@ -579,6 +620,9 @@ help:
@echo "... shared/ProducerConsumerQueue.o" @echo "... shared/ProducerConsumerQueue.o"
@echo "... shared/ProducerConsumerQueue.i" @echo "... shared/ProducerConsumerQueue.i"
@echo "... shared/ProducerConsumerQueue.s" @echo "... shared/ProducerConsumerQueue.s"
@echo "... shared/urlTest.o"
@echo "... shared/urlTest.i"
@echo "... shared/urlTest.s"
@echo "... util/tests/stemmerTest.o" @echo "... util/tests/stemmerTest.o"
@echo "... util/tests/stemmerTest.i" @echo "... util/tests/stemmerTest.i"
@echo "... util/tests/stemmerTest.s" @echo "... util/tests/stemmerTest.s"
......
...@@ -20,7 +20,9 @@ private: ...@@ -20,7 +20,9 @@ private:
public: public:
ProducerConsumerQueue() {} ProducerConsumerQueue() {}
void Push(T obj);
void Push(T obj);
T Pop(); T Pop();
size_t Size(); size_t Size();
......
...@@ -7,10 +7,18 @@ ...@@ -7,10 +7,18 @@
#include <string> #include <string>
#include <iostream> #include <iostream>
#include "../util/util.h" #include "../util/util.h"
#include <math.h>
//#include "../crawler/SocketReader.h" //#include "../crawler/SocketReader.h"
using namespace std; using namespace std;
#define GOV ".gov"
#define COM ".com"
#define EDU ".edu"
#define ORG ".org"
#define NET ".net"
#define MIL ".mil"
#define INT ".int"
...@@ -25,6 +33,7 @@ public: ...@@ -25,6 +33,7 @@ public:
*Host, *Host,
*Domain, *Domain,
*Path; *Path;
double Score;
ParsedUrl( string input_url ) ParsedUrl( string input_url )
{ {
...@@ -69,7 +78,12 @@ public: ...@@ -69,7 +78,12 @@ public:
//char * domainBuffer = new char[ 20 ]; //char * domainBuffer = new char[ 20 ];
//get the domain: //get the domain:
for(int i = strlen(Host); Host[i] != Period; i--){ char *i = Host;
for(; *i; i++){
if(*i == Period)
Domain = i;
} }
...@@ -90,6 +104,8 @@ public: ...@@ -90,6 +104,8 @@ public:
} }
else else
Host = Path = p; Host = Path = p;
setScore();
} }
void printUrl() void printUrl()
...@@ -97,12 +113,32 @@ public: ...@@ -97,12 +113,32 @@ public:
cout << "Complete URL: " << CompleteUrl << endl; cout << "Complete URL: " << CompleteUrl << endl;
cout << "Service: " << Service << endl; cout << "Service: " << Service << endl;
cout << "Host: " << Host << endl; cout << "Host: " << Host << endl;
cout << "Domain: " << Domain << endl;
cout << "Path: " << Path << endl; cout << "Path: " << Path << endl;
cout << "Score: " << Score << endl;
} }
void setScore(){
double lengthOfUrl = strlen(CompleteUrl);
Score += 4 * 1/ log( lengthOfUrl );
if ( strcmp ( Domain , ORG ) )
Score += 5;
else if ( strcmp ( Domain , EDU ) )
Score += 4;
else if ( strcmp ( Domain , GOV ) )
Score += 3;
else if ( strcmp ( Domain , COM ) )
Score += 2;
else if ( strcmp ( Domain , NET ) )
Score += 1;
else if ( strcmp ( Domain , INT ) )
Score += 1;
else if ( strcmp ( Domain , MIL ) )
Score += .5;
}
~ParsedUrl( ) ~ParsedUrl( )
{ {
......
...@@ -22,20 +22,21 @@ int main(int argc, const char * argv[]) ...@@ -22,20 +22,21 @@ int main(int argc, const char * argv[])
ParsedUrl fragmentTest = ParsedUrl("http://www.example.com/path/to/myfile.html?key1=value1&key2=value2#SomewhereInTheDocument"); ParsedUrl fragmentTest = ParsedUrl("http://www.example.com/path/to/myfile.html?key1=value1&key2=value2#SomewhereInTheDocument");
//fragmentTest.printUrl(); fragmentTest.printUrl();
//assert( strcmp(fragmentTest.Service, "http")); //assert( strcmp(fragmentTest.Service, "http"));
//assert( strcmp(fragmentTest.Host, "example.com")); //assert( strcmp(fragmentTest.Host, "example.com"));
ParsedUrl gov = ParsedUrl("http://www.goverment.gov/path/to/myfile.html");
gov.printUrl();
//ParsedUrl relativeURLTest = ParsedUrl("/wiki/List_of_sheep_breeds");
//relativeURLTest.printUrl();
ParsedUrl relativeURLTest = ParsedUrl("/wiki/List_of_sheep_breeds");
relativeURLTest.printUrl();
//ParsedUrl pointToFragment = ParsedUrl("#topOfPage");
ParsedUrl pointToFragment = ParsedUrl("#topOfPage"); //ParsedUrl mailToTest = ParsedUrl("mailto:someone@example.com?cc=someoneelse@example.com&bcc=andsomeoneelse@example.com\n"
// "&subject=Summer%20Party&body=You%20are%20invited%20to%20a%20big%20summer%20party!\"");
ParsedUrl mailToTest = ParsedUrl("mailto:someone@example.com?cc=someoneelse@example.com&bcc=andsomeoneelse@example.com\n" //mailToTest.printUrl();
"&subject=Summer%20Party&body=You%20are%20invited%20to%20a%20big%20summer%20party!\""); //std::cout << "URL TEST PASSED" << std::endl;
mailToTest.printUrl();
std::cout << "URL TEST PASSED" << std::endl;
} }
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment