Skip to content
Snippets Groups Projects
Commit af2b73d2 authored by jsclose's avatar jsclose
Browse files

Implemented a score element for URLs, based on the length of the URL and its domain

parent f96179eb
No related branches found
No related tags found
No related merge requests found
......@@ -40,6 +40,8 @@ add_executable(ParserEndToEndTest
parser/tests/parserTest.cpp)
add_executable(URLTEST shared/url.h shared/urlTest.cpp)
find_package(OpenSSL REQUIRED)
......
URLTEST 0 → 100755
File added
......@@ -110,6 +110,19 @@ depend:
$(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
.PHONY : depend
#=============================================================================
# Target rules for targets named URLTEST
# Build rule for target.
URLTEST: cmake_check_build_system
$(MAKE) -f CMakeFiles/Makefile2 URLTEST
.PHONY : URLTEST
# fast build rule for target.
URLTEST/fast:
$(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/build
.PHONY : URLTEST/fast
#=============================================================================
# Target rules for targets named ParserEndToEndTest
......@@ -432,6 +445,33 @@ shared/ProducerConsumerQueue.cpp.s:
$(MAKE) -f CMakeFiles/search.dir/build.make CMakeFiles/search.dir/shared/ProducerConsumerQueue.cpp.s
.PHONY : shared/ProducerConsumerQueue.cpp.s
shared/urlTest.o: shared/urlTest.cpp.o
.PHONY : shared/urlTest.o
# target to build an object file
shared/urlTest.cpp.o:
$(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/shared/urlTest.cpp.o
.PHONY : shared/urlTest.cpp.o
shared/urlTest.i: shared/urlTest.cpp.i
.PHONY : shared/urlTest.i
# target to preprocess a source file
shared/urlTest.cpp.i:
$(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/shared/urlTest.cpp.i
.PHONY : shared/urlTest.cpp.i
shared/urlTest.s: shared/urlTest.cpp.s
.PHONY : shared/urlTest.s
# target to generate assembly for a file
shared/urlTest.cpp.s:
$(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/shared/urlTest.cpp.s
.PHONY : shared/urlTest.cpp.s
util/tests/stemmerTest.o: util/tests/stemmerTest.cpp.o
.PHONY : util/tests/stemmerTest.o
......@@ -547,6 +587,7 @@ help:
@echo "... clean"
@echo "... depend"
@echo "... edit_cache"
@echo "... URLTEST"
@echo "... ParserEndToEndTest"
@echo "... rebuild_cache"
@echo "... StemmerTest"
......@@ -579,6 +620,9 @@ help:
@echo "... shared/ProducerConsumerQueue.o"
@echo "... shared/ProducerConsumerQueue.i"
@echo "... shared/ProducerConsumerQueue.s"
@echo "... shared/urlTest.o"
@echo "... shared/urlTest.i"
@echo "... shared/urlTest.s"
@echo "... util/tests/stemmerTest.o"
@echo "... util/tests/stemmerTest.i"
@echo "... util/tests/stemmerTest.s"
......
......@@ -20,7 +20,9 @@ private:
public:
ProducerConsumerQueue() {}
void Push(T obj);
void Push(T obj);
T Pop();
size_t Size();
......
......@@ -7,10 +7,18 @@
#include <string>
#include <iostream>
#include "../util/util.h"
#include <math.h>
//#include "../crawler/SocketReader.h"
using namespace std;
#define GOV ".gov"
#define COM ".com"
#define EDU ".edu"
#define ORG ".org"
#define NET ".net"
#define MIL ".mil"
#define INT ".int"
......@@ -25,6 +33,7 @@ public:
*Host,
*Domain,
*Path;
double Score;
ParsedUrl( string input_url )
{
......@@ -69,7 +78,12 @@ public:
//char * domainBuffer = new char[ 20 ];
//get the domain:
for(int i = strlen(Host); Host[i] != Period; i--){
char *i = Host;
for(; *i; i++){
if(*i == Period)
Domain = i;
}
......@@ -90,6 +104,8 @@ public:
}
else
Host = Path = p;
setScore();
}
void printUrl()
......@@ -97,12 +113,32 @@ public:
cout << "Complete URL: " << CompleteUrl << endl;
cout << "Service: " << Service << endl;
cout << "Host: " << Host << endl;
cout << "Domain: " << Domain << endl;
cout << "Path: " << Path << endl;
cout << "Score: " << Score << endl;
}
void setScore(){
double lengthOfUrl = strlen(CompleteUrl);
Score += 4 * 1/ log( lengthOfUrl );
if ( strcmp ( Domain , ORG ) )
Score += 5;
else if ( strcmp ( Domain , EDU ) )
Score += 4;
else if ( strcmp ( Domain , GOV ) )
Score += 3;
else if ( strcmp ( Domain , COM ) )
Score += 2;
else if ( strcmp ( Domain , NET ) )
Score += 1;
else if ( strcmp ( Domain , INT ) )
Score += 1;
else if ( strcmp ( Domain , MIL ) )
Score += .5;
}
~ParsedUrl( )
{
......
......@@ -22,20 +22,21 @@ int main(int argc, const char * argv[])
ParsedUrl fragmentTest = ParsedUrl("http://www.example.com/path/to/myfile.html?key1=value1&key2=value2#SomewhereInTheDocument");
//fragmentTest.printUrl();
fragmentTest.printUrl();
//assert( strcmp(fragmentTest.Service, "http"));
//assert( strcmp(fragmentTest.Host, "example.com"));
ParsedUrl gov = ParsedUrl("http://www.goverment.gov/path/to/myfile.html");
gov.printUrl();
//ParsedUrl relativeURLTest = ParsedUrl("/wiki/List_of_sheep_breeds");
//relativeURLTest.printUrl();
ParsedUrl relativeURLTest = ParsedUrl("/wiki/List_of_sheep_breeds");
relativeURLTest.printUrl();
//ParsedUrl pointToFragment = ParsedUrl("#topOfPage");
ParsedUrl pointToFragment = ParsedUrl("#topOfPage");
ParsedUrl mailToTest = ParsedUrl("mailto:someone@example.com?cc=someoneelse@example.com&bcc=andsomeoneelse@example.com\n"
"&subject=Summer%20Party&body=You%20are%20invited%20to%20a%20big%20summer%20party!\"");
mailToTest.printUrl();
std::cout << "URL TEST PASSED" << std::endl;
//ParsedUrl mailToTest = ParsedUrl("mailto:someone@example.com?cc=someoneelse@example.com&bcc=andsomeoneelse@example.com\n"
// "&subject=Summer%20Party&body=You%20are%20invited%20to%20a%20big%20summer%20party!\"");
//mailToTest.printUrl();
//std::cout << "URL TEST PASSED" << std::endl;
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment