Skip to content
Snippets Groups Projects
Commit 24f56373 authored by jsclose's avatar jsclose
Browse files

modifying queue rate

parent 70e42436
No related branches found
No related tags found
No related merge requests found
No preview for this file type
......@@ -32,11 +32,11 @@ void UrlFrontier::checkUrl(ParsedUrl* url)
{
//get the last time it was seen and find the time difference
time_t lastSeen = this->domainMap->at( url->getHost( ));
difference = difftime( now ,lastSeen);
difference = difftime( now , lastSeen);
if(difference == 0)
difference = 5;
difference = 5 ;
else
difference = 1/difference;
difference = difference/10;
url->updateScore( difference );
}
......@@ -45,7 +45,7 @@ void UrlFrontier::checkUrl(ParsedUrl* url)
//add url to the duplicate url map
this->duplicateUrlMap->insert( std::make_pair( url->getCompleteUrl( ), 1 ));
this->duplicateUrlMap->insert( url->getCompleteUrl( ) );
return;
}
}
......
......@@ -7,6 +7,7 @@
#include "../shared/url.h"
#include <time.h>
#include <unordered_map>
#include <set>
using namespace std;
......@@ -40,7 +41,7 @@ class UrlFrontier
private:
unordered_map< string , bool > *duplicateUrlMap = new unordered_map< string, bool >( );
set< string > *duplicateUrlMap = new set< string>( );
unordered_map< string , time_t > *domainMap = new unordered_map< string, time_t >( );
};
......
2018-03-29.11:05:59
\ No newline at end of file
......@@ -63,9 +63,9 @@ int main ( int argc, char *argv[] )
string bad_url = "http-equiv=X-UA-Compatiblecontent=IE=edge,chrome=1";
string bad_url2 ="http-equiv=Content-Type";
string bad_url3 = "\"http-equiv=\\\"refresh\\\" content=\\\"1;url=/2.73.0/static/unsupp.html\\\" /><![endif]--><!--[if gt IE 9><!--><!--<![endif]--><title>White House says Trump continues to deny Stormy Daniels affair - CNNPolitics</title>\";
string bad_url3 = "http-equiv=refresh content=1;url=/2.73.0/static/unsupp.html /><![endif]--><!--[if gt IE 9><!--><!--<![endif]--><title>White House says Trump continues to deny Stormy Daniels affair - CNNPolitics</title>";
//ParsedUrl url = ParsedUrl(bad_url);
ParsedUrl url1 = ParsedUrl(bad_url);
ParsedUrl url1 = ParsedUrl(bad_url3);
ParsedUrl url2 = ParsedUrl(bad_url2);
urlFrontier->Push(&url1);
......
File added
......@@ -176,7 +176,7 @@ int main ( int argc, char *argv[] )
crawler->SpawnSpiders( numberOfSpiders );
HouseKeeper logger( crawler );
logger.StartThread( );
//logger.StartThread( );
string input;
while(true)
......
......@@ -90,7 +90,7 @@ public:
//char * domainBuffer = new char[ 20 ];
//get the domain:
char *i = temp_Host;
//temp_Domain = null;
temp_Domain = nullptr;
if(i)
{
for ( ; *i; i++ )
......@@ -116,7 +116,7 @@ public:
CompleteUrl = string(temp_CompleteUrl, strlen(temp_CompleteUrl));
Service = string(temp_Service, strlen(temp_Service));
Host = string(temp_Host, strlen(temp_Host));
if( sizeof( temp_Domain) > 0 )
if( temp_Domain != nullptr )
Domain = string(temp_Domain, strlen(temp_Domain));
Path = string(temp_Path, strlen(temp_Path));
......@@ -211,7 +211,7 @@ public:
void updateScore( double time )
{
Score += time;
Score += 3 * time;
}
std::string getAnchorText ( )
......
https://www.boston.com/cars/new-car-deals?s_campaign=bg:hp:well:cars
http://www.fastcompany.com/
http://www.bbc.com/
https://www.eecs.umich.edu/
https://www.nytimes.com/
http://www.bostonglobe.com/
https://www.huffingtonpost.com/2015/01/14/strangest-wikipedia-entries_n_6463488.html
https://www.wired.com/
http://www.espn.com/
http://fivethirtyeight.com/
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment