Skip to content
Snippets Groups Projects
Commit b181bd3f authored by jsclose's avatar jsclose
Browse files

converted url frontier

parent bcc19b3b
Branches
No related tags found
No related merge requests found
No preview for this file type
......@@ -16,7 +16,7 @@ class Crawler
{
public:
Crawler( string mode_in, ProducerConsumerQueue < string > *url_q_in )
Crawler( string mode_in, ProducerConsumerQueue < ParsedUrl > *url_q_in )
    : urlFrontier( url_q_in ),
      mode( mode_in )
    {
    // Nothing else to do: the queue pointer is stored as passed in and the
    // mode string is copied into the member.
    }
......@@ -30,7 +30,7 @@ public:
private:
vector < Spider * > spiders;
ProducerConsumerQueue < string > *urlFrontier;
ProducerConsumerQueue < ParsedUrl > *urlFrontier;
//CrawlerStatistics housekeeper;
string mode;
......
......@@ -30,7 +30,7 @@ size_t Spider::hash(const char * s){
}
string Spider::getUrl()
ParsedUrl Spider::getUrl()
    {
    // Takes a url off of the url frontier and hands it straight back to the caller.
    return this->urlFrontier->Pop( );
    }
......@@ -46,9 +46,9 @@ void Spider::FuncToRun()
{
string stringUrl = getUrl( ); //get url from url frontier
// ParsedUrl stringUrl = getUrl( ); //get url from url frontier
char *fileMap;
ParsedUrl currentUrl = ParsedUrl(stringUrl);
ParsedUrl currentUrl = getUrl();
//url has not seen before or time since seen is past certain criteria
if ( shouldURLbeCrawled( currentUrl ))
{
......
......@@ -23,7 +23,7 @@ class Spider : public ThreadClass
public:
Spider( string mode_in, ProducerConsumerQueue < string > *url_q_in,
Spider( string mode_in, ProducerConsumerQueue < ParsedUrl > *url_q_in,
unordered_map < string, int > *doc_map_lookup_in )
: mode( mode_in ), urlFrontier( url_q_in ), docMapLookup( doc_map_lookup_in ), parser( url_q_in)
{
......@@ -31,7 +31,7 @@ public:
//Takes a url off of the url frontier
string getUrl();
ParsedUrl getUrl();
virtual void FuncToRun();
......@@ -51,7 +51,7 @@ public:
private:
int locationOnDisk;
ProducerConsumerQueue < string > *urlFrontier;
ProducerConsumerQueue < ParsedUrl > *urlFrontier;
string mode;
unordered_map < string, int > *docMapLookup;
Parser parser;
......
......@@ -89,7 +89,7 @@ int main( int argc, char *argv[] )
bool restoreFromLog;
ProducerConsumerQueue < string > urlFrontier;
ProducerConsumerQueue < ParsedUrl > urlFrontier;
cout << "Pushed File\n";
char *seeds;
......@@ -104,7 +104,8 @@ int main( int argc, char *argv[] )
if ( *seeds == '\n')
{
cout << "Pushing to Url Frontier..." << endl;
urlFrontier.Push(testFile);
ParsedUrl url = ParsedUrl(testFile);
urlFrontier.Push(url);
testFile = "";
}
......
......@@ -6,7 +6,7 @@
* Parser Cstor
* @param urlFrontierIn
*/
Parser::Parser ( ProducerConsumerQueue< string > *urlFrontierIn )
Parser::Parser ( ProducerConsumerQueue< ParsedUrl > *urlFrontierIn )
    : urlFrontier( urlFrontierIn )   // store the caller's queue pointer; it is not dereferenced here
    {
    }
......@@ -63,7 +63,10 @@ void Parser::parse ( string html, ParsedUrl currentUrl, Tokenizer *tokenizer )
completeUrl.assign( currentUrl.CompleteUrl );
url = completeUrl + url;
}
urlFrontier->Push( url );
ParsedUrl pUrl = ParsedUrl(url);
urlFrontier->Push( pUrl );
cout << url << endl;
}
// check if line is title
......
......@@ -26,7 +26,7 @@ public:
* Parser Cstor
* @param urlFrontierIn
*/
Parser ( ProducerConsumerQueue < string > * urlFrontierIn);
Parser ( ProducerConsumerQueue < ParsedUrl > * urlFrontierIn);
/**
......@@ -37,7 +37,7 @@ public:
private:
ProducerConsumerQueue < string >* urlFrontier;
ProducerConsumerQueue < ParsedUrl >* urlFrontier;
/**
* Parses file
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment