Skip to content
Snippets Groups Projects
Commit 9a1995c4 authored by jsclose's avatar jsclose
Browse files

shutdown mechanism for the crawler + indexer with atomic bool working

parent 24f56373
No related branches found
No related tags found
No related merge requests found
No preview for this file type
......@@ -12,11 +12,11 @@
*/
void Crawler::SpawnSpiders ( size_t num_spiders )
void Crawler::SpawnSpiders ( size_t num_spiders , atomic_bool * alive)
{
for ( size_t i = 0; i < num_spiders; i++ )
{
Spider *temp = new Spider( this->mode, this->urlFrontier, this->IndexerQueue );
Spider *temp = new Spider( this->mode, this->urlFrontier, this->IndexerQueue , alive);
temp->StartThread( );
this->spiders.push_back( temp );
}
......
......@@ -27,7 +27,7 @@ public:
{ };
//Spawns a number of worker spiders
void SpawnSpiders ( size_t num_spiders );
void SpawnSpiders ( size_t num_spiders, atomic_bool* alive );
//Creates a housekeeping thread
void houseKeeper ( );
......
This diff is collapsed.
......@@ -82,7 +82,7 @@ void Spider::run ( )
std::cout << "Spider is crawling" << endl;
int cond = 0;
while (alive)
while (*alive)
{
ParsedUrl * currentUrl = getUrl( );
size_t docID = hash( currentUrl->getCompleteUrl().c_str() );
......@@ -118,7 +118,7 @@ void Spider::run ( )
/**
 * @brief Signals this spider to stop crawling.
 *
 * Clears the shared atomic flag that Spider::run polls (`while (*alive)`),
 * so the worker loop exits after finishing its current iteration.
 * Because `alive` is shared among all spiders spawned with the same flag,
 * killing one spider stops them all — TODO confirm this is intended.
 */
void Spider::kill()
	{
	*(this->alive) = false;
	}
......
......@@ -23,12 +23,14 @@ public:
/**
 * @brief Constructs a Spider worker.
 *
 * @param mode_in              crawl mode string (semantics defined by caller)
 * @param url_q_in             shared URL frontier to pull work from
 * @param doc_index_queue_in   queue the spider pushes parsed DocIndex results to
 * @param bool_in              shared shutdown flag; run() loops while it is true.
 *                             Not owned by the Spider — the caller must keep it
 *                             alive for the spider's lifetime.
 */
Spider ( string mode_in,
         UrlFrontier *url_q_in,
         ProducerConsumerQueue< DocIndex * > *doc_index_queue_in,
         atomic_bool * bool_in
       )
		: mode( mode_in ),
		  urlFrontier( url_q_in ),
		  parser( url_q_in ),
		  IndexerQueue( doc_index_queue_in ),
		  alive( bool_in )
	{ };
......@@ -58,6 +60,6 @@ private:
ProducerConsumerQueue< DocIndex * > *IndexerQueue;
string mode;
Parser parser;
bool alive = true;
atomic_bool* alive;
};
\ No newline at end of file
......@@ -65,11 +65,11 @@ int main ( int argc, char *argv[] )
string bad_url2 ="http-equiv=Content-Type";
string bad_url3 = "http-equiv=refresh content=1;url=/2.73.0/static/unsupp.html /><![endif]--><!--[if gt IE 9><!--><!--<![endif]--><title>White House says Trump continues to deny Stormy Daniels affair - CNNPolitics</title>";
//ParsedUrl url = ParsedUrl(bad_url);
ParsedUrl url1 = ParsedUrl(bad_url3);
ParsedUrl url2 = ParsedUrl(bad_url2);
urlFrontier->Push(&url1);
ParsedUrl * url1 = new ParsedUrl(bad_url3);
ParsedUrl * url2 = new ParsedUrl(bad_url2);
urlFrontier->Push(url1);
urlFrontier->Push(&url2);
urlFrontier->Push(url2);
indexer.StartThread( );
Crawler crawler( mode, urlFrontier, IndexerQueue );
......
No preview for this file type
......@@ -172,8 +172,9 @@ int main ( int argc, char *argv[] )
indexer.StartThread( );
Crawler *crawler = new Crawler( mode, urlFrontier, IndexerQueue );
atomic_bool *alive = new atomic_bool(true); // At the beginning of the program
crawler->SpawnSpiders( numberOfSpiders );
crawler->SpawnSpiders( numberOfSpiders , alive);
HouseKeeper logger( crawler );
//logger.StartThread( );
......@@ -206,6 +207,9 @@ int main ( int argc, char *argv[] )
}
//the main thread just reads commands
//if it wants work, has to spawn thread to do it
//the thread we spawn periodically pulls work (comment appears truncated — TODO confirm intent)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment