Newer
Older
#include <csignal>
#include <iostream>
#include <chrono>
#include <future>
#include <ctime>
#include "crawler/HouseKeeper.h"
// Read-only lookup table type: maps a string key to a list of unsigned long values.
// Pointers to this type are what the IndexerQueue below carries.
// NOTE(review): presumably term -> positions/offsets within one document; confirm against the indexer.
using DocIndex = const unordered_map< string, vector< unsigned long > >;
/*
 * Reads a single whitespace-delimited token from standard input.
 *
 * Leading whitespace is skipped by the stream extraction operator, so this
 * call does not return on a bare newline; it waits until a non-blank token
 * (or end of input) arrives.
 *
 * Returns the token read, or an empty string when extraction fails
 * (for example because standard input has reached end-of-file).
 */
std::string wait_for_user_input()
{
    std::string answer;
    std::cin >> answer;
    return answer;
}
/*
 * Reports the received signal number on standard output and then terminates
 * the process, using the signal number as the exit status.
 *
 * signum : number of the signal being handled.
 *
 * NOTE(review): if this is installed through signal()/sigaction(), be aware
 * that neither iostream output nor exit() is async-signal-safe; confirm how
 * the handler is registered before relying on it during a multithreaded crawl.
 */
void signalHandler( int signum )
{
    // Both lines are emitted in one chained expression; std::endl flushes them
    // before the process goes away.
    std::cout << "Interrupt signal (" << signum << ") received.\n"
              << "Ending the Index build" << std::endl;

    // No explicit cleanup is performed here; the process simply exits.
    exit( signum );
}
{
/*
*
* Settings Flags to control program on start up
* to be read in via command line with default settings
*
* string : Mode : Getting content from the web vs local
*
* string : Seed : filename of list of starting urls
*
* int : numberOfSpiders: # of spiders crawler spawns
*
* int : numberOfParsers: # of parsers spawned
*
* bool : restoreFromLog: whether the program should restore itself from a previously saved state
*/
jsclose
committed
int numberOfSpiders = 1;
opterr = true;
int choice;
int option_index = 0;
option long_options[] = {
{ "num_crawlers", optional_argument, nullptr, 'c' },
{ "from_restart", optional_argument, nullptr, 'r' }
while ( ( choice = getopt_long( argc, argv, "m:c:r:", long_options, &option_index ) ) != -1 )
numberOfSpiders = atoi( optarg );
if ( numberOfSpiders > 100 )
{
ProducerConsumerQueue< DocIndex * > *IndexerQueue = new ProducerConsumerQueue< DocIndex * >( );
if ( mode == "local" )
seeds = util::getFileMap( "/tests/localSeed.txt" );
else
{
seeds = util::getFileMap( "/tests/webSeed.txt" );
string testFile;
while ( *seeds )
{
if ( *seeds == '\n' )
{
ParsedUrl * url = new ParsedUrl( testFile );
cout << "Pushing: " << testFile << " to queue\n";
urlFrontier->Push( url );
testFile = "";
}
else
testFile.push_back( *seeds );
++seeds;
}
if ( testFile != "" )
cout << "Pushing: " << testFile << " to queue\n";
Crawler *crawler = new Crawler( mode, urlFrontier, IndexerQueue );
atomic_bool *alive = new atomic_bool(true);
crawler->SpawnSpiders( numberOfSpiders , alive);
HouseKeeper logger( crawler );
string input;
while(true)
{
cout << "press enter to quit\n" << std::endl ;
//getline (cin, input);
cin >> input;
if(input == "q")
{
cout << "Shutting down the indexer " << endl ;
crawler->KillAllSpiders();
crawler->WaitOnAllSpiders( );
indexer.Kill();
indexer.WaitForFinish( );
urlFrontier->writeDataToDisk();
delete urlFrontier;
delete IndexerQueue;
cout << "Indexer has finished running " << endl;
return 0;
}
}