// Standard headers used below; project headers for Crawler, ParsedUrl,
// ProducerConsumerQueue, and util are assumed to be included here as well
// (their exact paths are not shown in this fragment).
#include <iostream>
#include <string>
#include <vector>
#include <unordered_map>
#include <cstdlib>
#include <getopt.h>

using namespace std;

#define PATH_TO_BLACKLIST "/bin/blacklist.txt"
#define PATH_TO_VISITED_URL "bin/urls.txt"
#define PATH_TO_HTML_DIR "bin/html/"
#define PATH_TO_INDEX "bin/index/wordIDX"
#define PATH_TO_DOC_INDEX "bin/index/docIDX"
using DocIndex = const unordered_map< string, vector< unsigned long > >;
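// DocIndex: the per-document index a parser produces, mapping each word
// to a vector of unsigned long values (e.g. its offsets within the document).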
int main( int argc, char *argv[] )
{
	/*
	 * Settings flags that control the program at start up,
	 * read from the command line with default values:
	 *
	 * string : mode            : get content from the web vs. local files
	 * string : seed            : filename of the list of starting urls
	 * int    : numberOfSpiders : number of spiders the crawler spawns
	 * int    : numberOfParsers : number of parsers spawned
	 * bool   : restoreFromLog  : whether the program should load from saved state
	 */
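	// Illustrative invocation (binary name assumed):
	//   ./crawler --mode=web --num_crawlers=4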
	string mode = "local";   // default assumed from the local/web check below
	int numberOfSpiders = 1;
opterr = true;
int choice;
int option_index = 0;
	option long_options[] = {
			{ "mode",         optional_argument, nullptr, 'm' },
			{ "num_crawlers", optional_argument, nullptr, 'c' },
			{ nullptr, 0, nullptr, 0 }   // getopt_long requires a null-terminated option array
	};
	while ( ( choice = getopt_long( argc, argv, "m:c:", long_options, &option_index ) ) != -1 )
	{
		switch ( choice )
		{
			case 'm':
				mode = optarg;
				break;
			case 'c':
				numberOfSpiders = atoi( optarg );
				// The original code only checked this 100-spider limit; capping is an assumed completion
				if ( numberOfSpiders > 100 )
					numberOfSpiders = 100;
				break;
			default:
				cerr << "Unknown input option\n";
		}
	}
unordered_map< size_t, int > *duplicateUrlMap = new unordered_map< size_t, int >( );
ProducerConsumerQueue< ParsedUrl * > *urlFrontier = new ProducerConsumerQueue< ParsedUrl * >( );
ProducerConsumerQueue< DocIndex * > *IndexerQueue = new ProducerConsumerQueue< DocIndex * >( );
	// Seed urls for the chosen mode: local test pages vs. live web pages
	auto seeds = util::getFileMap( mode == "local" ? "/tests/localSeed.txt"
	                                               : "/tests/webSeed.txt" );

	// Each seed file is logged as it is handed to the url frontier;
	// testFile refers to the file being enqueued (its loop is not part of this fragment)
	cout << "Pushing: " << testFile << " to queue\n";
crawler.SpawnSpiders( numberOfSpiders, duplicateUrlMap );
	// Block until the operator types "q", then release the shared structures and exit
	string input;
	cin >> input;
	while ( input != "q" )
		cin >> input;

	delete duplicateUrlMap;
	delete IndexerQueue;
	delete urlFrontier;
	return 0;
}