Skip to content
Snippets Groups Projects
main.cpp 4.01 KiB
Newer Older
  • Learn to ignore specific revisions
  • benbergk's avatar
    benbergk committed
    //
    //  main.cpp
    //
    
    
    benbergk's avatar
    benbergk committed
    #include <iostream>
    #include <stdlib.h>
    
    benbergk's avatar
    benbergk committed
    #include <unistd.h>
    
    benbergk's avatar
    benbergk committed
    #include <pthread.h>
    #include <queue>
    
    #include "crawler/crawler.h"
    
    #include <openssl/ssl.h>
    
    #include <string>
    
    jsclose's avatar
    jsclose committed
    //#include "crawler/CrawlerStatistics.h"
    
    #include <unordered_map>
    
    #include "util/util.h"
    
    benbergk's avatar
    benbergk committed
    #include <getopt.h>
    
    #include "indexer/Indexer.h"
    
    jsclose's avatar
    jsclose committed
    #include "crawler/UrlFrontier.h"
    
    #include <csignal>
    #include <iostream>
    #include <chrono>
    #include <future>
    #include <ctime>
    
    #include "crawler/HouseKeeper.h"
    
    using DocIndex = const unordered_map< string, vector< unsigned long > >;
    
    
    using namespace std;
    
    
    // Block until a whitespace-delimited token is read from stdin and return
    // it. On stream failure / EOF the extraction leaves `answer` empty, so an
    // empty string is returned.
    // Fix: removed the stray empty statement after `return` (`return answer; ;`).
    string wait_for_user_input()
    	{
    	std::string answer;
    	std::cin >> answer;
    	return answer;
    	}
    
    
    
    // Signal handler: report which signal arrived, announce that the index
    // build is ending, and terminate the process using the signal number as
    // the exit status.
    void signalHandler( int signum )
    	{
    	cout << "Interrupt signal (" << signum << ") received.\n"
    	     << "Ending the Index build" << endl;
    	exit( signum );
    	}
    
    
    benbergk's avatar
    benbergk committed
    
    
    vcday's avatar
    vcday committed
    int main ( int argc, char *argv[] )
    
    	{
    	/*
    	 *
    	 * Settings Flags to control program on start up
    	 * to be read in via command line with default settings
    	 *
    	 * string :  Mode : Getting content from the web vs local
    	 *
    	 * string : Seed : filename of list of starting urls
    	 *
    	 * int  : numberOfSpiders: # of spiders crawler spawns
    	 *
    	 * int  : numberOfParsers:  # of parsers  spawned
    	 *
    	 * bool : restoreFromLog: bool represeting if the program should load from saved state
    	 */
    
    jsclose's avatar
    jsclose committed
    	string mode = "web";
    
    jsclose's avatar
    jsclose committed
    	bool restart = false;
    
    benbergk's avatar
    benbergk committed
    
    	opterr = true;
    	int choice;
    	int option_index = 0;
    	option long_options[] = {
    
    vcday's avatar
    vcday committed
    			{ "mode",         optional_argument, nullptr, 'm' },
    
    jsclose's avatar
    jsclose committed
    			{ "num_crawlers", optional_argument, nullptr, 'c' },
    			{ "from_restart", optional_argument, nullptr, 'r' }
    
    jsclose's avatar
    jsclose committed
    	while ( ( choice = getopt_long( argc, argv, "m:c:r:", long_options, &option_index ) ) != -1 )
    
    vcday's avatar
    vcday committed
    		{
    		switch ( choice )
    			{
    
    benbergk's avatar
    benbergk committed
    			case 'm':
    
    				mode = optarg;
    
    vcday's avatar
    vcday committed
    				if ( mode != "web" && mode != "local" )
    					{
    
    benbergk's avatar
    benbergk committed
    					cerr << "Unknown input option";
    
    vcday's avatar
    vcday committed
    					exit( 1 );
    					}
    
    benbergk's avatar
    benbergk committed
    				break;
    
    			case 'c':
    
    
    vcday's avatar
    vcday committed
    				numberOfSpiders = atoi( optarg );
    				if ( numberOfSpiders > 100 )
    					{
    
    benbergk's avatar
    benbergk committed
    					cerr << "Too many crawlers!";
    
    vcday's avatar
    vcday committed
    					exit( 1 );
    					}
    
    benbergk's avatar
    benbergk committed
    				break;
    
    jsclose's avatar
    jsclose committed
    			case 'r':
    
    				restart = true;
    				break;
    
    benbergk's avatar
    benbergk committed
    
    			default:
    				cerr << "Unknown input option";
    
    vcday's avatar
    vcday committed
    				exit( 1 );
    
    vcday's avatar
    vcday committed
    			}
    
    benbergk's avatar
    benbergk committed
    		}
    
    	bool restoreFromLog;
    
    jsclose's avatar
    jsclose committed
    	UrlFrontier *urlFrontier = new UrlFrontier();
    
    vcday's avatar
    vcday committed
    	ProducerConsumerQueue< DocIndex * > *IndexerQueue = new ProducerConsumerQueue< DocIndex * >( );
    
    vcday's avatar
    vcday committed
    	if ( mode == "local" )
    		seeds = util::getFileMap( "/tests/localSeed.txt" );
    	else
    		{
    		seeds = util::getFileMap( "/tests/webSeed.txt" );
    
    		SSL_library_init( );
    
    vcday's avatar
    vcday committed
    		}
    
    jsclose's avatar
    jsclose committed
    	if(restart == false)
    
    vcday's avatar
    vcday committed
    		{
    
    jsclose's avatar
    jsclose committed
    		string testFile;
    		while ( *seeds )
    			{
    			if ( *seeds == '\n' )
    				{
    
    				ParsedUrl * url = new ParsedUrl( testFile );
    				cout << "Pushing: " << testFile << " to queue\n";
    				urlFrontier->Push( url );
    				testFile = "";
    				}
    			else
    				testFile.push_back( *seeds );
    			++seeds;
    			}
    		if ( testFile != "" )
    
    vcday's avatar
    vcday committed
    			{
    
    			cout << "Pushing: " << testFile << " to queue\n";
    
    jsclose's avatar
    jsclose committed
    			ParsedUrl * url = new ParsedUrl( testFile );
    
    vcday's avatar
    vcday committed
    			urlFrontier->Push( url );
    			}
    		}
    
    jsclose's avatar
    jsclose committed
    	//else
    		//urlFrontier->ReadDataFromDisk();
    
    
    
    
    
    
    
    vcday's avatar
    vcday committed
    	Indexer indexer( IndexerQueue );
    	indexer.StartThread( );
    
    	Crawler *crawler = new Crawler( mode, urlFrontier, IndexerQueue );
    
    	atomic_bool *alive = new atomic_bool(true); // At the beginning of the program
    
    	crawler->SpawnSpiders( numberOfSpiders , alive);
    
    	HouseKeeper logger( crawler );
    
    jsclose's avatar
    jsclose committed
    	//logger.StartThread( );
    
    	string input;
    	while(true)
    		{
    		cout << "press enter to quit\n" << std::endl ;
    		//getline (cin, input);
    		cin >> input;
    		if(input == "q")
    			{
    
    			cout << "Shutting down the indexer  " << endl ;
    
    			crawler->KillAllSpiders();
    			crawler->WaitOnAllSpiders( );
    
    			indexer.Kill();
    			indexer.WaitForFinish( );
    
    			urlFrontier->writeDataToDisk();
    
    			delete urlFrontier;
    			delete IndexerQueue;
    
    			cout << "Indexer has finished running " << endl;
    			return 0;
    
    			}
    
    		}
    
    
    
    	//main threads is just reading command
    	//if it wants work, has to spawn thread to do it
    	//thread we spawn, periodically pulls should
    
    vcday's avatar
    vcday committed
    	}