Skip to content
Snippets Groups Projects
main.cpp 1.88 KiB
Newer Older
benbergk's avatar
benbergk committed
//
//  main.cpp
//

benbergk's avatar
benbergk committed
#include <iostream>
#include <stdlib.h>
benbergk's avatar
benbergk committed
#include <unistd.h>
benbergk's avatar
benbergk committed
#include <pthread.h>
#include <queue>
#include "crawler/crawler.h"
#include <string>
jsclose's avatar
jsclose committed
//#include "crawler/CrawlerStatistics.h"
#include <unordered_map>
#include "util/util.h"

// File-system locations used by the crawler/indexer.
// NOTE: the previous form `#define NAME = '...'` was broken — the `=` became
// part of the macro replacement and the single quotes produced multi-character
// char literals, so every use of these macros would fail to compile. Object-like
// macros take no `=`, and paths are string literals.
#define PATH_TO_BLACKLIST "/bin/blacklist.txt"
#define PATH_TO_VISITED_URL "bin/urls.txt"
#define PATH_TO_HTML_DIR "bin/html/"
#define PATH_TO_INDEX "bin/index/wordIDX"
#define PATH_TO_DOC_INDEX "bin/index/docIDX"


using namespace std;

benbergk's avatar
benbergk committed

int main( int argc, const char *argv[] )
	{
	/*
	 *
	 * Settings Flags to control program on start up
	 * to be read in via command line with default settings
	 *
	 * string :  Mode : Getting content from the web vs local
	 *
	 * string : Seed : filename of list of starting urls
	 *
	 * int  : numberOfSpiders: # of spiders crawler spawns
	 *
	 * int  : numberOfParsers:  # of parsers  spawned
	 *
	 * bool : restoreFromLog: bool represeting if the program should load from saved state
	 */
jsclose's avatar
jsclose committed
	string mode = "local";
	// Seed urls?
	string seed;
	//
	int numberOfSpiders;
	int numberOfParsers;
	bool restoreFromLog;
	ProducerConsumerQueue < string > urlFrontier;
	cout << "Pushed File\n";
	char *seeds;
	if ( mode == "local" )
		seeds = util::getFileMap( "/tests/localSeed.txt" );
	else
		seeds = util::getFileMap( "/tests/webSeed.txt" );
	string testFile;
	while ( *seeds )
		{
		if ( *seeds == '\n')
			{
			urlFrontier.Push(testFile);
			testFile = "";
			}
		else
			testFile.push_back(*seeds);
		++seeds;
	}
	urlFrontier.Push(testFile);
//urlFrontier.Push("tests/store.html");
unordered_map < string, int > *docMapLookUp = new unordered_map < string, int >( );
Crawler crawler( mode, &urlFrontier );
jsclose's avatar
jsclose committed
crawler.SpawnSpiders(3 , docMapLookUp);
WaitOnAllSpiders();
//This part is a work in progress I was just trying to simulate the
// parser and see if they could open and read the file
benbergk's avatar
benbergk committed
}