#include "crawler/crawler.h"
#include <string>
#include <iostream>
#include <unordered_map>
#include <unistd.h>
#define PATH_TO_BLACKLIST "/bin/blacklist.txt"
#define PATH_TO_VISITED_URL "bin/urls.txt"
#define PATH_TO_HTML_DIR "bin/html/"
#define PATH_TO_INDEX "bin/index/wordIDX"
#define PATH_TO_DOC_INDEX "bin/index/docIDX"
using namespace std;
/*
 * Settings flags that control the program on startup,
 * read in via the command line with default settings
 * (a sketch of parsing them follows the declarations below).
 *
 * string : mode            : get content from the web vs. locally
 * string : seed            : filename of the list of starting urls
 * int    : numberOfSpiders : number of spiders the crawler spawns
 * int    : numberOfParsers : number of parsers spawned
 * bool   : restoreFromLog  : whether the program should load from a saved state
 */
// Mode: get content from the web or from local files
string mode = "local";
// Filename of the list of seed urls
string seed;
// Worker counts and restore flag
int numberOfSpiders;
int numberOfParsers;
bool restoreFromLog;
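// A minimal sketch of reading the settings above from the command line.
// The flag names (--mode, --seed, --spiders, --parsers, --restore) are
// assumptions for illustration, not part of the existing interface.
void ParseArgs(int argc, char** argv) {
    for (int i = 1; i < argc; i++) {
        string arg = argv[i];
        if (arg == "--mode" && i + 1 < argc)
            mode = argv[++i];
        else if (arg == "--seed" && i + 1 < argc)
            seed = argv[++i];
        else if (arg == "--spiders" && i + 1 < argc)
            numberOfSpiders = stoi(argv[++i]);
        else if (arg == "--parsers" && i + 1 < argc)
            numberOfParsers = stoi(argv[++i]);
        else if (arg == "--restore")
            restoreFromLog = true;
    }
}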
// Shared document map handed to each spawned spider
unordered_map<string, int>* docMapLookUp = new unordered_map<string, int>();
crawler.SpawnSpiders(1, docMapLookUp);
// This part is a work in progress; I was just trying to simulate the
// parsers and see if they could open and read the file.
cout << "Done Waiting\nQueue Size is: " << fileQueue.Size();
auto top = fileQueue.Pop();
char buf[100];
auto ret = read(top, buf, 100);
cout << "read val: " << ret;
for(int i = 0; i < 100; i++){
cout << buf[i];
}