//
//  main.cpp
//
//  Driver for the crawler: seeds the URL frontier, spawns spider threads,
//  and (as a work in progress) reads a crawled page back off the file queue.
//
    
    
#include <iostream>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <queue>
#include <string>
#include <unordered_map>

#include "crawler/crawler.h"
//#include "crawler/CrawlerStatistics.h"

    
#define PATH_TO_BLACKLIST "/bin/blacklist.txt"
#define PATH_TO_VISITED_URL "bin/urls.txt"
#define PATH_TO_HTML_DIR "bin/html/"
#define PATH_TO_INDEX "bin/index/wordIDX"
#define PATH_TO_DOC_INDEX "bin/index/docIDX"


using namespace std;
    
    
    
int main(int argc, const char * argv[])
{

    /*
     * Settings flags that control the program on startup, read from the
     * command line with default values (a sketch of parsing them follows
     * the declarations below):
     *
     * string : mode : get content from the web or from local files
     *
     * string : seed : filename of the list of starting URLs
     *
     * int : numberOfSpiders : number of spider threads the crawler spawns
     *
     * int : numberOfParsers : number of parser threads spawned
     *
     * bool : restoreFromLog : whether the program should load from a saved state
     */
    
    // Crawl mode: "local" reads files from disk, otherwise content is fetched from the web
    string mode = "local";
    // Filename of the list of seed URLs
    string seed;
    // Assumed defaults until command-line parsing is wired up
    int numberOfSpiders = 1;
    int numberOfParsers = 1;
    bool restoreFromLog = false;
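
    // Minimal sketch (not in the original program) of reading the settings
    // above from the command line with getopt(3); the option letters and
    // their meanings here are assumptions for illustration only.
    int opt;
    while ((opt = getopt(argc, (char* const*) argv, "m:f:s:p:r")) != -1)
    {
        switch (opt)
        {
            case 'm': mode = optarg; break;                   // crawl mode: "web" or "local"
            case 'f': seed = optarg; break;                   // seed URL list filename
            case 's': numberOfSpiders = atoi(optarg); break;  // number of spider threads
            case 'p': numberOfParsers = atoi(optarg); break;  // number of parser threads
            case 'r': restoreFromLog = true; break;           // restore from saved state
        }
    }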
    
    
    
    // Thread-safe queue of URLs waiting to be crawled (the URL frontier)
    ProducerConsumerQueue<string> urlFrontier;
    
    // Thread-safe queue of file descriptors for downloaded pages awaiting parsing
    ProducerConsumerQueue<int> fileQueue;
    
        cout << "Pushed File\n";
    
        urlFrontier.Push("tests/cats.html");
    
        urlFrontier.Push("tests/store.html");
    
    
    // Document lookup table shared with the spiders (heap-allocated, never freed before exit)
    unordered_map<string, int>* docMapLookUp = new unordered_map<string, int>();
    
    // Create the crawler and spawn a single spider thread to work the frontier
    Crawler crawler(mode, &urlFrontier);

    crawler.SpawnSpiders(1, docMapLookUp);
    
    // Block until every spider thread has finished
    crawler.WaitOnAllSpiders();
    
    
    
    // Work in progress: simulate a parser by popping a file descriptor off the
    // file queue and reading the downloaded page back out. Nothing in this file
    // pushes onto fileQueue yet, so this section only works once the spiders
    // are wired up to fill it.

    cout << "Done Waiting\nQueue Size is: " << fileQueue.Size() << "\n";
    auto top = fileQueue.Pop();
    char buf[100];
    auto ret = read(top, buf, sizeof(buf));

    cout << "read val: " << ret << "\n";
    // Only print the bytes that were actually read
    for (ssize_t i = 0; i < ret; i++) {
        cout << buf[i];
    }
    
    

    return 0;
}