// crawlerTest.cpp
//
// Created by Jake Close on 3/21/18.
//

#include "../crawler.h"
#include "../spider.h"
#include <iostream>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <queue>
#include "../../crawler/crawler.h"
#include <openssl/ssl.h>
#include <string>
#include <unordered_map>
#include "../../util/util.h"
#include <getopt.h>
#include "../../indexer/Indexer.h"



// Per-document index payload handed from the crawler side to the indexer:
// maps a string key to a list of unsigned long values.
// Names are fully qualified so the alias does not depend on a
// `using namespace std;` having been seen earlier (e.g. via a project header).
using DocIndex = const std::unordered_map< std::string, std::vector< unsigned long > >;

using namespace std;


int main ( int argc, char *argv[] )
	{

	int numberOfSpiders = 1;
	unordered_map < size_t, int > *duplicateUrlMap = new unordered_map < size_t, int >( );
jsclose's avatar
jsclose committed
	UrlFrontier *urlFrontier = new UrlFrontier( );
	ProducerConsumerQueue < DocIndex * > *IndexerQueue = new ProducerConsumerQueue < DocIndex * >( );
	Indexer indexer( IndexerQueue );
jsclose's avatar
jsclose committed
	string path = util::GetCurrentWorkingDir() +"/crawler/tests/testSeeds.txt";
	/*
	seeds =  util::getFileMap( path );



	string testFile;
	while ( *seeds )
		{
		if ( *seeds == '\n' )
			{

			ParsedUrl url = ParsedUrl( testFile );
			cout << "Pushing: " << testFile << " to queue\n";
			urlFrontier->Push( url );
			testFile = "";
			}
		else
			testFile.push_back( *seeds );
		++seeds;
		}
jsclose's avatar
jsclose committed
	*/
jsclose's avatar
jsclose committed
	SSL_library_init( );
jsclose's avatar
jsclose committed
	//string url1 = "https://fivethirtyeight.com";
	//string url2 = "https:";

	string bad_url = "http-equiv=X-UA-Compatiblecontent=IE=edge,chrome=1";
	string bad_url2 ="http-equiv=Content-Type";
	//ParsedUrl url = ParsedUrl(bad_url);
	ParsedUrl url1 = ParsedUrl(bad_url);
	ParsedUrl url2 = ParsedUrl(bad_url2);
jsclose's avatar
jsclose committed
	urlFrontier->Push(&url1);
jsclose's avatar
jsclose committed

jsclose's avatar
jsclose committed
	urlFrontier->Push(&url2);
	Crawler crawler( mode, urlFrontier, IndexerQueue );
jsclose's avatar
jsclose committed
	crawler.SpawnSpiders( numberOfSpiders );

	crawler.WaitOnAllSpiders( );
	indexer.WaitForFinish( );


	auto f = urlFrontier->Pop( );
	int x = 0;
	delete urlFrontier;
	return 1;
	}