Skip to content
Snippets Groups Projects
Commit 3685c6b9 authored by benbergk's avatar benbergk
Browse files

Created Crawler Statistics

parent e6bbbee1
Branches
No related tags found
No related merge requests found
......@@ -6,7 +6,7 @@
#include <stdlib.h>
#include <pthread.h>
#include "ProducerConsumerQueue.h"
#include "ProducerConsumerQueue.cpp" // needed here because class is a template
pthread_mutex_t cout_lock = PTHREAD_MUTEX_INITIALIZER;
......
File added
//
// Created by Ben Bergkamp on 2/1/18.
//
#include "CrawlerStatistics.h"
// Thread entry point for the statistics worker (overrides ThreadClass::FuncToRun).
// Currently a stub: the intended behavior, per the sketch below, is to sleep on a
// fixed interval and then gather crawl statistics — TODO(review): confirm the
// 3-minute cadence and which metrics are collected once implemented.
void CrawlerStatistics::FuncToRun()
{
//Sleep(3 minutes)
//Gather data
}
\ No newline at end of file
//
// Created by Ben Bergkamp on 2/1/18.
//
#ifndef EECS398_SEARCH_CRAWLERSTATISTICS_H
#define EECS398_SEARCH_CRAWLERSTATISTICS_H

#include "../ThreadClass.h"

// Housekeeping thread for the crawler: runs alongside the spiders and
// periodically gathers crawl statistics (see CrawlerStatistics.cpp — the
// body is currently a stub). Owned by Crawler as the `housekeeper` member.
// NOTE(review): assumes ThreadClass declares FuncToRun() as its virtual
// thread-entry hook — confirm against ThreadClass.h.
class CrawlerStatistics : public ThreadClass {
public:
    // Default-constructed; no state to initialize yet.
    CrawlerStatistics() { };

    // Thread entry point invoked by the ThreadClass machinery.
    virtual void FuncToRun();

private:
    //members
};

#endif //EECS398_SEARCH_CRAWLERSTATISTICS_H
......@@ -4,6 +4,7 @@
#include "spider.h"
#include<string>
#include "../ProducerConsumerQueue.h"
#include "CrawlerStatistics.h"
/*
*
*/
......@@ -12,7 +13,7 @@ using namespace std;
class Crawler {
public:
Crawler(string mode_in, ProducerConsumerQueue<string>* url_q_in , ProducerConsumerQueue<string>* html_q_in)
Crawler(string mode_in, ProducerConsumerQueue<string>* url_q_in , ProducerConsumerQueue<int>* html_q_in)
: mode( mode_in ), urlFrontier(url_q_in) , fileQueue(html_q_in) { } ;
//spawns a number of works
......@@ -26,7 +27,8 @@ public:
private:
vector<Spider*> spiders;
ProducerConsumerQueue<string> *urlFrontier;
ProducerConsumerQueue<string> *fileQueue;
ProducerConsumerQueue<int> *fileQueue;
CrawlerStatistics housekeeper;
string mode;
};
......
......@@ -3,6 +3,10 @@
//
#include "spider.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
string Spider::getUrl()
{
......@@ -23,6 +27,7 @@ void Spider::FuncToRun()
// markURLSeen( currentUrl );
// writeHTMLtoDisk( );
// addHTMLToQueue( );
cond = false;
}
else
{
......@@ -34,25 +39,48 @@ void Spider::FuncToRun()
// Fetches the document at `url`. In "local" mode the url is a filesystem
// path: read up to 100 non-whitespace characters, persist them via
// writeFileToDisk(), and push the resulting file descriptor onto fileQueue
// for the parser to consume.
// Returns true on a successful local fetch, false for unsupported modes.
// Exits the process if the file cannot be opened (matches prior behavior).
// TODO(review): the 100-byte buffer truncates larger documents — confirm
// intended document size handling.
bool Spider::request( string url )
{
    char buf[100];
    if ( this->mode == "local" )
    {
        ifstream inFile;
        inFile.open(url);
        if ( !inFile )
        {
            cout << "Unable to open file";
            exit(1); // terminate with error
        }
        // operator>> skips whitespace, so this collects up to 100
        // non-whitespace characters from the file.
        int i = 0;
        while (i < 100 && inFile >> buf[i])
        {
            i++;
        }
        inFile.close();
        // Zero-fill the unread tail so we never write uninitialized
        // stack bytes to disk when the file held fewer than 100 chars.
        while (i < 100)
        {
            buf[i++] = '\0';
        }
        int file = writeFileToDisk(buf, 100);
        fileQueue->Push(file);
        return true;
    }
    return false;
}
// Writes fileSize bytes of fileContents to a scratch file and returns the
// open file descriptor, rewound to offset 0 so the consumer (see main's
// read() on the popped descriptor) can read the content back.
// Returns -1 if the file could not be created.
// TODO(review): hard-coded absolute path — every spider clobbers the same
// file; parameterize per-document before running multiple spiders.
int Spider::writeFileToDisk( char * fileContents, size_t fileSize)
{
    int fd = creat("/Users/benbergkamp/Desktop/398/eecs398-search/test.txt", S_IRWXU);
    if(fd == -1)
    {
        cout << "ERROR CREATING FILE\n";
        return fd;   // don't attempt to write (or report a short write) without a file
    }
    ssize_t bytes_written = write(fd, fileContents, fileSize);
    // Compare against the requested size, not a hard-coded 100.
    if(bytes_written != (ssize_t)fileSize)
    {
        cout << "ERROR: Only " << bytes_written << " bytes written\n";
    }
    // After write() the offset sits at EOF; rewind so a subsequent read()
    // on this fd returns the file contents instead of 0 bytes.
    lseek(fd, 0, SEEK_SET);
    return fd;
}
\ No newline at end of file
......@@ -14,7 +14,7 @@ class Spider : public ThreadClass{
public:
Spider(string mode_in, ProducerConsumerQueue<string>* url_q_in , ProducerConsumerQueue<string>* html_q_in)
Spider(string mode_in, ProducerConsumerQueue<string>* url_q_in , ProducerConsumerQueue<int>* html_q_in)
: mode( mode_in ), urlFrontier(url_q_in) , fileQueue(html_q_in) {};
......@@ -29,7 +29,7 @@ public:
bool request( string url );
//Where to write to disk? What type of data are we reading in?
void writeHTMLtoDisk( );
int writeFileToDisk(char * fileContents, size_t fileSize );
//Adds location
void addHTMLToQueue();
......@@ -41,7 +41,8 @@ private:
int locationOnDisk;
ProducerConsumerQueue<string> *urlFrontier;
ProducerConsumerQueue<string> *fileQueue;
ProducerConsumerQueue<int> *fileQueue;
string mode;
};
\ No newline at end of file
......@@ -6,6 +6,7 @@
#include <iostream>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <queue>
#include "crawler/crawler.h"
......@@ -53,7 +54,7 @@ int main(int argc, const char * argv[])
ProducerConsumerQueue<string> urlFrontier;
ProducerConsumerQueue<string> fileQueue;
ProducerConsumerQueue<int> fileQueue;
cout << "Pushed File\n";
urlFrontier.Push("tests/cats.html");
......@@ -65,5 +66,21 @@ int main(int argc, const char * argv[])
crawler.WaitOnAllSpiders();
//This part is a work in progress I was just trying to simulate the
// parser and see if they could open and read the file
cout << "Done Waiting\nQueue Size is: " << fileQueue.Size();
auto top = fileQueue.Pop();
char buf[100];
auto ret = read(top, buf, 100);
cout << "read val: " << ret;
for(int i = 0; i < 100; i++){
cout << buf[i];
}
}
\ No newline at end of file
# Build the crawler binary. CrawlerStatistics.cpp must be linked in,
# otherwise CrawlerStatistics::FuncToRun is an undefined symbol.
all:
	g++ -std=c++11 main.cpp crawler/crawler.cpp crawler/spider.cpp crawler/CrawlerStatistics.cpp -o crawler.exe -lpthread
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment