Skip to content
Snippets Groups Projects
Commit 0ae2c4d8 authored by jsclose's avatar jsclose
Browse files

refactoring with utils

parent 3685c6b9
No related branches found
No related tags found
No related merge requests found
No preview for this file type
......@@ -4,7 +4,7 @@
#include "spider.h"
#include<string>
#include "../ProducerConsumerQueue.h"
#include "CrawlerStatistics.h"
//#include "CrawlerStatistics.h"
/*
*
*/
......@@ -28,7 +28,7 @@ private:
vector<Spider*> spiders;
ProducerConsumerQueue<string> *urlFrontier;
ProducerConsumerQueue<int> *fileQueue;
CrawlerStatistics housekeeper;
//CrawlerStatistics housekeeper;
string mode;
};
......
......@@ -7,6 +7,10 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "../util/util.h"
string Spider::getUrl()
{
......@@ -19,14 +23,23 @@ void Spider::FuncToRun()
std::cout << "Spider is crawling" << endl;
bool cond = true;
while( cond )
{
string currentUrl = getUrl();
if ( request( currentUrl ) )
char * fileMap;
if ( request( currentUrl , fileMap ) )
{
// markURLSeen( currentUrl );
// writeHTMLtoDisk( );
// addHTMLToQueue( );
string HARDCODEDLOCATION = "../crawlerOutput/" + currentUrl;
int fd = writeFileToDisk(fileMap , HARDCODEDLOCATION );
//Write to disk successful
if( fd !=-1 )
{
addFDToQueue( fd );
}
cond = false;
}
else
......@@ -37,50 +50,28 @@ void Spider::FuncToRun()
}
// NOTE(review): this span appears to be a rendered diff of Spider::request with
// the +/- markers stripped, so lines of the previous and the new implementation
// are interleaved (two signatures, two bodies). Read it as a change record, not
// as a compilable definition.
//
// Previous version: in "local" mode, read up to 100 characters of the file named
// by url into buf, wrote them out through writeFileToDisk and pushed the returned
// descriptor onto fileQueue.
// New version: in "local" mode, calls getFileMap( url ) and returns true when the
// result is not nullptr; otherwise returns false.
bool Spider::request( string url )
bool Spider::request( string url , char* fileMap)
{
char buf[100];
if ( this->mode == "local" )
{
ifstream inFile;
string in;
inFile.open(url);
if ( !inFile )
{
cout << "Unable to open file";
exit(1); // terminate with error////
}
int i = 0;
while (i < 100 && inFile >> buf[i])
{
i++;
// NOTE(review): fileMap is declared as a by-value char*, so this assignment only
// changes the local copy; the caller's pointer is not updated by it. Confirm
// whether the parameter was meant to be char*& or char**.
fileMap = getFileMap( url );
if (fileMap != nullptr )
return true;
}
inFile.close();
int file = writeFileToDisk(buf, 100);
fileQueue->Push(file);
return true;
}
return false;
}
// NOTE(review): this span appears to be a rendered diff of Spider::writeFileToDisk
// with the +/- markers stripped; the old and new signatures and bodies are
// interleaved, so it is a change record rather than a compilable definition.
//
// Previous version: created a file at a hard-coded absolute path, wrote fileSize
// bytes of fileContents to it, reported an error whenever the byte count written
// differed from the literal 100, and returned the descriptor.
// New version: forwards fileContents and locationOnDisk to
// writeToNewFileToLocation and returns its result.
int Spider::writeFileToDisk( char * fileContents, size_t fileSize)
int Spider::writeFileToDisk( char * fileContents , string locationOnDisk)
{
int fd = creat("/Users/benbergkamp/Desktop/398/eecs398-search/test.txt", S_IRWXU);
ssize_t bytes_written = 0;
if(fd != -1)
{
bytes_written = write(fd, fileContents, fileSize);
} else
{
cout << "ERROR CREATING FILE\n";
}
// The check below compares against the constant 100, not against fileSize.
if(bytes_written != 100)
{
cout << "ERROR: Only " << bytes_written << " bytes written\n";
}
return fd;
return writeToNewFileToLocation( fileContents, locationOnDisk );
}
// Enqueues fileDescriptor by pushing it onto fileQueue.
void Spider::addFDToQueue( int fileDescriptor )
{
    auto & queue = *fileQueue;
    queue.Push( fileDescriptor );
}
\ No newline at end of file
......@@ -26,13 +26,13 @@ public:
//Makes request to given url
// if successful, writes file to disk, stores location to member value
// else return false and error information, retry if necessary
bool request( string url );
bool request( string url , char* fileMap);
//Where to write to disk? What type of data are we reading in?
int writeFileToDisk(char * fileContents, size_t fileSize );
int writeFileToDisk( char * fileContents , string locationOnDisk);
//Adds location
void addHTMLToQueue();
void addFDToQueue( int fileDescriptor );
void markURLSeen( string URL );
......
<!DOCTYPE
html>
<html>
<head>
<!--
HTML
Codes
by
Quackit.com
-->
<title>
Food
store
is
here</title>
<meta
name="viewport"
content="width=device-width,
initial-scale=1">
<meta
name="keywords"
content="store
food
dinner
lunch">
<meta
name="description"
content="The
food
store
sells
cat
food
for
dinner,
lunch,
and
breakfast.">
<style>
body
{background-color:#ffffff;background-repeat:no-repeat;background-position:top
left;background-attachment:fixed;}
h1{font-family:Arial,
sans-serif;color:#000000;background-color:#ffffff;}
p
{font-family:Georgia,
serif;font-size:14px;font-style:normal;font-weight:normal;color:#000000;background-color:#ffffff;}
</style>
</head>
<body>
<h1>COme
shop
Come
shop
at
our
Store</h1>
<p>Please
come
to
our
store!</p>
</body>
</html>
......@@ -13,6 +13,7 @@
#include <string>
#include "ProducerConsumerQueue.h"
#include "crawler/spider.h"
//#include "crawler/CrawlerStatistics.h"
// Path of the blacklist file, as a string literal.
// A #define takes no '=' and a path needs double quotes: the previous form
// expanded to `= '/bin/blacklist.txt'`, i.e. a stray '=' followed by a
// multi-character char literal rather than a usable path string.
#define PATH_TO_BLACKLIST "/bin/blacklist.txt"
......
//
// Created by Jake Close on 2/6/18.
//
#pragma once
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <string>
using namespace std;
/*
 * Returns the size in bytes of the file behind descriptor f, as reported by
 * fstat( ).
 *
 * Returns 0 when fstat( ) fails (for example f is -1 or is not an open
 * descriptor) or reports a negative size, instead of returning whatever
 * happened to be in the uninitialised stat structure.
 *
 * Marked inline because this file is guarded by #pragma once and is meant to be
 * #included; a non-inline definition would be duplicated in every translation
 * unit that includes it.
 */
inline size_t FileSize( int f )
{
    struct stat fileInfo;
    if ( fstat( f, &fileInfo ) != 0 || fileInfo.st_size < 0 )
        return 0;
    return static_cast< size_t >( fileInfo.st_size );
}
/*
 * Maps the file named fileName into memory read-only (PROT_READ, MAP_PRIVATE)
 * and returns a pointer to its first byte.
 *
 * Returns nullptr if the file cannot be opened, its size cannot be obtained,
 * it is empty (a zero-length mapping is not possible), or mmap( ) fails.
 *
 * The descriptor opened here is closed before returning on every path; the
 * mapping itself does not depend on the descriptor staying open. The caller is
 * responsible for eventually releasing the mapping with munmap( ).
 *
 * The mapping is requested for exactly the file's size; nothing here appends a
 * terminating '\0', so callers should not assume the result is a C string.
 */
inline char * getFileMap( std::string fileName )
{
    const int fd = open( fileName.c_str( ), O_RDONLY );
    if ( fd == -1 )
        return nullptr;

    char *map = nullptr;
    struct stat fileInfo;
    // Check fstat explicitly so a failed size lookup never feeds a garbage
    // length into mmap.
    if ( fstat( fd, &fileInfo ) == 0 && fileInfo.st_size > 0 )
    {
        void *mapped = mmap( nullptr, static_cast< size_t >( fileInfo.st_size ),
                             PROT_READ, MAP_PRIVATE, fd, 0 );
        if ( mapped != MAP_FAILED )
            map = static_cast< char * >( mapped );
    }

    // Previously the descriptor was leaked on both success and failure.
    close( fd );
    return map;
}
/*
 * Maps the file behind an already-open descriptor into memory read-only
 * (PROT_READ, MAP_PRIVATE) and returns a pointer to its first byte.
 *
 * Returns nullptr if fileDescriptor is -1, its size cannot be obtained with
 * fstat( ), the file is empty, or mmap( ) fails.
 *
 * The descriptor is left open; it still belongs to the caller. The caller is
 * also responsible for eventually releasing the mapping with munmap( ).
 *
 * The mapping is requested for exactly the file's size; nothing here appends a
 * terminating '\0'.
 */
inline char * getFileMap( int fileDescriptor )
{
    if ( fileDescriptor == -1 )
        return nullptr;

    struct stat fileInfo;
    // A failed fstat leaves fileInfo unset, so bail out instead of mapping
    // with an indeterminate length.
    if ( fstat( fileDescriptor, &fileInfo ) != 0 || fileInfo.st_size <= 0 )
        return nullptr;

    void *mapped = mmap( nullptr, static_cast< size_t >( fileInfo.st_size ),
                         PROT_READ, MAP_PRIVATE, fileDescriptor, 0 );
    return mapped == MAP_FAILED ? nullptr : static_cast< char * >( mapped );
}
/*
 * Writes a NUL-terminated buffer to a new file and returns a descriptor for it.
 *
 * fileContents   pointer to the first byte to write; its length is taken with
 *                strlen( ), so it must be NUL-terminated.
 * locationOnDisk path of the file to create (truncated if it already exists).
 *
 * Returns the open descriptor of the file, or -1 if fileContents is null or
 * the file could not be created. If fewer bytes than expected could be
 * written, an error is printed and the descriptor is still returned, as
 * before.
 *
 * The file is opened O_RDWR rather than through creat( ) (which is write-only)
 * so that the returned descriptor can also be read from or mapped PROT_READ.
 * The caller owns the descriptor and must close( ) it.
 */
inline int writeToNewFileToLocation( char * fileContents , std::string locationOnDisk)
{
    if ( fileContents == nullptr )
    {
        std::cout << "ERROR: no file contents to write\n";
        return -1;
    }

    const int fd = open( locationOnDisk.c_str( ), O_RDWR | O_CREAT | O_TRUNC, S_IRWXU );
    if ( fd == -1 )
    {
        // Return right away: there is nothing to write to, so the
        // "bytes written" diagnostic below would only be noise.
        std::cout << "ERROR CREATING FILE\n";
        return -1;
    }

    const size_t fileSize = strlen( fileContents );
    size_t totalWritten = 0;
    // write( ) may accept fewer bytes than requested; keep going until the
    // whole buffer is out or the call stops making progress.
    while ( totalWritten < fileSize )
    {
        const ssize_t written = write( fd, fileContents + totalWritten, fileSize - totalWritten );
        if ( written <= 0 )
            break;
        totalWritten += static_cast< size_t >( written );
    }

    if ( totalWritten != fileSize )
        std::cout << "ERROR: Only " << totalWritten << " bytes written\n";
    return fd;
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment