Skip to content
Snippets Groups Projects
Commit 2693bb0f authored by benbergk's avatar benbergk
Browse files

fixed linker error, made formatting changes

parent 02c39b5b
Branches
No related tags found
No related merge requests found
//
// Created by Ben Bergkamp on 2/14/18.
//
#ifndef EECS398_SEARCH_LOCALREADER_H
#define EECS398_SEARCH_LOCALREADER_H
#include "StreamReader.h"
class LocalReader : public StreamReader
{
string fileName;
void fillBuffer(){
strcpy(buffer, util::getFileMap( fileName )) ;
}
public:
LocalReader( string url_in ) : fileName( url_in ) { }
};
#endif //EECS398_SEARCH_LOCALREADER_H
//
// Created by Ben Bergkamp on 2/14/18.
//
#include "SocketReader.h"
/**
 * Requests `url` with a plain HTTP/1.1 GET over TCP port 80 and copies every
 * byte received (headers and body) to file descriptor 1 (standard output).
 *
 * Any failure (socket creation, host lookup, connect, send, recv) is reported
 * on stderr and ends the fetch early; the socket is closed on every path.
 * Earlier these conditions were checked with assert(), which aborts the whole
 * process in debug builds and is compiled out under NDEBUG, leaving a null
 * `host` to be dereferenced.
 *
 * NOTE(review): nothing is stored in StreamReader::buffer here; the response
 * is only streamed to stdout.
 */
void SocketReader::fillBuffer()
{
    const int s = socket( AF_INET, SOCK_STREAM, IPPROTO_TCP );
    if ( s == -1 )
    {
        std::cerr << "SocketReader: could not create socket" << std::endl;
        return;
    }

    // Get the host address.
    struct hostent *host = gethostbyname( url.Host );
    if ( host == nullptr )
    {
        std::cerr << "SocketReader: host lookup failed" << std::endl;
        close( s );
        return;
    }

    struct sockaddr_in address;
    memset( &address, 0, sizeof( address ) );
    address.sin_family = AF_INET;
    address.sin_port = htons( 80 );
    memcpy( &address.sin_addr, host->h_addr, host->h_length );

    // Connect to the host.
    if ( connect( s, ( struct sockaddr * )&address, sizeof( address ) ) != 0 )
    {
        std::cerr << "SocketReader: connect failed" << std::endl;
        close( s );
        return;
    }

    // Send a GET message for the desired page.
    std::string getMessage = "GET ";
    getMessage += url.CompleteUrl;
    getMessage += " HTTP/1.1\r\nHost: ";
    getMessage += url.Host;
    getMessage += "\r\nConnection: close\r\n\r\n";
    std::cout << getMessage << std::endl;

    // send() may accept fewer bytes than requested, so keep going until the
    // whole request has been handed to the kernel.
    const char *cursor = getMessage.c_str( );
    size_t remaining = getMessage.length( );
    while ( remaining > 0 )
    {
        const ssize_t sent = send( s, cursor, remaining, 0 );
        if ( sent <= 0 )
        {
            std::cerr << "SocketReader: send failed" << std::endl;
            close( s );
            return;
        }
        cursor += sent;
        remaining -= static_cast<size_t>( sent );
    }

    // Read from the socket until there's no more data. The scratch array has
    // its own name so it no longer shadows the inherited `buffer` member.
    char chunk[ 10240 ];
    ssize_t bytes;
    while ( ( bytes = recv( s, chunk, sizeof( chunk ), 0 ) ) > 0 )
    {
        if ( write( 1, chunk, static_cast<size_t>( bytes ) ) < 0 )
            break;
    }
    if ( bytes < 0 )
        std::cerr << "SocketReader: recv failed" << std::endl;

    close( s );
}
\ No newline at end of file
//
// Created by Ben Bergkamp on 2/14/18.
//
#ifndef EECS398_SEARCH_SOCKETREADER_H
#define EECS398_SEARCH_SOCKETREADER_H
#include "StreamReader.h"
class SocketReader : public StreamReader
{
public:
SocketReader( string url_in ) : url( ParsedUrl( url_in ) ) { }
virtual void fillBuffer();
private:
ParsedUrl url;
};
#endif //EECS398_SEARCH_SOCKETREADER_H
......@@ -14,84 +14,18 @@
#include <string.h>
#include <string>
#include <cassert>
/**
 * Abstract source of document text. Concrete readers implement fillBuffer()
 * and publish their data through `buffer`.
 */
class StreamReader
{
public:
    // Data made available by fillBuffer(); null until a reader sets it.
    char *buffer = nullptr;

    StreamReader() { }

    // Readers are handled through StreamReader pointers, so destruction via
    // the base type must reach the derived destructor.
    virtual ~StreamReader() { }

    // Implemented by each concrete reader.
    virtual void fillBuffer() = 0;
};
/**
 * StreamReader bound to a URL; fillBuffer() performs an HTTP GET for it.
 *
 * This class used to declare its own `protected char *buffer`, which hid the
 * `buffer` inherited from StreamReader. That member was never used by
 * fillBuffer() and has been dropped, so `buffer` now always names the
 * inherited pointer.
 */
class SocketReader : public StreamReader
{
    ParsedUrl url;

    /**
     * Requests `url` with a plain HTTP/1.1 GET over TCP port 80 and copies
     * every byte received to file descriptor 1 (standard output).
     *
     * Failures are reported on stderr and end the fetch early; the socket is
     * closed on every path. These conditions were previously guarded only by
     * assert(), which aborts in debug builds and disappears under NDEBUG.
     *
     * NOTE(review): nothing is stored in `buffer`; the response is only
     * streamed to stdout.
     */
    void fillBuffer()
    {
        const int s = socket( AF_INET, SOCK_STREAM, IPPROTO_TCP );
        if ( s == -1 )
        {
            std::cerr << "SocketReader: could not create socket" << std::endl;
            return;
        }

        // Get the host address.
        struct hostent *host = gethostbyname( url.Host );
        if ( host == nullptr )
        {
            std::cerr << "SocketReader: host lookup failed" << std::endl;
            close( s );
            return;
        }

        struct sockaddr_in address;
        memset( &address, 0, sizeof( address ) );
        address.sin_family = AF_INET;
        address.sin_port = htons( 80 );
        memcpy( &address.sin_addr, host->h_addr, host->h_length );

        // Connect to the host.
        if ( connect( s, ( struct sockaddr * )&address, sizeof( address ) ) != 0 )
        {
            std::cerr << "SocketReader: connect failed" << std::endl;
            close( s );
            return;
        }

        // Send a GET message for the desired page.
        std::string getMessage = "GET ";
        getMessage += url.CompleteUrl;
        getMessage += " HTTP/1.1\r\nHost: ";
        getMessage += url.Host;
        getMessage += "\r\nConnection: close\r\n\r\n";
        std::cout << getMessage << std::endl;

        // send() may accept only part of the request; loop until all is sent.
        const char *cursor = getMessage.c_str( );
        size_t remaining = getMessage.length( );
        while ( remaining > 0 )
        {
            const ssize_t sent = send( s, cursor, remaining, 0 );
            if ( sent <= 0 )
            {
                std::cerr << "SocketReader: send failed" << std::endl;
                close( s );
                return;
            }
            cursor += sent;
            remaining -= static_cast<size_t>( sent );
        }

        // Read from the socket until there's no more data.
        char chunk[ 10240 ];
        ssize_t bytes;
        while ( ( bytes = recv( s, chunk, sizeof( chunk ), 0 ) ) > 0 )
        {
            if ( write( 1, chunk, static_cast<size_t>( bytes ) ) < 0 )
                break;
        }
        if ( bytes < 0 )
            std::cerr << "SocketReader: recv failed" << std::endl;

        close( s );
    }

public:
    SocketReader( string url_in ) : url( ParsedUrl( url_in ) ) { }
};
/**
 * StreamReader backed by a file: fillBuffer() exposes the file's contents
 * through the inherited `buffer` pointer.
 */
class LocalReader : public StreamReader
{
    // Name handed to getFileMap when the buffer is filled.
    string fileName;

    /**
     * Points `buffer` at the memory map returned by getFileMap.
     *
     * The earlier strcpy into `buffer` wrote through a pointer that nothing
     * in this class allocates, and read from a null source whenever the map
     * could not be created. `buffer` now simply holds getFileMap's result,
     * so callers should check it for null before reading.
     */
    void fillBuffer()
    {
        buffer = getFileMap( fileName );
    }

public:
    LocalReader( string url_in ) : fileName( url_in ) { }
};
......@@ -7,9 +7,18 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "../util/util.h"
#include "StreamReader.h"
#include <unistd.h>
#include "../util/util.h"
#include "LocalReader.h"
#include "SocketReader.h"
namespace filepath
{
// File name (with leading slash) of the document map. The pointer itself is
// const so it cannot be re-pointed at run time and has internal linkage.
const char* const DOC_MAP = "/docMap.txt";
}
string Spider::getUrl()
{
return urlFrontier->Pop( );
......@@ -35,7 +44,7 @@ void Spider::FuncToRun()
if ( cond )
{
// markURLSeen( currentUrl );
StreamReader* reader = request( currentUrl );
//StreamReader* reader = request( currentUrl );
//parser.parse(fileMap);
cond = false;
} else
......@@ -69,14 +78,15 @@ bool Spider::shouldURLbeCrawled( string url )
{
//cerr << "Url Not Found In Cache Lookup" << endl;
//get file descriptor for the docMap on disk
int file = getFileDescriptor( "/Users/jakeclose/Desktop/398/project/eecs398-search/docMap.txt", "W" );
string loc = util::GetCurrentWorkingDir() + filepath::DOC_MAP;
int file = util::getFileDescriptor( loc.c_str(), "W" );
//check if its available
if ( file == -1 )
cerr << "Error opening docMap" << endl;
else
{
//get the current size of the docMap
size_t seekPosition = FileSize( file );
size_t seekPosition = util::FileSize( file );
//seek to the end of the file
off_t resultPosition = lseek( file, seekPosition, SEEK_SET );
......@@ -113,7 +123,8 @@ bool Spider::shouldURLbeCrawled( string url )
std::cout << locationOnDisk->first << " is " << locationOnDisk->second;
int file = getFileDescriptor( "/Users/jakeclose/Desktop/398/project/eecs398-search/docMap.txt", "R" );
string loc = util::GetCurrentWorkingDir() + filepath::DOC_MAP;
int file = util::getFileDescriptor( loc.c_str(), "R" );
//check if its available
if ( file )
{
......@@ -150,6 +161,7 @@ bool Spider::shouldURLbeCrawled( string url )
returns true if fileMap was created, otherwise false
Modifies the filemap to be a char* of the file of the url passed
*/
StreamReader* Spider::request( string url )
{
string localFile;
......@@ -171,7 +183,7 @@ StreamReader* Spider::request( string url )
/*
 * Writes fileContents to a new file at locationOnDisk by delegating to
 * util::writeToNewFileToLocation, and returns that call's result.
 *
 * The unqualified call that used to precede this one has been removed: it
 * made the util:: call unreachable.
 */
int Spider::writeFileToDisk( char *fileContents, string locationOnDisk )
{
    return util::writeToNewFileToLocation( fileContents, locationOnDisk );
}
......
......@@ -9,8 +9,10 @@
#include<iostream>
#include <unordered_map>
#include "StreamReader.h"
using namespace std;
class Spider : public ThreadClass
{
......@@ -34,9 +36,6 @@ public:
//Where to write to disk? What type of data are we reading in?
int writeFileToDisk( char *fileContents, string locationOnDisk );
//Adds location
void addFDToQueue( int fileDescriptor );
void markURLSeen( string URL );
......
No preview for this file type
......@@ -13,6 +13,7 @@
#include <string>
//#include "crawler/CrawlerStatistics.h"
#include <unordered_map>
#include "util/util.h"
// Path constants as string literals. The earlier `= '...'` form expanded to an
// equals sign followed by a multi-character literal, which is not a usable path.
#define PATH_TO_BLACKLIST "/bin/blacklist.txt"
#define PATH_TO_VISITED_URL "bin/urls.txt"
......
......@@ -13,7 +13,7 @@ void ProducerConsumerQueue<T>::Push(T obj)
if(queue.size() == 1)
{
pthread_cond_signal(&consumer_cv);
pthread_cond_broadcast(&consumer_cv);
}
pthread_mutex_unlock(&m);
......
//
// Created by Ben Bergkamp on 2/13/18.
//
#include "util.h"
namespace util
{

/**
 * Returns the size in bytes of the file open on descriptor `f`.
 * Returns 0 when fstat() fails (e.g. `f` is not a valid descriptor) instead
 * of reading an uninitialised stat structure.
 */
size_t FileSize(int f)
{
    struct stat fileInfo;
    if (fstat(f, &fileInfo) != 0) {
        return 0;
    }
    return static_cast<size_t>(fileInfo.st_size);
}

/**
 * Returns the current working directory.
 *
 * The PWD environment variable is used when it is set and non-empty;
 * otherwise getcwd() is consulted, because constructing a string from the
 * null pointer getenv() returns for an unset variable is undefined.
 * Returns an empty string if neither source yields a path.
 */
std::string GetCurrentWorkingDir(void)
{
    const char *pwd = getenv("PWD");
    if (pwd != nullptr && pwd[0] != '\0') {
        return pwd;
    }

    char cwd[4096];
    if (getcwd(cwd, sizeof(cwd)) != nullptr) {
        return cwd;
    }
    return std::string();
}

/**
 * Opens `fileName` and returns the descriptor from open().
 *
 * @param type "R" opens read-only; "W" opens write-only, creating the file
 *             with owner read/write permission if it does not exist.
 * @return the descriptor, or -1 if open() fails or `type` is neither "R"
 *         nor "W" (previously that case fell off the end of the function).
 */
int getFileDescriptor(std::string fileName, std::string type)
{
    if (type == "R") {
        return open(fileName.c_str(), O_RDONLY);
    }
    if (type == "W") {
        return open(fileName.c_str(), O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR);
    }
    return -1;
}

/**
 * Maps the whole file behind `fileDescriptor` into memory, read-only and
 * private, and returns a pointer to the first byte.
 *
 * The descriptor is left open; the caller keeps ownership of it. No
 * terminator is appended by this function: the mapping is requested for
 * exactly FileSize(fileDescriptor) bytes.
 *
 * @return the mapping, or nullptr if the descriptor is -1, the file is
 *         empty, or mmap() fails.
 */
char *getFileMap(int fileDescriptor)
{
    if (fileDescriptor == -1) {
        return nullptr;
    }

    const size_t fileSize = FileSize(fileDescriptor);
    if (fileSize == 0) {
        // mmap() rejects zero-length mappings; report that directly.
        return nullptr;
    }

    void *map = mmap(nullptr, fileSize, PROT_READ, MAP_PRIVATE, fileDescriptor, 0);
    if (map == MAP_FAILED) {
        return nullptr;
    }
    return static_cast<char *>(map);
}

/**
 * Maps the file at GetCurrentWorkingDir() + `fileName` into memory,
 * read-only, and returns a pointer to the first byte.
 *
 * The descriptor opened here is closed before returning (it used to leak);
 * a mapping remains valid after its descriptor is closed.
 *
 * @return the mapping, or nullptr if the file cannot be opened or mapped.
 */
char *getFileMap(std::string fileName)
{
    const std::string fullPath = GetCurrentWorkingDir() + fileName;
    const int f = open(fullPath.c_str(), O_RDONLY);
    if (f == -1) {
        return nullptr;
    }

    char *map = getFileMap(f);
    close(f);
    return map;
}

/**
 * Creates (or truncates) the file at `locationOnDisk` with owner rwx
 * permission and writes the NUL-terminated string `fileContents` to it.
 *
 * A null `fileContents` is treated as empty content. write() is retried
 * until every byte is written or it reports an error, since a single call
 * may write only part of the data.
 *
 * @return the descriptor of the new file, still open (the caller must close
 *         it), or -1 if the file could not be created. A short write is
 *         reported on stdout but the descriptor is still returned.
 */
int writeToNewFileToLocation(char *fileContents, std::string locationOnDisk)
{
    const int fd = creat(locationOnDisk.c_str(), S_IRWXU);
    if (fd == -1) {
        std::cout << "ERROR CREATING FILE\n";
        return fd;
    }
    if (fileContents == nullptr) {
        return fd;
    }

    const size_t fileSize = strlen(fileContents);
    size_t written = 0;
    while (written < fileSize) {
        const ssize_t n = write(fd, fileContents + written, fileSize - written);
        if (n <= 0) {
            std::cout << "ERROR: Only " << written << " bytes written\n";
            break;
        }
        written += static_cast<size_t>(n);
    }
    return fd;
}

}
\ No newline at end of file
......@@ -11,106 +11,43 @@
#include <unistd.h>
#include <stdlib.h>
#include <iostream>
#include <string>
// Path constants as string literals. The earlier `= '...'` form expanded to an
// equals sign followed by a multi-character literal, which is not a usable path.
#define PATH_TO_BLACKLIST "/bin/blacklist.txt"
#define PATH_TO_VISITED_URL "bin/urls.txt"
#define PATH_TO_HTML_DIR "bin/html/"
#define PATH_TO_INDEX "bin/index/wordIDX"
#define PATH_TO_DOC_INDEX "bin/index/docIDX"
using namespace std;
using namespace std;
size_t FileSize( int f )
namespace util
{
struct stat fileInfo;
fstat( f, &fileInfo );
return fileInfo.st_size;
}
size_t FileSize(int f);
/*
* Takes filename as input, maps file into character pointer
* If cannot open file, returns a nullptr
*
*
*/
int getFileDescriptor( string fileName , string type)
{
if(type == "R")
{
return open( fileName.c_str( ), O_RDONLY );
}
else if( type == "W")
{
return open( fileName.c_str( ), O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR );
}
int getFileDescriptor(string fileName, string type);
}
char *getFileMap(string fileName);
/*
 * Maps the file named by the fixed project prefix + fileName into memory,
 * read-only and private, and returns a pointer to the mapping.
 * Returns nullptr if the file cannot be opened or mapped.
 *
 * The descriptor opened here is now closed before returning (it used to
 * leak); a mapping stays valid after its descriptor is closed.
 */
char *getFileMap( string fileName )
{
    // NOTE(review): absolute, machine-specific prefix; left unchanged here.
    string localPath = "/Users/jakeclose/Desktop/398/project/eecs398-search/";
    fileName = localPath + fileName;

    int f = open( fileName.c_str( ), O_RDONLY );
    if ( f == -1 )
        return nullptr;

    size_t fileSize = FileSize( f );
    void *map = mmap( nullptr, fileSize, PROT_READ, MAP_PRIVATE, f, 0 );
    close( f );

    if ( map == MAP_FAILED )
        return nullptr;
    return static_cast<char *>( map );
}
/*
* Takes integer file descriptor, returns char* map of file
*
*/
char *getFileMap( int fileDescriptor )
{
if ( fileDescriptor != -1 )
{
size_t fileSize = FileSize( fileDescriptor );
char *map = (char *) mmap( nullptr, fileSize, PROT_READ, MAP_PRIVATE, fileDescriptor, 0 );
if ( map != MAP_FAILED)
{
return map;
}
}
return nullptr;
}
char *getFileMap(int fileDescriptor);
/*
*
* Takes a pointer to the start of a file's contents and a string naming a location on disk
* writes filemap to disk at location, and returns the file descriptor to the new file
*/
int writeToNewFileToLocation(char *fileContents, string locationOnDisk);
/*
 * Creates (or truncates) the file at locationOnDisk with owner rwx permission
 * and writes the NUL-terminated string fileContents to it.
 *
 * A null fileContents is treated as empty content. write() is retried until
 * all bytes are written or it reports an error, because one call may write
 * only part of the data. When the file cannot be created only the creation
 * error is printed and -1 is returned.
 *
 * Returns the descriptor of the new file, still open; the caller closes it.
 */
int writeToNewFileToLocation( char *fileContents, std::string locationOnDisk )
{
    const int fd = creat( locationOnDisk.c_str( ), S_IRWXU );
    if ( fd == -1 )
    {
        std::cout << "ERROR CREATING FILE\n";
        return fd;
    }
    if ( fileContents == nullptr )
        return fd;

    const size_t fileSize = strlen( fileContents );
    size_t written = 0;
    while ( written < fileSize )
    {
        const ssize_t n = write( fd, fileContents + written, fileSize - written );
        if ( n <= 0 )
        {
            std::cout << "ERROR: Only " << written << " bytes written\n";
            break;
        }
        written += static_cast<size_t>( n );
    }
    return fd;
}
string GetCurrentWorkingDir(void);
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment