Skip to content
Snippets Groups Projects
Commit 8f7de9b7 authored by jsclose's avatar jsclose
Browse files

working on ssl

parent 0fe06957
No related branches found
No related tags found
No related merge requests found
.idea/*
openssl-1.1.0g/*
.vagrant/*
CMakeLists.txt
cmake-build-debug/*
......
......@@ -4,7 +4,7 @@
#include "SocketReader.h"
/*
void SocketReader::fillBuffer()
{
int s = socket( AF_INET, SOCK_STREAM, IPPROTO_TCP );
......@@ -51,4 +51,72 @@ void SocketReader::fillBuffer()
close( s );
return;
}
*/
void SocketReader::fillBuffer( )
{
struct hostent *host = gethostbyname( url.Host );
assert( host );
struct sockaddr_in address;
memset( &address, 0, sizeof( address ) );
address.sin_family = AF_INET;
address.sin_port = htons( 443 );
memcpy( &address.sin_addr, host->h_addr, host->h_length );
// Create a TCP/IP socket.
int s = socket( AF_INET, SOCK_STREAM, IPPROTO_TCP );
assert( s != -1 );
// Connect the socket to the host address.
int connectResult = connect( s, ( struct sockaddr * )&address,
sizeof( address ) );
assert( connectResult == 0 );
// Build an SSL layer and set it to read/write
// to the socket we've connected.
SSL_library_init( );
SSL_CTX *ctx = SSL_CTX_new( SSLv23_method( ) );
assert( ctx );
SSL *ssl = SSL_new( ctx );
assert( ssl );
SSL_set_fd( ssl, s );
// Establish an SSL connection.
int sslConnectResult = SSL_connect( ssl );
assert( sslConnectResult == 1 );
// Send a GET message for the desired page through the SSL.
string getMessage = "GET ";
getMessage += url.CompleteUrl;
getMessage += " HTTP/1.1\r\nHost: ";
getMessage += url.Host;
getMessage += "\r\nConnection: close\r\n\r\n";
cout << getMessage << endl;
SSL_write( ssl, getMessage.c_str( ), getMessage.length( ) );
// Read from the SSL until there's no more data.
char buffer[ 10240 ];
int bytes;
while ( ( bytes = SSL_read( ssl, buffer,
sizeof( buffer ) ) ) > 0 )
write( 1, buffer, bytes );
SSL_shutdown( ssl );
SSL_free( ssl );
SSL_CTX_free( ctx );
close( s );
}
\ No newline at end of file
......@@ -14,6 +14,8 @@ public:
SocketReader( string url_in ) : url( ParsedUrl( url_in ) ) { }
virtual void fillBuffer();
//virtual void fillBuffer(char ssl);
private:
ParsedUrl url;
};
......
......@@ -14,6 +14,7 @@
#include <string.h>
#include <string>
#include <cassert>
#include <openssl/ssl.h>
......
......@@ -34,9 +34,11 @@ void Spider::FuncToRun()
while ( cond )
{
string currentUrl = getUrl( );
string currentUrl = getUrl( ); //get url from url frontier
char *fileMap;
//url has not been seen before, or the time since it was last seen is past certain criteria
if ( shouldURLbeCrawled( currentUrl ))
{
bool success = writeDocToDisk(currentUrl);
......
No preview for this file type
......@@ -44,12 +44,11 @@ int main( int argc, const char *argv[] )
*/
//
string mode = "local";
string mode = "web";
// Seed urls?
string seed;
//
int numberOfSpiders;
int numberOfParsers;
bool restoreFromLog;
......
......@@ -6,6 +6,9 @@
#include <string>
#include <iostream>
#include "../util/util.h"
//#include "../crawler/StreamReader.h"
//#include "../crawler/SocketReader.h"
using namespace std;
//
......@@ -157,6 +160,44 @@ public:
Host = Path = p;
}
/*
//check whether the path in the url is listed in the robots.txt file
void checkRobots()
{
string pathToRobots = util::GetCurrentWorkingDir() + '/' + Service;
int robotsFileD = util::getFileDescriptor(pathToRobots , "R");
//File does not exist yet
if(robotsFileD == -1)
{
robotsFileD = getRobots();
}
char* robotsTXT = util::getFileMap(robotsFileD);
}
//Makes a request to get a new robots.txt file, returns its file descriptor (-1 on error)
int getRobots( )
{
StreamReader reader;
string pathToRobots = util::GetCurrentWorkingDir() + '/' + Service;
reader = new SocketReader(CompleteUrl+ '/' + 'robots.txt');
reader->fillBuffer();
int fd = util::writeToNewFileToLocation( reader->buffer, pathToRobots);
if( fd == -1)
{
cerr << "Error getting Robots.txt file " << endl;
}
return fd;
return 1;
};
*/
~ParsedUrl( )
{
delete [ ] pathBuffer;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment