Skip to content
Snippets Groups Projects
Commit 23693af7 authored by jsclose's avatar jsclose
Browse files

more tests passing

parent a63621a6
Branches url-parsing
No related tags found
No related merge requests found
......@@ -13,6 +13,7 @@ add_executable(crawler-parser-test
shared/ThreadClass.h
shared/url.h
crawler/crawler.cpp
crawler/UrlFrontier.cpp
crawler/Readers/StreamReader.h
crawler/Readers/HttpReader.cpp
crawler/Readers/HttpsReader.cpp
......@@ -53,6 +54,7 @@ add_executable(isolated-integration
crawler/tests/crawlerTest.cpp
shared/ProducerConsumerQueue.h
shared/ThreadClass.h
crawler/UrlFrontier.cpp
shared/url.h
crawler/crawler.cpp
crawler/Readers/StreamReader.h
......
No preview for this file type
......@@ -31,7 +31,7 @@ int main ( int argc, char *argv[] )
char *seeds;
int numberOfSpiders = 1;
unordered_map < size_t, int > *duplicateUrlMap = new unordered_map < size_t, int >( );
ProducerConsumerQueue < ParsedUrl > *urlFrontier = new ProducerConsumerQueue < ParsedUrl >( );
UrlFrontier *urlFrontier = new UrlFrontier( );
ProducerConsumerQueue < DocIndex * > *IndexerQueue = new ProducerConsumerQueue < DocIndex * >( );
Indexer indexer( IndexerQueue );
string path = util::GetCurrentWorkingDir() +"/crawler/tests/testSeeds.txt";
......@@ -66,9 +66,9 @@ int main ( int argc, char *argv[] )
//ParsedUrl url = ParsedUrl(bad_url);
ParsedUrl url1 = ParsedUrl(bad_url);
ParsedUrl url2 = ParsedUrl(bad_url2);
urlFrontier->Push(url1);
urlFrontier->Push(&url1);
urlFrontier->Push(url2);
urlFrontier->Push(&url2);
indexer.StartThread( );
Crawler crawler( mode, urlFrontier, IndexerQueue );
......
No preview for this file type
......@@ -12,7 +12,7 @@
#include "../../crawler/Readers/HttpReader.h"
#include "../../crawler/Readers/HttpsReader.h"
#include "../../util/util.h"
#include "../../crawler/UrlFrontier.h"
using namespace std;
void testSimple( );
......@@ -52,7 +52,7 @@ void printDictionary ( const unordered_map< string, vector< unsigned long > > di
void testSimple ( )
{
cout << "Testing Simple: " << endl;
ProducerConsumerQueue< ParsedUrl > urlFrontierTest;
UrlFrontier urlFrontierTest;
Parser parser( &urlFrontierTest );
ParsedUrl fake_url = ParsedUrl( "http://www.cats.com" );
string filepath = util::GetCurrentWorkingDir( ) + "/tests/plaintext.txt";
......@@ -93,11 +93,11 @@ void testSimple ( )
void testHttp( )
{
cout << "Testing Complex: " << endl;
ProducerConsumerQueue< ParsedUrl > urlFrontierTest;
UrlFrontier urlFrontierTest;
Parser parser( &urlFrontierTest );
ParsedUrl httpURL = ParsedUrl( "http://veronicacday.com/" );
HttpReader reader( httpURL );
HttpReader reader( &httpURL );
auto success = reader.request( );
if ( !success )
{
......@@ -109,10 +109,10 @@ void testHttp( )
printDictionary( *dictionary );
urlFrontierTest.Pop( );
assert( urlFrontierTest.Pop( ).getCompleteUrl( ) == "https://trove.com/" );
assert( urlFrontierTest.Pop( ).getCompleteUrl( ) == "http://arcinnovations.xyz/" );
assert( urlFrontierTest.Pop( ).getCompleteUrl( ) == "https://gwydion.co/" );
assert( urlFrontierTest.Pop( ).getCompleteUrl( ) == "https://madeatmichigan.umich.edu/ventures/venture/gwydion/" );
assert( urlFrontierTest.Pop( )->getCompleteUrl( ) == "https://trove.com/" );
assert( urlFrontierTest.Pop( )->getCompleteUrl( ) == "http://arcinnovations.xyz/" );
assert( urlFrontierTest.Pop( )->getCompleteUrl( ) == "https://gwydion.co/" );
assert( urlFrontierTest.Pop( )->getCompleteUrl( ) == "https://madeatmichigan.umich.edu/ventures/venture/gwydion/" );
assert ( dictionary != nullptr );
assert ( dictionary->size( ) == 67 );
......@@ -136,7 +136,7 @@ void testHttp( )
void testURL ( )
{
cout << "Testing URL: " << endl;
ProducerConsumerQueue< ParsedUrl > urlFrontierTest;
UrlFrontier urlFrontierTest ;
Parser parser( &urlFrontierTest );
ParsedUrl fake_url = ParsedUrl( "http://testurl.com" );
string filepath = util::GetCurrentWorkingDir( ) + "/tests/urlTest.html";
......@@ -156,7 +156,7 @@ void testURL ( )
assert ( dictionary != nullptr );
assert ( dictionary->size( ) == 3 );
assert ( dictionary->at( "=testurl.com/" )[ 0 ] == 0 );
assert ( urlFrontierTest.Pop( ).getCompleteUrl( ) == "http://www.bafta.org/" );
assert ( urlFrontierTest.Pop( )->getCompleteUrl( ) == "http://www.bafta.org/" );
assert ( dictionary->find( "$bafta" ) == dictionary->end( ) );
assert ( dictionary->at( "$testurl" )[ 0 ] == 0 );
assert ( dictionary->at( "$com" )[ 0 ] == 1 );
......@@ -170,7 +170,7 @@ void testURL ( )
void testBody ( )
{
cout << "Testing Body: " << endl;
ProducerConsumerQueue< ParsedUrl > urlFrontierTest;
UrlFrontier urlFrontierTest;
Parser parser( &urlFrontierTest );
ParsedUrl fake_url = ParsedUrl( "http://www.testingBody.edu" );
string filepath = util::GetCurrentWorkingDir( ) + "/tests/testParserBody.html";
......@@ -220,7 +220,7 @@ void testBody ( )
void testExtractBody ( )
{
cout << "Testing ExtractBody: " << endl;
ProducerConsumerQueue< ParsedUrl > urlFrontierTest;
UrlFrontier urlFrontierTest;
Parser parser( &urlFrontierTest );
ParsedUrl fake_url = ParsedUrl( "https://developer.mozilla.org/en-US/docs/Learn" );
string filepath = util::GetCurrentWorkingDir( ) + "/tests/testExtractBodyTest.html";
......
0% — Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.