From 1a4a31490b4cbd13f8553cca0a4204a3c2f52337 Mon Sep 17 00:00:00 2001
From: jsclose <jsclose@umich.edu>
Date: Wed, 14 Feb 2018 15:16:30 -0500
Subject: [PATCH] trying with 3 spiders

---
 crawler/SocketReader.cpp |   4 ++++
 crawler/spider.cpp       |   2 --
 docMap.txt               | Bin 182 -> 336 bytes
 main.cpp                 |   2 +-
 tests/webSeed.txt        |   1 +
 5 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/crawler/SocketReader.cpp b/crawler/SocketReader.cpp
index e669924..2ffe3e4 100644
--- a/crawler/SocketReader.cpp
+++ b/crawler/SocketReader.cpp
@@ -29,6 +29,8 @@ void SocketReader::fillBuffer()
 
 	// Send a GET message for the desired page.
 
+	cout << "Socket Reader is pulling from the web" << endl;
+
 	string getMessage = "GET ";
 	getMessage += url.CompleteUrl;
 	getMessage += " HTTP/1.1\r\nHost: ";
@@ -43,8 +45,10 @@ void SocketReader::fillBuffer()
 	char buffer[ 10240 ];
 	int bytes;
 
+
 	while ( ( bytes = recv( s, buffer, sizeof( buffer ), 0 ) ) > 0 )
 		write( 1, buffer, bytes );
 
 	close( s );
+	return;
 	}
\ No newline at end of file
diff --git a/crawler/spider.cpp b/crawler/spider.cpp
index 5aa9ba2..8e59c22 100644
--- a/crawler/spider.cpp
+++ b/crawler/spider.cpp
@@ -47,9 +47,7 @@ void Spider::FuncToRun()
 			{
 			if ( cond )
 				{
-				// markURLSeen( currentUrl );
 
-				//StreamReader* reader = request( currentUrl );
 
 				StreamReader *reader = request( currentUrl );
 
diff --git a/docMap.txt b/docMap.txt
index 318dc5f5753bbbc781f081777db0e0a9677752cc..3b0c8c71437350f5607a5b3e314e572dd1066281 100644
GIT binary patch
delta 15
TcmdnSc!6ocwuuF9V8jOiHjM^t

delta 6
Ncmcb>w2g7XHUJ5s0?+^e

diff --git a/main.cpp b/main.cpp
index a158cda..4e0eac0 100644
--- a/main.cpp
+++ b/main.cpp
@@ -84,7 +84,7 @@ unordered_map < string, int > *docMapLookUp = new unordered_map < string, int >(
 
 Crawler crawler( mode, &urlFrontier );
 
-crawler.SpawnSpiders(1 , docMapLookUp);
+crawler.SpawnSpiders(3 , docMapLookUp);
 
 crawler.
 
diff --git a/tests/webSeed.txt b/tests/webSeed.txt
index 8c509b3..8e3e8dd 100644
--- a/tests/webSeed.txt
+++ b/tests/webSeed.txt
@@ -1,2 +1,3 @@
 http://www.dailymail.co.uk/ushome/index.html
 http://www.bbc.com/
+http://www.bbc.co.uk/news/business-42959138
\ No newline at end of file
-- 
GitLab