Skip to content
Snippets Groups Projects
Commit 9f3b7562 authored by jsclose's avatar jsclose
Browse files

push

parent 5d455c33
Branches
No related tags found
No related merge requests found
No preview for this file type
......@@ -77,7 +77,7 @@ void Spider::run()
std::cout << "Spider is crawling" << endl;
int cond = 0;
while ( cond < 25 )
while ( cond < 50 )
{
ParsedUrl currentUrl = getUrl();
size_t docID = hash(currentUrl.CompleteUrl);
......
#71st 73
#academi 77
#award 81
#bbc 85
#break 91
#british 94
#film 98
#home 102
#homepag 106
#multimed 109
#new 113
#time 123
#wikiped 126
#world 130
#york 133
$71st 136
$academi 140
$award 144
$bbc 148
$british 154
$co 158
$com 162
$en 167
$film 171
$new 175
$nytim 179
$org 182
$uk 186
$wiki 190
$wikiped 194
=docEnding 221
#auto 73
#bbc 77
#capit 89
#futur 93
#home 97
#new 109
#travel 113
$auto 117
$bbc 121
$capit 133
$com 137
$futur 149
$new 153
$travel 157
=docEnding 184
740p05740wrjpg 135
$bbc 139
$capit 149
$co 153
$com 157
$new 165
$travel 171
$uk 175
=docEnding 202
#affair 73
#anada 77
#back 81
#bbc 85
#canada 92
#claim 96
#control 100
#daniel 104
#gun 108
#introduc 112
#measur 116
#new 120
#polygraph 129
#stormi 133
#trump 137
$43475403 141
$43479597 145
$bbc 149
$canada 156
$com 163
$new 170
$world 177
=docEnding 207
#100yearold 73
#bbc 77
#end 84
#look 89
#new 94
#oldest 99
#planetarium 104
#rium 109
#secret 114
#share 118
#travel 122
#woman 127
#work 131
#world 136
#young 141
$20180320 146
$43431176 151
$bbc 156
$com 163
$england 170
$new 175
$oldest 179
$planetarium 184
$stori 189
$travel 194
$uk 199
$work 203
$world 208
=docEnding 236
#10th 73
#bbc 78
#brain 86
#centuri 91
#crave 96
#cultur 101
#deep 106
#did 111
#food 116
#futur 121
#height 126
#like 131
#look 136
#luxuri 141
#nsilencedfood 146
#silenc 151
#stimul 156
$10th 161
$20180312 166
$20180320 171
$bbc 176
$brain 184
$centuri 189
$com 194
$crave 202
$cultur 207
$did 212
$electron 217
$food 222
$futur 227
$height 232
$like 237
$look 242
$luxuri 247
$silenc 252
$stimul 257
$stori 262
=docEnding 293
#bbc 73
#bee 81
#bsessedwith 86
#capit 91
#job 96
#lithuanian 101
#obsess 106
#robot 111
#steal 116
#travel 121
#won39t 126
#yet 131
$20180316 136
$20180319 141
$bbc 146
$bee 154
$capit 159
$com 164
$job 172
$lithuanian 177
$obsess 182
$robot 187
$steal 192
$stori 197
$travel 205
$wont 210
$yet 215
=docEnding 243
#bbc 73
#best 81
#cultur 86
#film 91
#food 96
#good 101
#healthi 106
#lthyov 111
#oat 116
#on 121
#overnight 126
#player 131
#readi 136
#recip 141
#review 146
$20180320 151
$bbc 156
$bbcgoodfood 161
$best 166
$com 171
$cultur 179
$film 184
$guid 189
$healthi 194
$howto 199
$oat 204
$on 209
$overnight 214
$player 219
$readi 224
$recip 229
$review 234
$stori 239
=docEnding 267
#2019 73
#bbc 78
#big 85
#boi 90
#could 95
#cultur 100
#everybodi 105
#gang 110
#join 115
#new 120
#onli 125
#oscar 130
#think 135
#tthe201 140
#win 145
$20180320 150
$2019 155
$43377380 160
$bbc 165
$big 173
$com 178
$could 186
$cultur 191
$new 196
$oscar 201
$stori 206
$win 214
=docEnding 242
#43474650 73
#ahead 78
#bbc 83
#beatl 91
#breakfast 96
#cyberspi 101
#former 106
#ill 111
#med 116
#new 121
#on 129
#ringo 134
#starr 139
#staye 144
#step 149
#wear 154
$43259900 159
$43474650 164
$art 169
$av 174
$bbc 179
$beatl 187
$breakfast 192
$busi 197
$com 202
$entertain 210
$former 215
$ll 220
$med 225
$new 230
$ringo 238
$starr 243
$wear 248
=docEnding 276
......@@ -3,7 +3,6 @@
Indexer::Indexer(ProducerConsumerQueue < DocIndex* > *doc_index_queue_in) : pointerToDictionaries( doc_index_queue_in ){
currentFile = 0;
currentlyIndexed = 0;
currentBlockNumberWords = 0;
currentBlockNumberDocs = 0;
......@@ -38,7 +37,7 @@ void Indexer::run() {
docEnd.docNumWords = indexedCount;
docEndings.push_back(docEnd);
if(currentBlockNumberWords >= 100000) {
if(currentBlockNumberWords >= 500) {
save();
reset();
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment