//
// Created by Veronica Day on 1/28/18.
//
// Keep a running count of the offset; if the current word is a stop word, drop it and do not
// increment the offset (a small sketch of this rule follows the includes below).
// The tokenizer returns a pointer to the document's dictionary, and the parser puts that
// dictionary on the indexer's queue.
//
#include <string>
#include <functional>
#include <queue>
#include <unordered_map>
#include <vector>
#include <iostream>
#include <fstream>
#include "../shared/Document.h"
#include "../shared/ProducerConsumerQueue.h"

using namespace std;
/**
 * This class uses the Document object from the Crawler to parse the text.
 * Returns a pointer to a dictionary that contains the tokenized input.
 */
// Stores the shared URL frontier so links discovered while parsing can be queued for the crawler
Parser ( ProducerConsumerQueue< string > *urlFrontierIn );

// Parses the document's text and returns a pointer to the word -> offsets dictionary
const unordered_map< string, vector< int > > *execute ( Document *document );

// For each term in the parsing loop: a term that contains a link is pushed onto the URL
// frontier for the crawler; otherwise, if it contains a title, the title text is appended
// to the input that will be handed to the tokenizer.
string url = extract_url ( currentTerm );
if ( url != "" )
{
    urlFrontier->Push ( url );
}
else
{
    string title = extract_title ( currentTerm );
    if ( title != "" )
    {
        tokenizerInput += title;
    }
}
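
// A minimal sketch (not part of the original file) of the hand-off described in the header
// comment: the word -> offsets dictionary produced for a document is pushed onto the indexer's
// queue. The helper name and the queue's element type are assumptions for illustration; only
// ProducerConsumerQueue::Push is taken from the code above.
static void enqueueForIndexerSketch (
        const unordered_map< string, vector< int > > *wordOffsets,
        ProducerConsumerQueue< const unordered_map< string, vector< int > > * > *indexerQueue )
{
    // The parser hands the tokenized document to the indexer through the shared queue.
    indexerQueue->Push ( wordOffsets );
}
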
/**
 * Returns a URL, or "" if the word does not contain one
 * @param word the term currently being examined
 * @return