Skip to content
Snippets Groups Projects
Indexer.h 1.06 KiB
Newer Older
  • Learn to ignore specific revisions
  • zldunn's avatar
    zldunn committed
    #ifndef indexer_h
    #define indexer_h
    
    #include "../ProducerConsumerQueue.h"
    #include "../ProducerConsumerQueue.cpp"
    
    #include "DocumentEnding.h"
    
    #include <unordered_map>
    
    #include <map>
    
    #include <vector>
    #include <iostream>
    
    #include <fstream>
    
    #include <fcntl.h>
    #include <unistd.h>
    
    yangni's avatar
    yangni committed
    /*
    
    Objective: Pulls small dictionaries from the parser and merges them into the 
    master index.
    
    
    TODO:
     Store the deltas between consecutive offsets rather than the offsets themselves
     Save with UTF-8 encoding
    
     Decide on a concrete block size - 100MB per block?
    
     Save document endings and other relevant metadata?
    
    
    yangni's avatar
    yangni committed
    */
    
    
    // NOTE(review): this using-directive is at file scope inside a header (see the
    // indexer_h include guard above), so every file that includes this header also
    // has all of std pulled into its global namespace. The unqualified string /
    // vector / unordered_map names in the declarations below depend on it, so it
    // cannot simply be removed without qualifying those names first.
    using namespace std;
    
    
    yangni's avatar
    yangni committed
    class Indexer {
        public:
            Indexer();
    
    		void run();
    		void verbose_run();
    
    zldunn's avatar
    zldunn committed
    		void verbose_save();
    
    		ProducerConsumerQueue<unordered_map<string, vector<int> > * > pointerToDictionaries;
    
        private:
            void save();
            void reset();
    
            unordered_map<string, vector<size_t> > masterDictionary;
    
            vector<DocumentEnding> docEndings;
    
    
            size_t currentFile;
    
    
            size_t currentBlockNumberWords;
            size_t currentBlockNumberDocs;