#include "Indexer.h"

Indexer::Indexer() {
    indexedCount = 0;
    currentFile = 0;
    totalIndexed = 0;
    currentlyIndexed = 0;

    currentBlockNumberWords = 0;
    currentBlockNumberDocs = 0;
}

void Indexer::run() {
    while(pointerToDictionaries.Size() != 0) {
        unordered_map<string, vector<int> >* dictionary = pointerToDictionaries.Pop();
        DocumentEnding docEnd = DocumentEnding();
        currentBlockNumberDocs++;
        for(auto word : *dictionary) {
            indexedCount += word.second.size();
            totalIndexed += word.second.size();
            currentBlockNumberWords += word.second.size();
            if(word.first.at(0) == '=') {
                docEnd.url = word.first;
                continue;
            }
            for(auto location : word.second) {
                masterDictionary[word.first].push_back(currentlyIndexed + location);
            }
        }
        currentlyIndexed += indexedCount;
        docEnd.docEndPosition = currentlyIndexed;
        docEnd.docNumWords = indexedCount;
        docEndings.push_back(docEnd);
        if(currentBlockNumberWords >= 300000) {
            save();
            reset();
        }
        indexedCount = 0;
    }
    save();
}

void Indexer::verbose_run() {
    while(pointerToDictionaries.Size() != 0) {
        unordered_map<string, vector<int>> dictionary = *pointerToDictionaries.Pop();
        for(auto word : dictionary) {
	        for(auto location : word.second) {
                indexedCount++;
                masterDictionary[word.first].push_back(location);
                }
            }
        }
    }

void Indexer::save() {
    map<string, vector<size_t> > maps(masterDictionary.begin(), masterDictionary.end());
    map<string, size_t> seeker;
    string fileName = "index" + to_string(currentFile) + ".txt";
    int file = open(fileName.c_str(), O_CREAT | O_WRONLY, S_IRWXU);
    // TODO: these should really be c strings
    string header = "===STATS===\n";
    string uniqueWords = "unique words: " + to_string(masterDictionary.size()) + "\n";
    string numberWords = "number words: " + to_string(currentBlockNumberWords) + "\n";
    string numberDocs = "number docs: " + to_string(currentBlockNumberDocs) + "\n";
    string footer = "===========\n";
    write(file, header.c_str(), strlen(header.c_str()));
    write(file, uniqueWords.c_str(), strlen(uniqueWords.c_str()));
    write(file, numberWords.c_str(), strlen(numberWords.c_str()));
    write(file, numberDocs.c_str(), strlen(numberDocs.c_str()));
    write(file, footer.c_str(), strlen(footer.c_str()));

    for(auto word : maps) {
        string wordBreak = word.first + "\n";
        write(file, wordBreak.c_str(), strlen(wordBreak.c_str()));
        for(auto location : word.second) {
            string locationSpace = to_string(location) + " ";
            write(file, locationSpace.c_str(), strlen(locationSpace.c_str()));
        }
        seeker[word.first] = 013;
        write(file, "\n", 1);
    }

    string docEndingHeader = "===Document Endings===\n";
    write(file, docEndingHeader.c_str(), strlen(docEndingHeader.c_str()));

    for(auto ending : docEndings) {
        string docEndString = "[" +
                ending.url + ", " +
                to_string(ending.docEndPosition) + ", " +
                to_string(ending.docNumWords) + "]\n";
        write(file, docEndString.c_str(), strlen(docEndString.c_str()));
    }

    // TODO: seek dictionary
    string seekFileName = "index" + to_string(currentFile) + "-seek.txt";
    int seekFile = open(seekFileName.c_str(), O_CREAT | O_WRONLY, S_IRWXU);
    for(auto word : seeker) {
        string line = word.first + " " + to_string(word.second) + "\n";
        write(seekFile, line.c_str(), strlen(line.c_str()));
    }

    close(file);
    currentFile++;
}

void Indexer::verbose_save() {
    map<string, vector<size_t> > maps(masterDictionary.begin(), masterDictionary.end());
    for(auto word : maps) {
        cout << word.first << endl;
        for(auto location : word.second) {
            cout << location << " ";
            }
	    cout << endl;
        }
    currentFile++;
    }

void Indexer::reset() {
    unordered_map<string, vector<size_t> > lastOne;

    for(auto bucket : masterDictionary) {
        lastOne[bucket.first].push_back(bucket.second.back());
    }

    this->lastOne = lastOne;
    masterDictionary.clear();

    currentBlockNumberWords = 0;
    currentBlockNumberDocs = 0;
}