#include "Indexer.h"

/**
 * Builds an indexer with every counter zeroed: no block has been written yet
 * and nothing has been indexed.
 */
Indexer::Indexer() {
    // Statistics for the block currently being accumulated.
    currentBlockNumberDocs = 0;
    currentBlockNumberWords = 0;

    // Running totals that persist across blocks: postings indexed so far
    // and the number used to name the next index file.
    currentlyIndexed = 0;
    currentFile = 0;
}

/**
 * Drains the queue of per-document dictionaries and merges each one into the
 * in-memory block index (masterDictionary).
 *
 * For every document:
 *   - a key beginning with '=' is treated as the document URL marker (the text
 *     after the '=' is stored as the URL) and is not indexed as a word;
 *   - every other key's locations are shifted by currentlyIndexed and appended
 *     to that word's postings in masterDictionary;
 *   - a DocumentEnding record (url, end position, word count) is appended to
 *     docEndings.
 *
 * Once the current block holds at least 300000 postings it is written with
 * save() and cleared with reset(). After the queue is empty the remaining
 * block is saved and cleared as well (this final save runs unconditionally).
 */
void Indexer::run() {
    while(pointerToDictionaries.Size() != 0) {
        // NOTE(review): the popped dictionary is never freed in this function;
        // confirm who owns it.
        unordered_map<string, vector<int> >* dictionary = pointerToDictionaries.Pop();
        DocumentEnding docEnd = DocumentEnding();
        size_t indexedCount = 0;
        currentBlockNumberDocs++;

        // Iterate by const reference: iterating by value would deep-copy every
        // key string and its whole postings vector.
        for(const auto& word : *dictionary) {
            // The emptiness check prevents the std::out_of_range that at(0)
            // would throw for an empty key; such a key is indexed like any
            // other word instead of aborting the run.
            if(!word.first.empty() && word.first[0] == '=') {
                docEnd.url = word.first.substr(1);
                continue;
            }

            indexedCount += word.second.size();
            currentBlockNumberWords += word.second.size();

            // Skip the lookup entirely for an empty postings list so that no
            // empty entry is created in masterDictionary.
            if(word.second.empty()) {
                continue;
            }

            // One hash lookup per word instead of one per posting.
            auto& postings = masterDictionary[word.first];
            for(const auto location : word.second) {
                postings.push_back(currentlyIndexed + location);
            }
        }

        currentlyIndexed += indexedCount;
        docEnd.docEndPosition = currentlyIndexed;
        docEnd.docNumWords = indexedCount;
        docEndings.push_back(docEnd);

        // Flush the block to disk once it has grown large enough.
        if(currentBlockNumberWords >= 300000) {
            save();
            reset();
        }
    }

    // Write out whatever is left in the final, partially filled block.
    save();
    reset();
}

/**
 * Debug variant of run(): drains the dictionary queue and appends every
 * location, unshifted, to the matching word's postings in masterDictionary.
 *
 * Unlike run(), this does not treat '='-prefixed keys specially, does not
 * update any counters, records no document endings and never calls save().
 */
void Indexer::verbose_run() {
    while(pointerToDictionaries.Size() != 0) {
        // Bind by reference instead of deep-copying the whole dictionary;
        // run() reads the popped dictionary in place the same way.
        const auto& dictionary = *pointerToDictionaries.Pop();

        for(const auto& word : dictionary) {
            // An empty postings list must not create an entry.
            if(word.second.empty()) {
                continue;
            }

            // One hash lookup per word instead of one per posting.
            auto& postings = masterDictionary[word.first];
            for(const auto location : word.second) {
                postings.push_back(location);
            }
        }
    }
}

void Indexer::save() {
    map<string, vector<size_t> > maps(masterDictionary.begin(), masterDictionary.end());
    map<string, size_t> seeker;
    string fileName = "index" + to_string(currentFile) + ".txt";
    int file = open(fileName.c_str(), O_CREAT | O_WRONLY, S_IRWXU);

    // TODO: these should really be c strings
    string header = "===STATS===\n";
    string uniqueWords = "unique words: " + to_string(masterDictionary.size()) + "\n";
    string numberWords = "number words: " + to_string(currentBlockNumberWords) + "\n";
    string numberDocs = "number docs: " + to_string(currentBlockNumberDocs) + "\n";
    string footer = "===========\n";
    write(file, header.c_str(), strlen(header.c_str()));
    write(file, uniqueWords.c_str(), strlen(uniqueWords.c_str()));
    write(file, numberWords.c_str(), strlen(numberWords.c_str()));
    write(file, numberDocs.c_str(), strlen(numberDocs.c_str()));
    write(file, footer.c_str(), strlen(footer.c_str()));

    // REALLY GROSS HACK
    size_t seekOffset = strlen(header.c_str()) +
                     strlen(numberDocs.c_str()) +
                     strlen(numberWords.c_str()) +
                     strlen(uniqueWords.c_str()) +
                     strlen(footer.c_str());

    for(auto word : maps) {
        seeker[word.first] = seekOffset;
//        string wordBreak = word.first + "\n";
//        write(file, wordBreak.c_str(), strlen(wordBreak.c_str()));
//        seekOffset += strlen(wordBreak.c_str());
        bool firstPost = true;
        size_t lastOne = 0;
        for(auto location : word.second) {
            if(firstPost) {
                string locationSpace = to_string(location) + " ";
                write(file, locationSpace.c_str(), strlen(locationSpace.c_str()));
                seekOffset += strlen(locationSpace.c_str());
                firstPost = false;
            } else {
                size_t delta = location - lastOne;
                string deltaSpace = to_string(delta) + " ";
                write(file, deltaSpace.c_str(), strlen(deltaSpace.c_str()));
                seekOffset += strlen(deltaSpace.c_str());
            }
            lastOne = location;
        }
        write(file, "\n", 1);
        seekOffset += 1;
    }

    string docEndingHeader = "===Document Endings===\n";
    write(file, docEndingHeader.c_str(), strlen(docEndingHeader.c_str()));

    for(auto ending : docEndings) {
        string docEndString = "[" +
                ending.url + ", " +
                to_string(ending.docEndPosition) + ", " +
                to_string(ending.docNumWords) + "]\n";
        write(file, docEndString.c_str(), strlen(docEndString.c_str()));
    }

    // TODO: seek dictionary
    string seekFileName = "index" + to_string(currentFile) + "-seek.txt";
    int seekFile = open(seekFileName.c_str(), O_CREAT | O_WRONLY, S_IRWXU);
    for(auto word : seeker) {
        string line = word.first + " " + to_string(word.second) + "\n";
        write(seekFile, line.c_str(), strlen(line.c_str()));
    }

    close(file);
    currentFile++;
}

void Indexer::verbose_save() {
    map<string, vector<size_t> > maps(masterDictionary.begin(), masterDictionary.end());
    for(auto word : maps) {
        cout << word.first << endl;
        for(auto location : word.second) {
            cout << location << " ";
            }
	    cout << endl;
        }
    currentFile++;
    }

/**
 * Discards the in-memory block so a new one can be accumulated: zeroes the
 * per-block counters and empties the postings and document-ending lists.
 * currentFile and currentlyIndexed are left untouched.
 */
void Indexer::reset() {
    // Per-block statistics start over.
    currentBlockNumberDocs = 0;
    currentBlockNumberWords = 0;

    // Drop everything collected for the block.
    docEndings.clear();
    masterDictionary.clear();
}