Skip to content
Snippets Groups Projects
Commit 6d1b759a authored by Nicholas Yang
Browse files

todo added and minor fix to save chunks

parent 75780d20
No related branches found
No related tags found
1 merge request: !1 "Add makefile for everyone and tests for the indexer"
......@@ -9,9 +9,10 @@ Indexer::Indexer() {
void Indexer::run() {
while(pointerToDictionaries.Size() != 0) {
if(totalIndexed > 5) {
if(totalIndexed > 50000) {
save();
reset();
totalIndexed = 0;
}
unordered_map<string, vector<int>>* dictionary = pointerToDictionaries.Pop();
for(auto word : *dictionary) {
......@@ -22,7 +23,6 @@ void Indexer::run() {
}
}
currentlyIndexed += indexedCount;
indexedCount = 0;
}
save();
}
......@@ -70,5 +70,4 @@ void Indexer::verbose_save() {
void Indexer::reset() {
masterDictionary.clear();
totalIndexed = 0;
}
......@@ -16,6 +16,12 @@
Objective: Pulls small dictionaries from the parser and merges them into the
master index.
TODO:
Use deltas between the offsets
Save with UTF-8 encoding
Concrete block size - 500MB per block?
Save document endings and other relevant metadata?
*/
using namespace std;
......
......@@ -16,10 +16,10 @@ int main() {
unordered_map<string, vector<int>> test2;
unordered_map<string, vector<int>> test3;
unordered_map<string, vector<int>> test4;
ifstream ifstream1("tests/s-test1.txt");
ifstream ifstream2("tests/s-test2.txt");
ifstream ifstream3("tests/s-test3.txt");
ifstream ifstream4("tests/s-test4.txt");
ifstream ifstream1("tests/test1.txt");
ifstream ifstream2("tests/test2.txt");
ifstream ifstream3("tests/test3.txt");
ifstream ifstream4("tests/test4.txt");
string word = "";
int id = 0;
while(ifstream1 >> word) {
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment