Skip to content
Snippets Groups Projects
Commit 7dea3986 authored by Nicholas Yang
Browse files

allows dictionaries that are local to the document to be merged

parent 6bf9fa1d
No related branches found
No related tags found
1 merge request: !1 "Add makefile for everyone and tests for the indexer"
......@@ -3,26 +3,32 @@
// Default constructor: starts every counter assigned below at zero.
// NOTE(review): this listing is a rendered diff without +/- markers, so some of
// these assignments may belong only to the pre- or post-commit version — confirm
// against the real source file.
Indexer::Indexer() {
indexedCount = 0;
currentFile = 0;
totalIndexed = 0;
currentlyIndexed = 0;
}
// Drains pointerToDictionaries: each popped dictionary (word -> list of
// locations) is appended into masterDictionary, and save() is called once the
// queue reports a size of zero.
// NOTE(review): this listing is a rendered diff without +/- markers. Several
// statements below appear in both their old and new form, so the braces do not
// balance as shown — confirm against the real source file before relying on it.
void Indexer::run() {
while(pointerToDictionaries.Size() != 0) {
// NOTE(review): the next two `if` lines look like the old and new versions of
// the same flush threshold; presumably only one is live.
if(indexedCount > 100000) {
if(totalIndexed > 5) {
// Write out what has been accumulated so far, then start from a clean state.
save();
reset();
}
unordered_map<string, vector<int>>* dictionary = pointerToDictionaries.Pop();
for(auto word : *dictionary) {
// NOTE(review): indexedCount is bumped here by the whole vector size and again
// per location inside the inner loop; if both are live the count is doubled —
// presumably one of them is a removed line.
indexedCount += word.second.size();
totalIndexed += word.second.size();
for(auto location : word.second) {
indexedCount++;
// NOTE(review): old and new forms of the same append. The second one adds
// currentlyIndexed to the location before storing it.
masterDictionary[word.first].push_back(location);
masterDictionary[word.first].push_back(currentlyIndexed + location);
}
}
// Fold this dictionary's count into the running total and restart the
// per-dictionary counter.
currentlyIndexed += indexedCount;
indexedCount = 0;
}
// Final write after the queue is empty.
save();
}
void Indexer::save() {
map<string, vector<int> > maps(masterDictionary.begin(), masterDictionary.end());
map<string, vector<size_t> > maps(masterDictionary.begin(), masterDictionary.end());
string fileName = "index" + to_string(currentFile) + ".txt";
int file = open(fileName.c_str(), O_CREAT | O_WRONLY, S_IRWXU);
for(auto word : maps) {
......@@ -40,5 +46,5 @@ void Indexer::save() {
// Empties masterDictionary and zeroes the counters assigned below.
// NOTE(review): this listing is a rendered diff without +/- markers; the two
// counter assignments may be the old and new form of a single line — confirm
// against the real source file.
void Indexer::reset() {
masterDictionary.clear();
indexedCount = 0;
totalIndexed = 0;
}
......@@ -25,7 +25,9 @@ class Indexer {
private:
void save();
void reset();
unordered_map<string, vector<int> > masterDictionary;
unordered_map<string, vector<size_t> > masterDictionary;
size_t indexedCount;
size_t currentFile;
size_t totalIndexed;
size_t currentlyIndexed;
};
\ No newline at end of file
......@@ -16,39 +16,46 @@ int main() {
unordered_map<string, vector<int>> test2;
unordered_map<string, vector<int>> test3;
unordered_map<string, vector<int>> test4;
ifstream ifstream1("tests/test1.txt");
ifstream ifstream2("tests/test2.txt");
ifstream ifstream3("tests/test3.txt");
ifstream ifstream4("tests/test4.txt");
ifstream ifstream1("tests/s-test1.txt");
ifstream ifstream2("tests/s-test2.txt");
ifstream ifstream3("tests/s-test3.txt");
ifstream ifstream4("tests/s-test4.txt");
string word = "";
int id = 0;
while(ifstream1 >> word) {
std::transform(word.begin(), word.end(), word.begin(), ::tolower);
word.erase(remove_if(word.begin(), word.end(), [](char c) { return !isalpha(c); } ), word.end());
if(word != "")
if(word != "") {
test1[word].push_back(id);
id++;
id++;
}
}
id = 0;
while(ifstream2 >> word) {
std::transform(word.begin(), word.end(), word.begin(), ::tolower);
word.erase(remove_if(word.begin(), word.end(), [](char c) { return !isalpha(c); } ), word.end());
if(word != "")
if(word != "") {
test2[word].push_back(id);
id++;
id++;
}
}
id = 0;
while(ifstream3 >> word) {
std::transform(word.begin(), word.end(), word.begin(), ::tolower);
word.erase(remove_if(word.begin(), word.end(), [](char c) { return !isalpha(c); } ), word.end());
if(word != "")
if(word != "") {
test3[word].push_back(id);
id++;
id++;
}
}
id = 0;
while(ifstream4 >> word) {
std::transform(word.begin(), word.end(), word.begin(), ::tolower);
word.erase(remove_if(word.begin(), word.end(), [](char c) { return !isalpha(c); } ), word.end());
if(word != "")
if(word != "") {
test4[word].push_back(id);
id++;
id++;
}
}
indexer.pointerToDictionaries.Push(&test1);
indexer.pointerToDictionaries.Push(&test2);
......
ai
6
believe
1
but
17
by
8
can
3 21
conversational
5
human
29
improve
4
internet
16
letting
9
nature
30
of
28
on
14
people
13
reflect
25
researchers
0
say
22
sometimes
18
systems
7 20
talk
11
that
24
the
15 26
them
10
these
19
they
2
things
23
to
12
worst
27
a
34
an
55
and
58
by
47
dictionaries
43
dictionary
52
document
51
each
50
end
62
from
39 54
generated
42
hello
31
i
45
index
56
indexing
38
is
33 49
locally
41
mean
46
of
37 57
quick
35
runs
59
starts
53
test
36
the
61
this
32 48
till
60
what
44
with
40
all
64
and
87
because
90
bike
81
bikers
66
do
69
down
78
dry
86
ever
71
feel
72
for
63
hate
92
i
91
its
88
like
73
looks
84
not
89
on
79
out
67
perfectly
85
seat
82
shit
74
sit
77
that
83 93
there
68
when
75
you
65 70 76
your
80
and
111 116 133
application
110
changes
125
code
130
components
121
create
99
data
124
debug
136
declarative
126
design
102
each
106
easier
134
efficiently
114
for
105
in
108
interactive
100
it
96
just
118
make
128
makes
95
more
131
painless
97
predictable
132
react
94 112
render
117
right
120
simple
103
state
107
the
119
to
98 135
uis
101
update
115
views
104 127
when
122
will
113
your
109 123 129
Researchers believe they can improve conversational A.I. systems by letting them talk to people on the internet. But sometimes, these systems can say things that reflect the worst of human nature.
\ No newline at end of file
hello this is a quick test of indexing from 0 with locally generated dictionaries - what I mean by this is
each document dictionary starts from an index of 0 and runs till the end.
\ No newline at end of file
for all you bikers out there do you ever feel like shit when you sit down on your bike seat that looks perfectly dry
and it's not because i hate that
\ No newline at end of file
React makes it painless to create interactive UIs. Design simple views for each state in your application, and React will efficiently update and render just the right components when your data changes. Declarative views make your code more predictable and easier to debug.
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment