Commit 672515c8 authored by sgebreeg
Browse files

Updated to pass big vectors by reference instead of by value

parent 068ba1b5
......@@ -26,6 +26,9 @@ Node *train(vector <vector<string>> &data, vector <FeatureType> &featureType,
std::pair<string, double> classificationAndEntropy = classifyWithEntropy(data, nodeDatasetIndices);
string classification = classificationAndEntropy.first;
double originalEntropy = classificationAndEntropy.second;
if(originalEntropy<0 || originalEntropy>1){
cout<<"ERROR with entropy "<<originalEntropy<<endl;
}
double informationGainFromParent;
if (currentDepth == 0) {
informationGainFromParent = 0.0;
......
......@@ -343,20 +343,20 @@ BestSplitPoint findBestSplit(double parentEntropy, int currentDepth, vector <vec
threshold = featureData.at(indx).second;
dataIndex = featureData.at(indx).first;
while (indx < nodeDatasetIndices.size() && featureData.at(indx).second <= threshold) { //TODO check
while (indx < featureData.size() && featureData.at(indx).second <= threshold) { //TODO check
leftSize++;
rightSize--;
if (leftLabelCount.count(data[data.size() - 1][nodeDatasetIndices[indx]])) {
leftLabelCount[data[data.size() - 1][nodeDatasetIndices[indx]]] += 1;
if (leftLabelCount.count(data[data.size() - 1][featureData.at(indx).first])) {
leftLabelCount[data[data.size() - 1][featureData.at(indx).first]] += 1;
} else {
leftLabelCount[data[data.size() - 1][nodeDatasetIndices[indx]]] = 1;
leftLabelCount[data[data.size() - 1][featureData.at(indx).first]] = 1;
}
rightLabelCount[data[data.size() - 1][nodeDatasetIndices[indx]]] -= 1;
rightLabelCount[data[data.size() - 1][featureData.at(indx).first]] -= 1;
indx++;
dataIndex = featureData[indx].first;
}
if (indx == nodeDatasetIndices.size()) { //TODO check
if (indx == featureData.size()) { //TODO check
continue;
}
double splitEntropy = calSplitEntropy(leftLabelCount, rightLabelCount, leftSize, rightSize);
......
......@@ -62,7 +62,7 @@ int main(int argc, char *argv[]) {
vector<int> oversampledData = oversample(datasetAsString, trainingIdxs);
// trainingIdxs.insert(trainingIdxs.end(), oversampledData.begin(), oversampledData.end());
trainingIdxs.insert(trainingIdxs.end(), oversampledData.begin(), oversampledData.end());
// sort(trainingIdxs.begin(), trainingIdxs.end());
vector <string> testData;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment