Commit df470359 authored by sgebreeg's avatar sgebreeg
Browse files

soft voting and majority voting added to RandomForest.cpp and main.cpp

parent 222e9378
......@@ -95,7 +95,7 @@ pair<string, double> DecisionTree::predictSingle(vector <string> &test, Node *tr
}
Question *question = treeRoot->question;
int splitIndex = question->splitFeatureIndex;
cout << splitIndex << endl;
// cout << splitIndex << endl;
std::pair<int, double> featureInfoGain(question->splitFeatureIndex,
treeRoot->informationGainFromParent); // value init
......
......@@ -88,16 +88,29 @@ RandomForest::RandomForest(vector <vector<string>> &data, vector<int> &trainingI
}
vector <string>
/**
 * Runs a prediction for one test sample using the requested voting scheme.
 *
 * @param voting   Selects the voting routine: "SOFT" calls getSoftVoting,
 *                 "HARD" calls getForestPrediction. The comparison is an
 *                 exact, case-sensitive string match.
 * @param test     Test sample, forwarded unchanged to the voting routine.
 * @param forest   Forest forwarded unchanged to the voting routine.
 * @param features Feature list forwarded unchanged to the voting routine.
 *
 * The value returned by the selected voting routine is discarded here.
 */
void RandomForest::predict(string voting, vector <string> &test, RandomForest *forest, vector <string> &features) {
    if (voting == "SOFT") {
        this->getSoftVoting(test, forest, features);
    } else if (voting == "HARD") {
        this->getForestPrediction(test, forest, features);
    } else {
        // Previously an unrecognised mode (e.g. "soft") fell through without
        // any output, which made a typo indistinguishable from a successful
        // call. Report it instead of failing silently; no prediction is run.
        std::cerr << "Unknown voting mode \"" << voting
                  << "\"; expected \"SOFT\" or \"HARD\". No prediction made." << '\n';
    }
}
string
RandomForest::getSoftVoting(vector <string> &test, RandomForest *forest, vector <string> &features) {
cout << "Trees in forest: " << to_string(forest->trees.size()) << endl;
cout << "Predicting" << endl;
cout << "Predicting: Soft Voting" << endl;
map<string, double> votes;
vector<string> predictions(forest->trees.size());
vector<PredictionReport *> explanations(forest->trees.size());
//get every tree's prediction
for (int treeIdx = 0; treeIdx < forest->trees.size(); treeIdx++) {
PredictionReport *report = new PredictionReport();
DecisionTree *tree = forest->trees[treeIdx];
pair<string, double> pred_pair = tree->predictSingle(test, tree->root);
pair<string, double> pred_pair = tree->predictSingle(test, tree->root, report);
string prediction = pred_pair.first;
double entropy = pred_pair.second;
......@@ -106,14 +119,52 @@ RandomForest::getSoftVoting(vector <string> &test, RandomForest *forest, vector
}else{
votes[prediction] = entropy;
}
predictions[treeIdx] = prediction;
explanations[treeIdx] = report;
//TODO remove maybe: printing all the prediction reports
map < string, vector < std::pair < int, double > >>::iterator itr;
cout << "Explanation " << treeIdx << " classified " << report->classification << " because ";
for (itr = report->path.begin(); itr != report->path.end(); ++itr) {
if (itr->first == report->classification) {
sort(itr->second, test, features);
}
}
}
//get prediction with lowest entropy
double minEntropy = 999999;
string label;
map<string, double>::iterator itr;
for(itr=votes.begin(); itr != votes.end(); ++itr){
if(minEntropy > itr->second){
minEntropy = itr->second;
label = itr->first;
}
}
cout<<"Soft voting Classified "<<label<<endl;
cout<<"Because: ";
map<int, double> reports = explain(label, explanations);
sort(reports, test, features);
for (int j = 0; j < explanations.size(); j++) {
delete explanations[j];
}
return label;
}
vector <string>
RandomForest::getForestPrediction(vector <string> &test, RandomForest *forest, vector <string> &features) {
cout << "Trees in forest: " << to_string(forest->trees.size()) << endl;
cout << "Predicting" << endl;
cout << "Predicting: Hard Voting" << endl;
vector <string> predictions(forest->trees.size());
vector < PredictionReport * > explanations(forest->trees.size());
map<string, int> votes;
......@@ -121,7 +172,7 @@ RandomForest::getForestPrediction(vector <string> &test, RandomForest *forest, v
for (int treeIdx = 0; treeIdx < forest->trees.size(); treeIdx++) {
PredictionReport *report = new PredictionReport();
DecisionTree *tree = forest->trees[treeIdx];
pair<string, double> pred_pair = tree->predictSingle(test, tree->root);
pair<string, double> pred_pair = tree->predictSingle(test, tree->root, report);
string prediction = pred_pair.first;
// cout << prediction << endl;
if (votes.count(prediction)) {
......@@ -161,12 +212,7 @@ RandomForest::getForestPrediction(vector <string> &test, RandomForest *forest, v
cout << "Because: ";
map<int, double> reports = explain(label, explanations);
map<int, double>::iterator
exp;
// for (exp = reports.begin(); exp != reports.end(); ++exp) {
// cout << "Feature no " << exp->first << " with info gain " << exp->second << endl;
// }
sort(reports, test, features);
......
......@@ -19,7 +19,8 @@ public:
accuracyReport getAccuracy(vector <vector<string>>& datasetAsString,vector <int> &testIdxs);
void printAccuracyReport(accuracyReport report);
void printAccuracyReportFile(accuracyReport report);
vector <string> getSoftVoting(vector <string> &test, RandomForest *forest, vector <string> &features);
string getSoftVoting(vector <string> &test, RandomForest *forest, vector <string> &features);
void predict(string voting, vector <string> &test, RandomForest *forest, vector <string> &features);
......
......@@ -28,8 +28,8 @@ vector <string> parseFeatures(string fileName) {
int main(int argc, char *argv[]) {
if ((argc <= 2) || (argc >= 5)) {
cout << "Given " << to_string(argc) <<" args. Need 2 or 3." << endl;
cout << "race [numTrees] [depth] [dataset (no suffix, optional)]" << endl;
cout << "Given " << to_string(argc) << " args. Need 2 or 3." << endl;
cout << "race [numTrees] [depth] [dataset (no suffix, optional)]" << endl;
exit(1);
}
......@@ -44,28 +44,27 @@ int main(int argc, char *argv[]) {
vector <vector<string>> datasetAsString;
vector <FeatureType> featureTypes;
vector <string> features;
if (argc == 4) {
cout << "Dataset: " << argv[3] << endl;
datasetAsString = parseDataToString( (string)argv[3] + ".data");
featureTypes = parseFeatureTypes( (string)argv[3] + ".featureTypes");
features = parseFeatures( (string)argv[3] + ".features");
}
else {
cout << "WARNING: No dataset provided as an argument!" << endl;
datasetAsString = parseDataToString("../datasets/adult.data");
featureTypes = parseFeatureTypes("../datasets/adult.featureTypes");
features = parseFeatures("../datasets/adult.features");
}
if (argc == 4) {
cout << "Dataset: " << argv[3] << endl;
datasetAsString = parseDataToString((string) argv[3] + ".data");
featureTypes = parseFeatureTypes((string) argv[3] + ".featureTypes");
features = parseFeatures((string) argv[3] + ".features");
} else {
cout << "WARNING: No dataset provided as an argument!" << endl;
datasetAsString = parseDataToString("../datasets/adult.data");
featureTypes = parseFeatureTypes("../datasets/adult.featureTypes");
features = parseFeatures("../datasets/adult.features");
}
//pick number of features to select for random sub-spacing
float featureWeight = sqrt(features.size())/features.size();
float featureWeight = sqrt(features.size()) / features.size();
double accuracy = 0.0;
double time = 0.0;
for (int x = 0; x < 3; x++) {
for (int x = 0; x < 1; x++) {
vector<int> trainingIdxs = randomSelect_WithoutReplacement(datasetAsString.at(0).size(), 0.7);
//vector <vector<string>> trainingData;
vector <int> testingIdxs = splitTrainingAndTesting(trainingIdxs, datasetAsString);
vector<int> testingIdxs = splitTrainingAndTesting(trainingIdxs, datasetAsString);
cout << "Over sampling training data " << endl;
......@@ -91,11 +90,11 @@ int main(int argc, char *argv[]) {
cout << endl;
// cout << "********************* Forest accuracy *****************" << endl;
// accuracyReport report = randomForest->getAccuracy(datasetAsString,testingIdxs);
//
// accuracy += report.accuracy;
// randomForest->printAccuracyReportFile(report);
cout << "********************* Forest accuracy *****************" << endl;
accuracyReport report = randomForest->getAccuracy(datasetAsString, testingIdxs);
accuracy += report.accuracy;
randomForest->printAccuracyReportFile(report);
cout << "**************** prediction with explanation ********** " << endl;
......@@ -107,10 +106,15 @@ int main(int argc, char *argv[]) {
cout << endl;
randomForest->getForestPrediction(testData, randomForest, features);
for (int i = 0; i<randomForest->trees.size(); i++){
// randomForest->getForestPrediction(testData, randomForest, features);
// randomForest->getSoftVoting(testData, randomForest, features);
randomForest->predict("HARD",testData, randomForest, features);
for (int i = 0; i < randomForest->trees.size(); i++) {
cleanTree(randomForest->trees[i]->root);
delete randomForest->trees[i];
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment