Commit f93f8fae authored by sgebreeg

average time and accuracy added

parent 63a05a77
@@ -352,3 +352,27 @@ Label <=50K was predicted right 4484 times
Label >50K was predicted right 1883 times
Label <=50K was predicted wrong 2221 times
Label >50K was predicted wrong 335 times
---------- Report--------------
Testing accuracy for forest with 10 trees depth 10 and feature selection weight 0.3
Total tested data is 9092
The accuracy of the tree is 71.0955% with 6464 correct predictions and 2628 incorrect predictions
Label <=50K was predicted right 4504 times
Label >50K was predicted right 1960 times
Label <=50K was predicted wrong 2265 times
Label >50K was predicted wrong 363 times
---------- Report--------------
Testing accuracy for forest with 10 trees depth 10 and feature selection weight 0.3
Total tested data is 9110
The accuracy of the tree is 70.966% with 6465 correct predictions and 2645 incorrect predictions
Label <=50K was predicted right 4539 times
Label >50K was predicted right 1926 times
Label <=50K was predicted wrong 2301 times
Label >50K was predicted wrong 344 times
---------- Report--------------
Testing accuracy for forest with 10 trees depth 10 and feature selection weight 0.3
Total tested data is 9133
The accuracy of the tree is 72.6048% with 6631 correct predictions and 2502 incorrect predictions
Label <=50K was predicted right 4739 times
Label >50K was predicted right 1892 times
Label <=50K was predicted wrong 2149 times
Label >50K was predicted wrong 353 times
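
For reference, each accuracy figure above is simply correct predictions over total tested rows; for the last block, 4739 + 1892 = 6631 correct out of 9133 gives the reported 72.6048%. A minimal, self-contained check of that arithmetic (counts copied from the report above):

#include <iostream>

int main() {
    // Counts copied from the last report block above.
    int rightLE = 4739, rightGT = 1892;          // "Label ... was predicted right"
    int wrongLE = 2149, wrongGT = 353;           // "Label ... was predicted wrong"
    int correct = rightLE + rightGT;             // 6631
    int total = correct + wrongLE + wrongGT;     // 9133
    std::cout << 100.0 * correct / total << "%\n";  // prints roughly 72.6048
    return 0;
}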
@@ -3,21 +3,22 @@
#include "RandomForest.hpp"
#include "util.hpp"
using namespace std::chrono;
using namespace std;
// Parse CSV file of feature names
// fileName = name of the file
// Returns vector of strings, each a name of a feature
vector <string> parseFeatures(string fileName) {
    ifstream fIn;
    fIn.open(fileName, ifstream::in);
    string nextString, line, word;
    vector <string> features;
    // cout << "Features: " << flush;
    while (fIn >> line) {
        stringstream ss(line);
        while (getline(ss, word, ',')) {
            word = (string)(word.find_first_of(" ") == 0 ? word.substr(1) : word);
            features.push_back(word);
        }
    }
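
For context, the inner getline-on-',' split above is what turns one comma-separated token into individual feature names, trimming a single leading space. A minimal, self-contained sketch of that behaviour on a made-up sample line (not taken from the dataset):

#include <iostream>
#include <sstream>
#include <string>
#include <vector>
using namespace std;

int main() {
    string line = "age, workclass, education";   // illustrative sample only
    stringstream ss(line);
    string word;
    vector<string> features;
    while (getline(ss, word, ',')) {
        // Same single-leading-space trim as in parseFeatures above.
        word = (word.find_first_of(" ") == 0 ? word.substr(1) : word);
        features.push_back(word);
    }
    for (const string &f : features) cout << f << "\n";   // age / workclass / education
    return 0;
}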
@@ -26,87 +27,98 @@ vector<string> parseFeatures(string fileName) {
int main(int argc, char *argv[]) {
    if (argc <= 1) {
        cout << "arguments needed\n";
        exit(1);
    }
    int numTrees = atoi(argv[1]);
    int depth = atoi(argv[2]);
    float baggingWeight = atoi(argv[2]) * 0.1;
    // double featWeight = numFeatures * 0.1;
    // cout << featWeight << "\n";

    vector <vector<string>> datasetAsString;
    vector <FeatureType> featureTypes;
    vector <string> features;

    datasetAsString = parseDataToString("../datasets/adult.data");
    featureTypes = parseFeatureTypes("../datasets/adult.featureTypes");
    features = parseFeatures("../datasets/adult.features");

    double accuracy = 0.0;
    double time = 0.0;

    for (int x = 0; x < 3; x++) {
        vector<int> trainingIdxs = randSelectIdxWithoutReplacement(datasetAsString.at(0).size(), 0.7);
        vector <vector<string>> trainingData;
        vector <vector<string>> testingData;

        splitTrainingAndTesting(trainingIdxs, datasetAsString, trainingData, testingData);

        cout << "Over sampling training data " << endl;
        vector <vector<string>> oversampledData = oversample(trainingData);
        // cout << "over sampled data size "<< oversampledData.at(0).size() <<endl;

        for (int fIdx = 0; fIdx < trainingData.size(); ++fIdx) {
            for (int oIdx = 0; oIdx < oversampledData.at(0).size(); ++oIdx) {
                trainingData.at(fIdx).emplace_back(oversampledData.at(fIdx).at(oIdx));
            }
        }
        // cout<< "training data size after oversample" << trainingData.at(0).size()<<endl;

        vector <string> testData;
        string emptystring;
        for (int featIndex = 0; featIndex < testingData.size(); featIndex++) {
            testData.push_back(emptystring);
        }
        // string data = testingData.at(1).at(0);
        // cout << data << endl;

        auto start = high_resolution_clock::now();
        // for (int featIndex = 0; featIndex < testingData.size(); featIndex++) {
        //     testData.at(featIndex) = testingData.at(featIndex).at(41);
        //     //cout<<testingData.at(featIndex).at(0)<<", ";
        // }
        //cout<<endl;
        RandomForest *randomForest = new RandomForest(trainingData, featureTypes, numTrees, 0.7, 0.3, depth);
        time += (high_resolution_clock::now() - start).count() / 1000000000.0;

        cout << endl;
        cout << "********************* Forest accuracy *****************" << endl;
        accuracyReport report = randomForest->getAccuracy(testingData);
        accuracy += report.accuracy;
        randomForest->printAccuracyReportFile(report);

        cout << "**************** prediction with explanation ********** " << endl;
        for (int featIndex = 0; featIndex < testingData.size(); featIndex++) {
            testData.at(featIndex) = testingData.at(featIndex)[0];
            cout << testingData.at(featIndex).at(0) << ", ";
        }
        cout << endl;

        randomForest->getForestPrediction(testData, randomForest, features);
    }

    ofstream outfile;
    outfile.open("avg.txt", ios::app);
    outfile << "------ Report ------ " << endl;
    // outfile << numTrees << "\t" << depth << "\t" << featureWeight << "\t" << baggingWeight << "\t" << accuracy / 3 << "\t" << time / 3 << endl;
    outfile << numTrees << "\t" << 10 << "\t" << 0.3 << "\t" << baggingWeight << "\t" << accuracy / 3 << "\t" << time / 3 << endl;
    outfile.close();
    return 0;
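A note on the timing added in this commit: (high_resolution_clock::now() - start).count() returns ticks in the clock's native period, so dividing by 1000000000.0 assumes that period is nanoseconds, which holds on common implementations but is not guaranteed by the standard. A minimal, self-contained sketch of a period-independent variant, with a stand-in workload in place of the forest construction:

#include <chrono>
#include <iostream>

int main() {
    using namespace std::chrono;
    auto start = high_resolution_clock::now();

    // Stand-in workload; in the commit above this slot holds the
    // RandomForest construction.
    volatile double sink = 0.0;
    for (int i = 0; i < 1000000; ++i) sink = sink + i * 0.5;

    duration<double> elapsed = high_resolution_clock::now() - start;  // seconds, any clock period
    std::cout << "elapsed: " << elapsed.count() << " s\n";
    return 0;
}
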
#!/bin/bash
for i in {10..100..10}
do
    # ./race $i 10
    for j in {3..9}
    do
        ./race $i $j
    done
done
@@ -38,3 +38,67 @@
------ Training Tree 0 18.1893
------ Training Tree 0 2.8035
------ Training Tree 1 3.50854
------ Training Tree 0 8.82535
------ Training Tree 1 12.6669
------ Training Tree 2 16.4287
------ Training Tree 3 8.33822
------ Training Tree 4 12.1255
------ Training Tree 5 14.3656
------ Training Tree 6 14.3623
------ Training Tree 7 11.744
------ Training Tree 8 8.30169
------ Training Tree 9 17.8691
------ Training Tree 0 14.9384
------ Training Tree 1 13.8911
------ Training Tree 2 15.6833
------ Training Tree 3 12.2209
------ Training Tree 4 12.3242
------ Training Tree 5 13.5826
------ Training Tree 6 7.63938
------ Training Tree 7 9.56681
------ Training Tree 8 16.7578
------ Training Tree 9 13.3941
------ Training Tree 0 17.3212
------ Training Tree 1 14.8815
------ Training Tree 2 14.4884
------ Training Tree 3 10.9416
------ Training Tree 4 16.6424
------ Training Tree 5 19.9735
------ Training Tree 6 12.3658
------ Training Tree 7 18.7448
------ Training Tree 8 13.0621
------ Training Tree 9 6.87777
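
As a rough sanity check on the per-tree times above (the log does not show whether the trees train sequentially or in parallel), the last 10-tree run averages about 14.5 seconds per tree. A minimal, self-contained sketch of that average (values copied from the log above):

#include <iostream>
#include <vector>

int main() {
    // Per-tree training times (seconds) from the last 10-tree run above.
    std::vector<double> perTree = {17.3212, 14.8815, 14.4884, 10.9416, 16.6424,
                                   19.9735, 12.3658, 18.7448, 13.0621, 6.87777};
    double sum = 0.0;
    for (double t : perTree) sum += t;
    std::cout << "mean per-tree training time: " << sum / perTree.size() << " s\n";  // ~14.5 s
    return 0;
}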