Commit 077e532b authored by ahmedaj
Browse files

Random oversampling

parent f06c4721
......@@ -533,25 +533,27 @@ vector<int> oversample(vector <vector<string>> &data, vector<int> &indices) {
if (difference > 0) {
int lablesize = labelWithIdx[lable].size();
vector<int> idxs = labelWithIdx[lable];
if (difference < lablesize) {
toAdd.insert(toAdd.end(), idxs.begin(), idxs.begin() + (difference));
} else {
while (difference > 0) {
if (difference < lablesize) {
toAdd.insert(toAdd.end(), idxs.begin(), idxs.begin() + (difference));
difference = 0;
} else {
toAdd.insert(toAdd.end(), idxs.begin(), idxs.end());
difference -= lablesize;
while (difference > 0) {
if (difference < lablesize) {
vector<int> selection = randSelectIdxWithReplacement(idxs.size(), (double(difference)/double(lablesize)));
for (int selectionIdx:selection){
toAdd.push_back(idxs.at(selectionIdx));
}
difference = 0;
} else {
vector<int> selection = randSelectIdxWithReplacement(idxs.size(), 1);
for (int selectionIdx:selection){
toAdd.push_back(idxs.at(selectionIdx));
}
difference -= lablesize;
}
}
}
}
cout << "lables to add "<<toAdd.size()<<endl;
return toAdd;
......
......@@ -44,9 +44,9 @@ int main(int argc, char *argv[]) {
vector <vector<string>> datasetAsString;
vector <FeatureType> featureTypes;
vector <string> features;
datasetAsString = parseDataToString("../datasets/loan.data");
featureTypes = parseFeatureTypes("../datasets/loan.featureTypes");
features = parseFeatures("../datasets/loan.features");
datasetAsString = parseDataToString("../datasets/adult.data");
featureTypes = parseFeatureTypes("../datasets/adult.featureTypes");
features = parseFeatures("../datasets/adult.features");
double accuracy = 0.0;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment