re>3) An indicator of whether they have bought before. * * */ public class Logistic_Regression_Example { /** * @param args This is the main method that runs the example */ public static void main(String[] args){ /* * In this example we will * Initialize the names of the predictors we are going to use, * Namely : Age, Income, whether they bought previously. * We will add also the constant. */ String [] Name_of_predictors = {"Constant","Age","Income","Bought_Previously"}; /* * Initialize a double Array for the predictors again * displaying Age, Income and a binary target that when it is 1=they bough before * and when it is 0= they haven't bough before. */ double predictors_array [][]= {{23,15000,0}, {24,16000,0},{25,17000,0},{26,18000,0},{27,19000,1},{28,20000,0}, {29,21000,0},{30,22000,1},{31,23000.34,0},{32,24000,0},{33,25000,1}, {34,26000,1},{35,27000,1},{36,28000,0},{37,29000.78,0},{38,30000,1}, {39,31000,1},{40,32000,0},{41,33000,1},{42,34000,0},{43,35000,1}, {44,36000,0},{45,37000,1},{46,38000.17,0},{47,39000,1},{48,40000,0}, {49,41000,1},{50,42000,0},{51,43000,1},{52,44000,0},{53,45000,1}, {54,46000,1},{55,47000,1},{56,48000,0},{57,49000,1},{58,50000,1}, {59,51000,0},{60,52000,1},{61,53000,1},{62,54340,0},{63,30034.78,1}, {64,56000,1},{65,57000,0},{66,58000,0},{67,59000,1},{68,60000,0}, {69,61000,1},{70,62000,1},{71,63000,1},{72,64000,0},{73,65000,0}, {74,66000,1},{75,67000,1},{76,68000,0},{77,69000,1},{78,70000,0}, {79,71000,1},{80,72000,1},{81,73000,0},{82,74000,1},{83,75000,0}, {84,76000,1},{85,77000,0},{86,78000,0},{87,79000,0},{88,80000,1}, {89,81000,1},{90,82000,1},{91,83000,0},{92,84000,1},{93,48000,0}, {94,49000,1},{95,50000,1},{96,51000,0},{97,52000,1},{98,53000,0}, {99,54000,1},{100,55000,1},{101,55000,1},{23,27000,0},{24,28000,0}, {25,29000,1},{26,30000,0},{27,31000,0},{28,32000,1},{29,33000,1}, {30,34000,0},{31,35000,0},{32,36000,0},{33,37000,1},{34,38000,1}, {35,39000,0},{36,40000,1},{37,38000,0},{38,39000,1},{39,40000,1}, {40,38000,0},{41,39000,0},{42,40000,1},{43,41000,1},{44,42000,1}, {45,43000,0}, }; /* * Initialize the Array that will hold the Target variable * as a double Array. */ double Binary_Target [] ={1,0,0,1,0,0,0,1,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0,1, 0,1,0,0,1,0,1,0,1,0,0,0,0,1,1,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,1,1,0, 1,1,1,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,1,1,0,1,1,0,1,0,0,0,1,0,1,1, 1,0,0,1,1,0,1,1,0,1,1,0}; /* * We create A logistic Regression class */ Logistic_Regression Logit= new Logistic_Regression(); /* * We initialize a few values needed to run the algorithm * 1) A string that will state whether we want a constant or not on the model. * 2) The number of maximum iterations that Newton-Raphson optimization method will run * 3) The precision (how string the Algorithm should be) */ boolean Constant = true; int iterations=20; double precision=0.0001; /* * We run the main Logistic Regression method */ Logit.regression(predictors_array, Binary_Target, Constant, precision, iterations); /* * We get Logistic Regression's * 1) Beta * 2) Odds * 3)Wald statistics * 4)Wald P_Values */ double betas []=Logit.getbetas(); double odds []=Logit.get_odds(); double Wald []= Logit.getWald(); double Wald_p []= Logit.getWald_P_Values(); // print the results for (int i=0; i < betas.length; i++){ System.out.println(Name_of_predictors[i] + "'s coefficient: " + betas[i] + ", odds: " + odds[i] + ", Wald: " + Wald[i] + ", Wald_pvalues: " + Wald_p[i]); } /* * It will print : * Constant's coefficient: -0.9337787939195562, odds: 0.3930655866371482, Wald: 2.5000817152040433, Wald_pvalues: 0.11384039106872246 Age's coefficient: -0.0014064796317275884, odds: 0.39251313649161934, Wald: 0.007115937840872481, Wald_pvalues: 0.9327733748244205 Income's coefficient: 1.709941845014123E-5, odds: 0.3930723078875571, Wald: 0.6069920714302763, Wald_pvalues: 0.43592260139561945 Bought_Previously's coefficient: 0.4886981492878521, odds: 0.6407725990783535, Wald: 1.43349845690155, Wald_pvalues: 0.23119433599827843 Statistically non of the predictors is very strong (all wald_p values are above 0.05) to say with confidence that they are predictive. However previous buying experience tends to be more predictive than the others (higher odds and lower Wald P_value). People that have bough previously have higher chance in responding positively to the campaign. */ /* we also get: * 1) The predicted Values * 2) The residuals */ double predicted_probabilities[]=Logit.getprobabilities(); double residuals [] =Logit.getresiduals(); /* * Last we will compute some statistics that will tell us how good the model is: * 1)Maximum Likelihood * 2) AIC * 3) BIC * 4) Area under Roc and Gini coefficient */ double Maximum_Likelihood=Logit.getMAXIMUMlikelihood(); double AIC=Logit.getAIC(); double BIC=Logit.getBIC(); /* * Finally we create a class to compute the Area under the ROC curve and the Gini Coefficient */ Gini gini= new Gini(); /* * First we need to make the target Array a string */ String Binary_Target_String []= new String [Binary_Target.length]; for (int i=0; i < Binary_Target_String.length; i++) { Binary_Target_String[i]=Binary_Target[i] + ""; } // The gini might come out negative based on which class the the target it has chosen to regard as "good" gini.getgenie(predicted_probabilities, Binary_Target_String); System.out.println("Maximum_Likelihood : " + Maximum_Likelihood); System.out.println("AIC : " + AIC); System.out.println("BIC : " + BIC); System.out.println(" Gini :" + Math.abs(gini.getGini()) + "%"); /* * It will print: * Maximum_Likelihood : 137.59033578013265 AIC : 145.59033578013265 BIC : 156.09022703326974 Gini :19.999999999999996% * The model in this particular scenario is rather poor, but still better than choosing randomly since * the Gini coefficient is higher than 0%. */ /* * Finally we can attempt to score a new set by providing the * coefficients we just computed.Initialize a second array to be * scored with the beta are computed */ double tobe_Scored [][]= {{29,24000,0}, {24,16000,0},{25,17500,0},{26,18000,0},{27,19000,1},{28,20000,0}, {28,27000,0},{34,22600,1},{31,23000.34,0},{32,24000,0},{33,25000,1}, {34,26000,1},{35,27000,1},{92,56000,1},{103,103000.78,1},{38,30000,1},}; /* * Score the new set and get the probabilities as a double array */ double validation_probabilities []=Logit.score(tobe_Scored, betas, true); /* * Print the results */ for (int i=0; i < validation_probabilities.length; i++) { System.out.println("Validation probability " + i + " is " + validation_probabilities[i]); } /* * It will print: * Validation probability 0 is 0.36258068847655434 Validation probability 1 is 0.33315637253907837 Validation probability 2 is 0.33856380367842287 Validation probability 3 is 0.3401652867413001 Validation probability 4 is 0.46054056671313165 Validation probability 5 is 0.34724487083728073 Validation probability 6 is 0.37484759793563155 Validation probability 7 is 0.473411580855987 Validation probability 8 is 0.35799266664256846 Validation probability 9 is 0.3616060754784706 Validation probability 10 is 0.4840032085966262 Validation probability 11 is 0.4879233311646759 Validation probability 12 is 0.4918449398547857 Validation probability 13 is 0.5946183163360439 Validation probability 14 is 0.763380890266451 Validation probability 15 is 0.5036138577855921 */ /* * The same example will be used with discriminant analysis as well. */ } }