from sklearn.model_selection import StratifiedKFold
from sklearn import neighbors as knn
from sklearn import ensemble as en
from sklearn import svm
from sklearn import metrics as me
import numpy as np
import itertools

# probably a simpler solution can be derived from the example in
# http://scikit-learn.org/stable/auto_examples/grid_search_digits.html#example-grid-search-digits-py


def eval_class(X, Y, cv_ext, classif, H):
    # Nested cross-validation: for each outer fold, pick the hyperparameter
    # with the best inner 5-fold CV accuracy, then retrain on the whole outer
    # training split and score on the outer test split.
    out = []
    inner = StratifiedKFold(n_splits=5)
    for TR, TE in cv_ext.split(X, Y):
        # Slice the outer training split first: the inner fold indices refer
        # to this subset, not to the full arrays.
        Xtr, Ytr = X[TR], Y[TR]
        bestacc = 0.0
        bestH = None
        for hi in H:
            acc = 0.0
            for TR2, TE2 in inner.split(Xtr, Ytr):
                cl = train(classif, hi, Xtr[TR2], Ytr[TR2])
                acc += test(cl, Xtr[TE2], Ytr[TE2])
            # acc is a sum over a fixed number of folds, so comparing sums
            # is equivalent to comparing mean accuracies.
            if acc > bestacc:
                bestacc = acc
                bestH = hi
        cl = train(classif, bestH, Xtr, Ytr)
        out.append((test(cl, X[TE], Y[TE]), bestH))
    return out


def train(classif, hiper, X, Y):
    if classif == "svmL":
        return svm.LinearSVC(C=hiper).fit(X, Y)
    elif classif == "knn":
        return knn.KNeighborsClassifier(n_neighbors=hiper).fit(X, Y)
    elif classif == "svmRBF":
        return svm.SVC(kernel="rbf", C=hiper[0], gamma=hiper[1]).fit(X, Y)
    elif classif == "rf":
        return en.RandomForestClassifier(max_features=hiper).fit(X, Y)


def test(cl, Xtest, Ytest):
    return me.accuracy_score(Ytest, cl.predict(Xtest))


x = []
y = []
with open("sonar.csv") as f:
    f.readline()  # skip the header line
    for l in f:
        z = l.strip().split(",")
        y.append(z[-1][1:-1])  # last column is the quoted class label
        x.append([float(v) for v in z[:-1]])
x = np.array(x)
y = np.array(y)

cv_ext = StratifiedKFold(n_splits=5)

knnx = eval_class(x, y, cv_ext, "knn", [1, 3, 5, 7, 11, 21])
print('Mean accuracy of knn =', np.mean([r[0] for r in knnx]))
print('Best k', [r[1] for r in knnx])

rg = 10.0 ** np.arange(-3, 4)
svml = eval_class(x, y, cv_ext, "svmL", rg)
print('Mean accuracy of linear SVM =', np.mean([r[0] for r in svml]))
print('Best C', [r[1] for r in svml])

svmrbf = eval_class(x, y, cv_ext, "svmRBF", list(itertools.product(rg, rg)))
print('Mean accuracy of RBF SVM =', np.mean([r[0] for r in svmrbf]))
print('Best C and gamma', [r[1] for r in svmrbf])

rf = eval_class(x, y, cv_ext, "rf", [2, 3, 5, 10, 20, 40, 60])
print('Mean accuracy of rf =', np.mean([r[0] for r in rf]))
print('Best max_features', [r[1] for r in rf])
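
# As the comment near the top suggests, scikit-learn's grid-search example
# allows a simpler formulation of the same nested CV. This is a minimal
# sketch for the kNN case only, using GridSearchCV for the inner loop and
# cross_val_score for the outer loop; it assumes the x and y arrays loaded
# above, and the parameter grid just mirrors the one passed to eval_class.
from sklearn.model_selection import GridSearchCV, cross_val_score

inner_search = GridSearchCV(
    knn.KNeighborsClassifier(),
    param_grid={"n_neighbors": [1, 3, 5, 7, 11, 21]},
    cv=StratifiedKFold(n_splits=5),
    scoring="accuracy",
)
# cross_val_score refits the grid search on each outer training split and
# scores its best estimator on the held-out fold, just like eval_class.
outer_scores = cross_val_score(inner_search, x, y, cv=StratifiedKFold(n_splits=5))
print('Mean accuracy of knn (GridSearchCV) =', outer_scores.mean())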