The face embeddings produced by dlib are classified with a Scikit-learn SVM using an RBF kernel, and GridSearchCV is used to tune the SVM's C and gamma hyperparameters.
import time
import logging

from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV

_LOGGER = logging.getLogger(__name__)

# Encode the person names as integer class ids and hold out 30% of the
# embeddings for evaluation.
le = LabelEncoder().fit(labels)
labels_num = le.transform(labels)
X_train, X_test, y_train, y_test = train_test_split(
    reps, labels_num, test_size=0.3, random_state=0)

# Grid of RBF-kernel hyperparameters to search over.
tuned_parameters = [{'kernel': ['rbf'],
                     'gamma': [1, 10, 100],
                     'C': [0.1, 1, 10, 100, 1000]}]
scores = ['precision', 'recall']
for score in scores:
    _LOGGER.info("# Tuning hyper-parameters for %s" % score)

    clf = GridSearchCV(svm.SVC(C=1), tuned_parameters, cv=5,
                       scoring='%s_macro' % score)
    stime = time.time()
    clf.fit(X_train, y_train)
    etime = time.time() - stime
    _LOGGER.info("Fitting took %s (hh:mm:ss)" % time.strftime("%H:%M:%S", time.gmtime(etime)))

    _LOGGER.info("Best parameters set found on development set:")
    _LOGGER.info(clf.best_params_)

    _LOGGER.info("Grid scores on development set:")
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        _LOGGER.info("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

    _LOGGER.info("Detailed classification report:")
    _LOGGER.info("The model is trained on the full development set.")
    _LOGGER.info("The scores are computed on the full evaluation set.")
    y_true, y_pred = y_test, clf.predict(X_test)
    # Map the integer class ids back to person names in the report.
    _LOGGER.info(classification_report(y_true, y_pred, target_names=le.classes_))
    _LOGGER.info("Accuracy : %.2f" % (accuracy_score(y_true, y_pred) * 100.0))