I'm just trying to make a simple RandomForestRegressor example. But when testing accuracy, I get this error
/Users/noppanit/anaconda/lib/python2.7/site-packages/sklearn/metrics/classification.pyc
in precision_score (y_true, y_pred, normalize, sample_weight) 177 178 # Calculate the accuracy for each possible view â 179 y_type, y_true, y_pred = _check_targets (y_true, y_pred) 180 if y_type.startswith ('multilabel'): 181 = different count_nonzero (y_true - y_pred, axis = 1)
/Users/noppanit/anaconda/lib/python2.7/site-packages/sklearn/metrics/classification.pyc
in _check_targets (y_true, y_pred) 90 if (y_type is not in ["binary", "multiclass", "multilabel-pointer", 91 "multilabel-sequence"]): ---> 92 raise ValueError ("{0} is equal to ".format (y_type)) 93 94 is not supported if y_type in [" binary "," multiclass "]:
ValueError: continuous is not supported
This is sample data. I can not show real data.
target, func_1, func_2, func_2, ... func_200 float, float, float, float, ... float
Here is my code.
import pandas as pd import numpy as np from sklearn.preprocessing import Imputer from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor from sklearn.cross_validation import train_test_split from sklearn.metrics import accuracy_score from sklearn import tree train = pd.read_csv('data.txt', sep='\t') labels = train.target train.drop('target', axis=1, inplace=True) cat = ['cat'] train_cat = pd.get_dummies(train[cat]) train.drop(train[cat], axis=1, inplace=True) train = np.hstack((train, train_cat)) imp = Imputer(missing_values='NaN', strategy='mean', axis=0) imp.fit(train) train = imp.transform(train) x_train, x_test, y_train, y_test = train_test_split(train, labels.values, test_size = 0.2) clf = RandomForestRegressor(n_estimators=10) clf.fit(x_train, y_train) y_pred = clf.predict(x_test) accuracy_score(y_test, y_pred)