I am experimenting with various classifiers implemented in the scikit-learn package to perform some NLP tasks. The code I use to perform the classification is as follows:
def train_classifier(self, argcands):
    """Fit the feature vectorizer and a LinearSVC on labelled candidates.

    Args:
        argcands: Iterable of candidates. Each one is passed to
            ``self.extract_features`` and must provide
            ``argcand["info"]["label"]``.

    Side effects:
        Sets ``self.feat_vectorizer`` (fitted ``DictVectorizer``),
        ``self.target_names`` (distinct labels; a label's position is the
        integer class it is trained as) and ``self.classifier``.
    """
    feature_dicts = []
    labels = []
    # Single pass so that one-shot iterables (generators) are supported.
    for argcand in argcands:
        # presumably a feature dict, since it is fed to DictVectorizer
        feature_dicts.append(self.extract_features(argcand))
        labels.append(argcand["info"]["label"])

    self.feat_vectorizer = DictVectorizer()
    feature_matrix = self.feat_vectorizer.fit_transform(feature_dicts)

    # Number the labels in first-seen order. Going through set() would make
    # the label -> index mapping depend on set iteration order, which is not
    # stable across runs; the dict also avoids a list.index() scan per sample.
    label_to_index = {}
    target_names = []
    for label in labels:
        if label not in label_to_index:
            label_to_index[label] = len(target_names)
            target_names.append(label)
    self.target_names = target_names
    target_indices = [label_to_index[label] for label in labels]

    self.classifier = LinearSVC()
    self.classifier.fit(feature_matrix, target_indices)
def execute(self, argcands_test):
    """Predict a label for every candidate and pair it with that candidate.

    Args:
        argcands_test: Iterable of candidates; each one is passed to
            ``self.extract_features``.

    Returns:
        A list of ``(argcand, label)`` tuples in input order, where ``label``
        is looked up in ``self.target_names`` by the predicted class index.
    """
    # Materialize once: the candidates are needed both for feature
    # extraction and for the final pairing, so a generator argument would
    # otherwise be exhausted before the pairing step.
    candidates = list(argcands_test)

    feature_dicts = [self.extract_features(argcand) for argcand in candidates]
    feature_matrix = self.feat_vectorizer.transform(feature_dicts)
    predicted = self.classifier.predict(feature_matrix)

    # int() because predictions may come back as non-int numeric values,
    # which cannot be used directly as list indices.
    predicted_labels = [self.target_names[int(index)] for index in predicted]

    # Return a real list: on Python 3 a bare zip() is a single-use iterator.
    return list(zip(candidates, predicted_labels))
, Machine Vectors Machine: LinearSVC SVC .
, "". LinearSVC : . , SVC, SAME.
, , SVC , . , SVC .
/, .
UPDATE
, , , , :
# Hyper-parameter grid: a single configuration family using the polynomial
# kernel of degree 2, varying C and gamma.
parameters = [
    {
        'C': [1, 10, 100, 1000],
        'gamma': [0.001, 0.0001],
        'kernel': ['poly'],
        'degree': [2],
    },
]
# Wrap an SVC in a grid search over the grid above, scored with f1_score.
self.classifier = GridSearchCV(
    SVC(C=1),
    parameters,
    score_func=f1_score,
)
This produces the following error:
ValueError: The least populated class in y has only 1 members, which is too few. The minimum number of labels for any class cannot be less than k=3.
- , ? ?