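TL;DR
Loop over different values of n_components and compute the explained_variance_score of the decoded X at each iteration. This shows how many components are needed to explain 95% of the variance.
Now, the reasoning behind this.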
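Relationship between PCA and NMF
NMF and PCA, like many other unsupervised learning algorithms, aim to do two things: encode the input X into a compressed representation H, and decode H back into X', which should be as close to X as possible.
They do this in somewhat similar ways: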
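- Decoding is the same in PCA and NMF: both output X' = dot(H, W), where W is a learned matrix parameter.
- Encoding is different. In PCA it is also linear: H = dot(X, V), where V is also a learned parameter. In NMF, H = argmin(loss(X, H, W)) (with respect to H only), where loss is the mean squared error between X and dot(H, W), plus some additional penalties. The minimization is performed by coordinate descent, and the result may be nonlinear in X.
- Training is also different. PCA learns sequentially: the first component minimizes the MSE without constraints, and each subsequent k-th component minimizes the residual MSE subject to being orthogonal to the previous components. NMF minimizes the same loss(X, H, W) as in encoding, but now with respect to both H and W.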
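If you want to measure the performance of an encoding/decoding algorithm, follow the usual steps:
- Train your encoder + decoder on X_train.
- To measure in-sample performance, compare X_train' = decode(encode(X_train)) with X_train using your preferred metric (e.g. MAE, RMSE, or explained variance).
- To measure out-of-sample performance (the generalization ability of the algorithm), repeat step 2 on the unseen X_test.
Let's try this with PCA and NMF!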
from sklearn import decomposition, datasets, model_selection, preprocessing, metrics
# Load the iris feature matrix; the class labels are discarded.
X, _ = datasets.load_iris(return_X_y=True)

# Hold out half of the samples so reconstruction quality can also be
# measured on data the models never saw.
X_train, X_test = model_selection.train_test_split(
    X, test_size=0.5, random_state=1
)

# Scale without centering (with_mean=False) -- presumably so the features
# stay non-negative, which NMF requires; the scaler is fitted on the
# training split only.
scaler = preprocessing.StandardScaler(with_mean=False)
X_train_sc = scaler.fit_transform(X_train)
X_test_sc = scaler.transform(X_test)

# Fit both two-component models on the scaled training split.
pca = decomposition.PCA(n_components=2)
pca.fit(X_train_sc)
nmf = decomposition.NMF(n_components=2)
nmf.fit(X_train_sc)

# Total explained variance ratio as reported by PCA itself.
print(sum(pca.explained_variance_ratio_))
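This prints an explained variance ratio of 0.9536930834362043 - the default metric of PCA, estimated from its eigenvalues. We can measure it in a more direct way - by applying a metric to the actual and "predicted" values: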
def get_score(model, data, scorer=metrics.explained_variance_score):
    """Estimate performance of the model on the data.

    The data is encoded with ``model.transform``, decoded back with
    ``model.inverse_transform``, and the reconstruction is then compared
    with the original data.

    Args:
        model: Fitted object exposing ``transform`` and
            ``inverse_transform``.
        data: Samples to encode and reconstruct.
        scorer: Callable invoked as ``scorer(data, prediction)``; defaults
            to ``metrics.explained_variance_score``.

    Returns:
        Whatever ``scorer`` returns for the data and its reconstruction.
    """
    # Round-trip the data through the model: encode, then decode.
    prediction = model.inverse_transform(model.transform(data))
    return scorer(data, prediction)
# Reconstruction quality on the split the models were fitted on:
# header line, then the PCA score, then the NMF score.
print(
    'train set performance',
    get_score(pca, X_train_sc),
    get_score(nmf, X_train_sc),
    sep='\n',
)
# The same two models, scored on the held-out split.
print(
    'test set performance',
    get_score(pca, X_test_sc),
    get_score(nmf, X_test_sc),
    sep='\n',
)
train set performance
0.9536930834362043
0.937291711378812
test set performance
0.9597828443047842
0.9590555069007827
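You can see that on the training set PCA performs better than NMF, but on the test set their performance is almost identical. This happens because NMF applies a lot of regularization:
- H and W (the learned parameter) must be non-negative
- H should be as small as possible (L1 and L2 penalties)
- W should be as small as possible (L1 and L2 penalties)
These regularizations make NMF fit the training data worse than it otherwise could, but they may improve its generalization ability, which is what happened in our case.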
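In PCA this is simple, because its components h_1, h_2, ... h_k are learned sequentially. If you add a new component h_(k+1), the first k components do not change. Thus, you can estimate the performance of each component, and these estimates do not depend on the number of components. This is what lets PCA output the explained_variance_ratio_ array after only a single fit to the data.
NMF is more complex, because all of its components are trained at the same time, and each one depends on all the others. Thus, if you add the (k+1)-th component, the first k components change, and you cannot match each individual component with its explained variance (or any other metric).
What you can do instead is fit a new NMF instance for each number of components and compare the total explained variance: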
# Candidate numbers of NMF components to compare.
ks = [1, 2, 3, 4]
perfs_train = []
perfs_test = []
for k in ks:
    # A fresh NMF model is fitted for every k and scored on both splits.
    nmf = decomposition.NMF(n_components=k).fit(X_train_sc)
    perfs_train.append(get_score(nmf, X_train_sc))
    perfs_test.append(get_score(nmf, X_test_sc))
# One list per split, with scores in the same order as ks.
print(perfs_train)
print(perfs_test)
[0.3236945680665101, 0.937291711378812, 0.995459457205891, 0.9974027602663655]
[0.26186701106012833, 0.9590555069007827, 0.9941424954209546, 0.9968456603914185]
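Thus, three components (judging by train set performance) or two components (judging by the test set) are required to explain at least 95% of the variance. Note that this case is unusual and is caused by the small size of the training and test data: usually performance degrades a little on the test set, but here it actually improved slightly.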