I get the error "Array contains NaN or infinity." I checked my data both for the train / test for the absence of values, and for the absence of errors.
Perhaps I have a misinterpretation of what “array containing NaN or infinity” means.
import numpy as np from sklearn import linear_model from numpy import genfromtxt, savetxt def main(): #create the training & test sets, skipping the header row with [1:] dataset = genfromtxt(open('C:\\Users\\Owner\\training.csv','r'), delimiter=',')[0:50] target = [x[0] for x in dataset] train = [x[1:50] for x in dataset] test = genfromtxt(open('C:\\Users\\Owner\\test.csv','r'), delimiter=',')[0:50] #create and train the SGD sgd = linear_model.SGDClassifier() sgd.fit(train, target) predictions = [x[1] for x in sgd.predict(test)] savetxt('C:\\Users\\Owner\\Desktop\\preds.csv', predictions, delimiter=',', fmt='%f') if __name__=="__main__": main()
I thought that the data type could call an algorithm for the loop (they are floating points).
I know that SGD can handle floating points, so I'm not sure if this setting requires a data type to be declared.
For example, one of the following:
>>> dt = np.dtype('i4') # 32-bit signed integer >>> dt = np.dtype('f8') # 64-bit floating-point number >>> dt = np.dtype('c16') # 128-bit complex floating-point number >>> dt = np.dtype('a25') # 25-character string
The following is a complete error message:
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-62-af5537e7802b> in <module>() 19 20 if __name__=="__main__": ---> 21 main() <ipython-input-62-af5537e7802b> in main() 13 #create and train the SGD 14 sgd = linear_model.SGDClassifier() ---> 15 sgd.fit(train, target) 16 predictions = [x[1] for x in sgd.predict(test)] 17 C:\Anaconda\lib\site-packages\sklearn\linear_model\stochastic_gradient.pyc in fi t(self, X, y, coef_init, intercept_init, class_weight, sample_weight) 518 coef_init=coef_init, intercept_init=intercept_i nit, 519 class_weight=class_weight, --> 520 sample_weight=sample_weight) 521 522 C:\Anaconda\lib\site-packages\sklearn\linear_model\stochastic_gradient.pyc in _f it(self, X, y, alpha, C, loss, learning_rate, coef_init, intercept_init, class_w eight, sample_weight) 397 self.class_weight = class_weight 398 --> 399 X = atleast2d_or_csr(X, dtype=np.float64, order="C") 400 n_samples, n_features = X.shape 401 C:\Anaconda\lib\site-packages\sklearn\utils\validation.pyc in atleast2d_or_csr(X , dtype, order, copy) 114 """ 115 return _atleast2d_or_sparse(X, dtype, order, copy, sparse.csr_matrix , --> 116 "tocsr") 117 118 C:\Anaconda\lib\site-packages\sklearn\utils\validation.pyc in _atleast2d_or_spar se(X, dtype, order, copy, sparse_class, convmethod) 94 _assert_all_finite(X.data) 95 else: ---> 96 X = array2d(X, dtype=dtype, order=order, copy=copy) 97 _assert_all_finite(X) 98 return X C:\Anaconda\lib\site-packages\sklearn\utils\validation.pyc in array2d(X, dtype, order, copy) 79 'is required. Use X.toarray() to convert to dens e.') 80 X_2d = np.asarray(np.atleast_2d(X), dtype=dtype, order=order) ---> 81 _assert_all_finite(X_2d) 82 if X is X_2d and copy: 83 X_2d = safe_copy(X_2d) C:\Anaconda\lib\site-packages\sklearn\utils\validation.pyc in _assert_all_finite (X) 16 if (X.dtype.char in np.typecodes['AllFloat'] and not np.isfinite(Xs um()) 17 and not np.isfinite(X).all()): ---> 18 raise ValueError("Array contains NaN or infinity.") 19 20 ValueError: Array contains NaN or infinity.
Any thoughts would be appreciated.