You can use np.unique(..., return_inverse=True) to get a representative integer for each row's value. For instance,
In [117]: uniques, X = np.unique(['foo', 'baz', 'bar', 'foo', 'baz', 'bar'], return_inverse=True)

In [118]: X
Out[118]: array([2, 1, 0, 2, 1, 0])
Note that X has dtype int32, since np.unique can handle no more than 2**31 unique values.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# NOTE(review): axes3d is never referenced by name below; presumably imported
# so the '3d' projection is available on older Matplotlib -- confirm before
# removing.
import mpl_toolkits.mplot3d.axes3d as axes3d

# Build a small demo frame: two string-valued columns (X, Y) drawn from
# `words`, plus a numeric column Z.
N = 12
arr = np.arange(N * 2).reshape(N, 2)
words = np.array(['foo', 'bar', 'baz', 'quux', 'corge'])
df = pd.DataFrame(words[arr % 5], columns=list('XY'))
df['Z'] = np.linspace(1, 1000, N)

# The vertical axis shows log10 of the Z column.
Z = np.log10(df['Z'])

# return_inverse=True yields, alongside the unique strings, an integer index
# per original entry pointing into the uniques array; those integers are what
# get plotted in place of the strings.
Xuniques, X = np.unique(df['X'], return_inverse=True)
Yuniques, Y = np.unique(df['Y'], return_inverse=True)

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.scatter(X, Y, Z, s=20, c='b')

# One tick per integer code, labelled with the string that code stands for.
ax.set(
    xticks=range(len(Xuniques)),
    xticklabels=Xuniques,
    yticks=range(len(Yuniques)),
    yticklabels=Yuniques,
)
plt.show()
