As a workaround, you can convert the categorical columns to their integer codes and save each column's categories in a dict keyed by column name. For instance,
def decat(df):
    """
    Convert categorical columns to (integer) codes; return the categories in catmap.

    The conversion happens in place: every categorical column of ``df`` is
    overwritten with its integer codes (missing values get the code -1).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose categorical columns should be replaced by their codes.
        Requires ``import pandas as pd`` to be in scope.

    Returns
    -------
    (df, catmap) : tuple
        The same (mutated) DataFrame object, and a dict mapping each
        converted column name to the Index of its original categories.
    """
    catmap = {}
    # Series.items() exists in every pandas version, unlike the removed
    # Series.iteritems(); the isinstance check replaces the removed
    # pandas.core.common.is_categorical_dtype helper.
    for col, dtype in df.dtypes.items():
        if isinstance(dtype, pd.CategoricalDtype):
            accessor = df[col].cat
            catmap[col] = accessor.categories
            df[col] = accessor.codes
    return df, catmap


# Example IPython session:
#
# In [304]: df
# Out[304]:
#    ID value2
# 0   5      c
# 1   3      a
# 2   6      c
# 3   7      a
# 4   0      c
# 5   4      b
# 6   8      b
# 7   2      a
# 8   9      a
# 9   1      b
#
# In [305]: df, catmap = decat(df)
#
# In [306]: df
# Out[306]:
#    ID  value2
# 0   5       2
# 1   3       0
# 2   6       2
# 3   7       0
# 4   0       2
# 5   4       1
# 6   8       1
# 7   2       0
# 8   9       0
# 9   1       1
#
# In [307]: catmap
# Out[307]: {'value2': Index([u'a', u'b', u'c'], dtype='object')}
Now you can merge (or concatenate) as usual, since combining integer-valued columns poses no problem.
Later, you can reconstitute the categorical columns using the data in catmap:
def recat(df, catmap):
    """
    Use catmap to reconstitute columns in df to categorical dtype.

    The conversion happens in place and ``df`` is also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns named in ``catmap`` hold integer category codes.
        A code of -1 or a NaN (as produced for unmatched rows by a
        left/right/outer merge) is treated as a missing value.
    catmap : dict
        Maps column name -> categories (e.g. a pandas Index); the code ``i``
        stands for ``categories[i]``.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, with every column listed in ``catmap`` converted to
        categorical dtype.

    Raises
    ------
    KeyError
        If a column named in ``catmap`` is not present in ``df``.
    """
    for col, categories in catmap.items():
        # NaN codes (and the float upcast they cause) are normalised to the
        # -1 sentinel that Categorical uses for missing values.
        codes = df[col].fillna(-1).astype('int64').values
        # from_codes keeps the categories exactly as given (same order, unused
        # ones included). Rebuilding from the labels would re-infer a sorted,
        # possibly shorter category list, and -1 would wrongly index the last
        # category.
        df[col] = pd.Categorical.from_codes(codes, categories=categories)
    return df
import numpy as np
import pandas as pd
import pandas.core.common as com

df1 = pd.DataFrame(
    {'ID': np.array([5, 3, 6, 7, 0, 4, 8, 2, 9, 1,
                     6, 5, 4, 9, 7, 2, 1, 8, 3, 0], dtype='int32'),
     'value1': pd.Categorical(np.random.randint(0, 2, 20))})
df2 = pd.DataFrame(
    {'ID': np.array([5, 3, 6, 7, 0, 4, 8, 2, 9, 1], dtype='int32'),
     'value2': pd.Categorical(['c', 'a', 'c', 'a', 'c', 'b', 'b', 'a', 'a', 'b'])})


def decat(df):
    """
    Convert categorical columns to (integer) codes; return the categories in catmap.

    The conversion happens in place: every categorical column of ``df`` is
    overwritten with its integer codes (missing values get the code -1).

    Returns a tuple ``(df, catmap)`` where ``df`` is the same (mutated) object
    and ``catmap`` maps each converted column name to its original categories.
    """
    catmap = {}
    # Series.items() exists in every pandas version, unlike the removed
    # Series.iteritems(); the isinstance check replaces the removed
    # pandas.core.common.is_categorical_dtype helper.
    for col, dtype in df.dtypes.items():
        if isinstance(dtype, pd.CategoricalDtype):
            accessor = df[col].cat
            catmap[col] = accessor.categories
            df[col] = accessor.codes
    return df, catmap


def recat(df, catmap):
    """
    Use catmap to reconstitute columns in df to categorical dtype.

    ``catmap`` maps column name -> categories; the code ``i`` in such a column
    stands for ``categories[i]``. A code of -1 or a NaN (unmatched rows of a
    left/right/outer merge) becomes a missing value. The conversion happens in
    place and ``df`` is also returned.
    """
    for col, categories in catmap.items():
        # NaN codes (and the float upcast they cause) are normalised to the
        # -1 sentinel that Categorical uses for missing values.
        codes = df[col].fillna(-1).astype('int64').values
        # from_codes keeps the categories exactly as given; rebuilding from
        # the labels would re-infer a sorted, possibly shorter category list.
        df[col] = pd.Categorical.from_codes(codes, categories=categories)
    return df


def mergecat(left, right, *args, **kwargs):
    """
    Merge two DataFrames via integer codes, preserving categorical dtypes.

    Categorical columns of both inputs are converted to integer codes, the
    frames are merged with ``pd.merge(left, right, *args, **kwargs)``, and the
    categorical columns are rebuilt in the result.

    Limitation: the category maps of both frames are combined by column name,
    with ``right`` taking precedence. A categorical column that appears in
    both frames (including a categorical merge key) is therefore not handled
    correctly, because its codes may refer to different categories on each
    side.
    """
    # decat() overwrites columns in place; operate on shallow copies so the
    # caller's frames keep their categorical columns.
    left, left_catmap = decat(left.copy(deep=False))
    right, right_catmap = decat(right.copy(deep=False))
    catmap = dict(left_catmap)
    catmap.update(right_catmap)
    result = pd.merge(left, right, *args, **kwargs)
    return recat(result, catmap)


result = mergecat(df1, df2, on='ID')
result.info()
gives
<class 'pandas.core.frame.DataFrame'>
Int64Index: 20 entries, 0 to 19
Data columns (total 3 columns):
ID        20 non-null int32
value1    20 non-null category
value2    20 non-null category
dtypes: category(2), int32(1)
memory usage: 320.0 bytes