Modified @Stephen's code:
import pandas as pd

def make_3d(dataset):

    def flush(accum):
        # Emit one id's accumulated block: every date in the block becomes an
        # anchor that repeats itself plus all older rows, renumbered by depth
        # and restamped with the anchor date. A single-row block simply comes
        # back out with depth 0, and an empty block emits nothing.
        for i in range(len(accum)):                # each date in the block is an anchor
            date = accum[i][2]                     # anchor date of this slice
            for j, r in enumerate(accum[i:]):      # the anchor row plus all older rows
                out = list(r)
                out[0] = j                         # depth below the anchor
                out[2] = date                      # stamp with the anchor date
                yield out

    def make_3d_lines(a_df):
        a_df['depth'] = 0                          # zero the depth column (the former integer index)
        prev = None
        accum = []                                 # accumulates the rows belonging to the current id
        for row in a_df.values.tolist():           # for each row in our dataset
            key = row[1]                           # the id of this row
            if key == prev:                        # same id as the previous row: keep accumulating
                accum.append(row)
            else:                                  # new id: the previous block is complete, emit it
                yield from flush(accum)
                accum = [row]
            prev = key
        yield from flush(accum)                    # don't forget to flush the final block

    df_data = dataset.reset_index()
    df_data.columns = ['depth'] + list(df_data.columns)[1:]
    new_df = pd.DataFrame(
        make_3d_lines(df_data.sort_values(['id', 'date'], ascending=[True, False])),
        columns=df_data.columns
    ).astype(dtype=df_data.dtypes.to_dict())
    return new_df.set_index(['id', 'date'])
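Aside: the manual prev/accum bookkeeping above is exactly what makes the final flush easy to forget. For comparison, here is a minimal sketch of the same expansion written with itertools.groupby; make_3d_alt is a hypothetical name rather than part of @Stephen's answer, and it assumes the same id/date column layout as the test frame below.

import itertools

import pandas as pd

def make_3d_alt(dataset):
    # Same transformation, but groupby closes each id's block for us,
    # so the last id cannot be dropped by a missing flush.
    ordered = dataset.sort_values(['id', 'date'], ascending=[True, False])
    rows = []
    for _, block in itertools.groupby(ordered.itertuples(index=False),
                                      key=lambda r: r.id):
        block = list(block)
        for i in range(len(block)):                # every date is an anchor
            anchor = block[i].date
            for depth, r in enumerate(block[i:]):  # anchor row plus all older rows
                rec = r._asdict()                  # namedtuple -> dict of column values
                rec['date'] = anchor               # restamp with the anchor date
                rec['depth'] = depth
                rows.append(rec)
    out = pd.DataFrame(rows).set_index(['id', 'date'])
    return out[['depth'] + [c for c in out.columns if c != 'depth']]

Since groupby only groups consecutive keys, the ascending sort on id is what makes each id form one contiguous block.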
Testing:
t = pd.DataFrame(data={'id': [1, 1, 1, 1, 2, 2, 3, 3, 4, 5],
                       'date': [20180311, 20180310, 20180210, 20170505, 20180312,
                                20180311, 20180312, 20180311, 20170501, 20180304],
                       'feature': [10, 20, 45, 1, 14, 15, 20, 20, 13, 11],
                       'result': [1, 1, 0, 0, 0, 0, 1, 0, 1, 1]})
t = t.reindex(columns=['id', 'date', 'feature', 'result'])
print(t)

   id      date  feature  result
0   1  20180311       10       1
1   1  20180310       20       1
2   1  20180210       45       0
3   1  20170505        1       0
4   2  20180312       14       0
5   2  20180311       15       0
6   3  20180312       20       1
7   3  20180311       20       0
8   4  20170501       13       1
9   5  20180304       11       1
Output:
             depth  feature  result
id date
1  20180311      0       10       1
   20180311      1       20       1
   20180311      2       45       0
   20180311      3        1       0
   20180310      0       20       1
   20180310      1       45       0
   20180310      2        1       0
   20180210      0       45       0
   20180210      1        1       0
   20170505      0        1       0
2  20180312      0       14       0
   20180312      1       15       0
   20180311      0       15       0
3  20180312      0       20       1
   20180312      1       20       0
   20180311      0       20       0
4  20170501      0       13       1
5  20180304      0       11       1
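Because the result keeps (id, date) as a MultiIndex, one depth block of the 3D structure can be read back with a plain .loc lookup. A quick check against the table above:

new_df = make_3d(t)
print(new_df.loc[(1, 20180311)])   # the four rows (depths 0-3) anchored at 20180311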