Use groupby/cumcount to number the occurrences of each val1 value in df1:
# Number each row within its val1 group: 0 for the first occurrence, 1 for the next, ...
df1['cumcount'] = df1.groupby('val1').cumcount()
Do the same for df2:
# Number each row within its val1 group: 0 for the first occurrence, 1 for the next, ...
df2['cumcount'] = df2.groupby('val1').cumcount()
Then merge df1 and df2 on the columns they share (val1 and cumcount):
import numpy as np
import pandas as pd
# Demonstration: attach df2's val2 values to df1 so that the k-th occurrence
# of each val1 in df1 receives the k-th val2 listed for that val1 in df2.
df1 = pd.DataFrame({"val1": ["B2", "A1", "B2", "A1", "B2", "A1"]})
df2 = pd.DataFrame(
    {
        "val1": ["A1", "A1", "A1", "B2", "B2", "B2"],
        "val2": [10, 13, 16, 11, 20, 22],
    }
)
# Expected outcome, used by the self-check at the end.
df_final = pd.DataFrame(
    {
        "val1": ["B2", "A1", "B2", "A1", "B2", "A1"],
        "val2": [11, 10, 20, 13, 22, 16],
    }
)

# Number the rows inside each val1 group (0, 1, 2, ...) so that the pair
# (val1, cumcount) identifies exactly one row in each frame.
df1["cumcount"] = df1.groupby("val1").cumcount()
df2["cumcount"] = df2.groupby("val1").cumcount()

# Name the join keys explicitly instead of relying on merge's default of
# "every column the two frames share", which would silently change if either
# frame gained another common column. how="left" keeps every row of df1.
result = pd.merge(df1, df2, how="left", on=["val1", "cumcount"])
# The helper column was only needed for the join.
result = result.drop(columns="cumcount")
print(result)
assert result.equals(df_final)
val1 val2
0 B2 11
1 A1 10
2 B2 20
3 A1 13
4 B2 22
5 A1 16
Note that, with how='left', the merge returns a new DataFrame that keeps every row of df1, in the same order as df1.