In [189]: df
Out[189]:
IDX1 IDX2 IDX3 1983 Q4 X Y Z 1984 Q1 X.1 Y.1 Z.1
0 A A1 Q 10 A F NaN 110 A F NaN
1 A A2 Q 20 B C 40 120 B C 240
2 A A3 Q 30 A F NaN 130 A F NaN
3 A A4 Q 40 B C 80 140 B C 280
4 A A5 Q 50 A F NaN 150 A F NaN
5 A A6 Q 60 B F 120 160 B F 320
First, set ['IDX1', 'IDX2', 'IDX3'] as the index.
# Promote the three identifier columns to a row MultiIndex.
df = df.set_index(keys=['IDX1', 'IDX2', 'IDX3'])
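The remaining columns are periodic; every 4 columns belong together.
We want to handle them as a group. This idea of "handling as a group"
leads naturally to a MultiIndex on the columns; the top level is the period,
and each period has the same 4 sub-columns. It would look like this: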
1983 Q4 1984 Q1
W X Y Z W X Y Z
IDX1 IDX2 IDX3
A A1 Q 10 A F NaN 110 A F NaN
A2 Q 20 B C 240 120 B C 240
A3 Q 30 A F NaN 130 A F NaN
A4 Q 40 B C 280 140 B C 280
A5 Q 50 A F NaN 150 A F NaN
A6 Q 60 B F 320 160 B F 320
So, build a MultiIndex and assign it to df.columns:
# Period labels are the columns whose name does not begin with X, Y or Z.
duplicated_fields = frozenset('XYZ')
columns = [label for label in df.columns if label[0] not in duplicated_fields]
# Cross every period label with the four per-period fields W, X, Y, Z.
df.columns = pd.MultiIndex.from_product(
    [columns, list('WXYZ')]
)
The desired long-format DataFrame can now be obtained by calling df.stack
to move the column levels into the row index:
# Label the two column levels so they can be referred to by name.
level_names = ['IDX4', 'ValueType']
df.columns.names = level_names
# Move both column levels into the row index; dropna=False keeps the NaN entries.
series = df.stack(level=level_names, dropna=False)
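Note that, with mangle_dupe_cols=False, the duplicate columns X, Y,
Z, are overwritten. So, data is lost with mangle_dupe_cols=False.
For example, with mangle_dupe_cols=False, the values of the last Z column
are assigned to every Z column.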
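Therefore, use mangle_dupe_cols=True (or simply omit it, since
that is the default) and adjust the code accordingly. Fortunately, that is easy,
because df.columns is replaced with a custom-built MultiIndex anyway.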
Putting it all together:
import numpy as np
import pandas as pd

# Load the pipe-delimited file. The separator is a regular expression (a '|'
# with optional surrounding whitespace); only the Python parser engine supports
# regex separators, so request it explicitly instead of relying on the
# automatic fallback from the C engine and the ParserWarning it emits.
df = pd.read_table('data', sep=r'\s*[|]\s*', engine='python')

# Move the three identifier columns into the row index.
df = df.set_index(['IDX1', 'IDX2', 'IDX3'])

# Keep only the period labels: skip every column whose name starts with X, Y
# or Z (the repeated headers show up in the frame as 'X.1', 'Y.1', ...).
columns = [col for col in df.columns if not col.startswith(('X', 'Y', 'Z'))]

# Pair each period label with the four per-period fields to get two column levels.
df.columns = pd.MultiIndex.from_product([columns, list('WXYZ')])
df.columns.names = ['IDX4', 'ValueType']

# Stack both column levels into the row index; dropna=False keeps the NaN entries.
series = df.stack(['IDX4', 'ValueType'], dropna=False)
print(series.head())
IDX1 IDX2 IDX3 IDX4 ValueType
A A1 Q 1983 Q4 W 10
X A
Y F
Z NaN
1984 Q1 W 110
dtype: object
Note that the result, series, is a Series, not a DataFrame.
If you want a DataFrame with 6 columns, follow up with:
# Name the values so the column produced by reset_index is labelled 'Value'.
series.name = 'Value'
# Turn every index level back into an ordinary column.
df = series.reset_index(drop=False)
print(df.head(n=5))
IDX1 IDX2 IDX3 IDX4 ValueType Value
0 A A1 Q 1983 Q4 W 10
1 A A1 Q 1983 Q4 X A
2 A A1 Q 1983 Q4 Y F
3 A A1 Q 1983 Q4 Z NaN
4 A A1 Q 1984 Q1 W 110
...