Выберите строки в Pandas Multiindex DataFrame
col
one two
a t 0
u 1
v 2
w 3
# How do I select all rows having "a" in level "one"?
# Four equivalent approaches follow: loc, xs, query, and a boolean mask.
df.loc[['a']] # NOTE: older pandas releases may raise "TypeError: Expected tuple, got str" here -- confirm for your version
df.loc[('a', slice(None)), :] # Explicit tuple form: "a" on the first level, everything on the second; no error
# Or use a cross-section:
df.xs('a', level=0, axis=0, drop_level=False)
# df.xs('a', drop_level=False)
# Here, the drop_level=False argument is needed to prevent xs
# from dropping level "one" in the result (the level we sliced on).
# Yet another option here is using query:
df.query("one == 'a'")
# If the index level did not have a name, you would need to change the query
# string to "ilevel_0 == 'a'".
# Finally, using get_level_values to build a boolean mask:
df[df.index.get_level_values('one') == 'a']
# If your levels are unnamed, or if you need to select by position (not label),
# pass the integer position of the level instead:
# df[df.index.get_level_values(0) == 'a']
## How do I slice all rows with value "t" on level "two"?
# slice(None) means "take everything" on the first level.
df.loc[(slice(None), 't'), :]
#OR, with pd.IndexSlice, which allows the ":" slice syntax inside loc
idx = pd.IndexSlice
df.loc[idx[:, 't'], :]
#OR, fixing the axis up front so no trailing ", :" is needed
df.loc(axis=0)[pd.IndexSlice[:, 't']]
#OR, as a cross-section on the second level (position 1), keeping that level
df.xs('t', axis=0, level=1, drop_level=False)
#OR
df.query("two == 't'")
# Or, if level "two" has no name, refer to it by position:
# df.query("ilevel_1 == 't'")
# And finally, with get_level_values, you may do
df[df.index.get_level_values('two') == 't']
# Or, to perform selection by position/integer,
# df[df.index.get_level_values(1) == 't']
## Selecting several labels on one level
# How can I select rows corresponding to items "b" and "d" in level "one"?
df.loc[['b', 'd']] # "b" and "d" are labels of the outer level (level 0).
#OR
items = ['b', 'd']
df.query("one in @items")
# Alternative query spellings:
# df.query("one == @items", parser='pandas')
# df.query("one in ['b', 'd']")
# df.query("one == ['b', 'd']", parser='pandas')
#OR
df[df.index.get_level_values("one").isin(['b', 'd'])]
# The same kind of selection on the inner level: rows with "t" or "w" in level "two".
df.loc[pd.IndexSlice[:, ['t', 'w']], :]
#OR
items = ['t', 'w']  # rebinds `items`; the "@items" queries below refer to this list
df.query("two in @items")
# Alternative query spellings:
# df.query("two == @items", parser='pandas')
# df.query("two in ['t', 'w']")
# df.query("two == ['t', 'w']", parser='pandas')
#OR
df[df.index.get_level_values('two').isin(['t', 'w'])]
# Expected shape of the single-key result shown next:
#
col
one two
c u 9
# How do I select the single row keyed by "c" on level "one" and "u" on level "two"?
df.loc[('c', 'u'), :]
#OR
df.loc[pd.IndexSlice[('c', 'u')]] # May emit "PerformanceWarning: indexing past lexsort depth may impact performance."
# Fix for the warning: sort the index first, then index the sorted frame.
df_sort = df.sort_index()
df_sort.loc[('c', 'u')]
#OR, as a cross-section given the full key tuple
df.xs(('c', 'u'))
#OR
df.query("one == 'c' and two == 'u'")
#OR, combine one boolean mask per level with a logical AND
m1 = (df.index.get_level_values('one') == 'c')
m2 = (df.index.get_level_values('two') == 'u')
df[m1 & m2]
col
one two
c u 9
a w 3
# How do I select the two rows keyed ('c', 'u') and ('a', 'w')?
# A list of tuples selects exactly those (one, two) index keys.
df.loc[[('c', 'u'), ('a', 'w')]]
# df.loc[pd.IndexSlice[[('c', 'u'), ('a', 'w')]]]
#OR
# MultiIndex.isin matches whole index tuples. `df` has exactly the two levels
# that the tuples describe, so the index can be tested directly.
# If the index had additional levels, drop the ones that are not part of the
# tuples (index.droplevel(...)) before calling isin.
df[df.index.isin([('c', 'u'), ('a', 'w')])]
col
one two
a t 0
u 1
v 2
w 3
b t 4
t 8
d t 12
# How can I retrieve all rows corresponding to "a" in level "one" or "t" in level "two"?
# query accepts a boolean "or" across index level names.
df.query("one == 'a' or two == 't'")
#OR, build one boolean mask per level and combine them with a logical OR
m1 = (df.index.get_level_values('one') == 'a')
m2 = (df.index.get_level_values('two') == 't')
df[m1 | m2]
# Expected shape of the numeric-level frame used next:
col
one two
b 7 4
9 5
c 7 10
d 6 11
8 12
8 13
6 15
# How do I get all rows where values in level "two" are greater than 5?
# NOTE: this uses `df2`, whose construction is not shown in this file;
# presumably it is the frame with numeric level "two" values printed above.
df2.query("two > 5")
#OR, compare the level's values directly to build a boolean mask
df2[df2.index.get_level_values('two') > 5]
##############################
# Self-contained three-level example: every combination of group, stock and
# metric gets one integer value (1..18) in a single unnamed column.
groups = ['a', 'b']
stocks = ['stock1', 'stock2', 'stock3']
metrics = ['price', 'volume', 'velocity']
index = pd.MultiIndex.from_product([groups, stocks, metrics])
df = pd.DataFrame(list(range(1, 19)), index)

# Rows with 'stock1' on level 1 and 'velocity' on level 2, as a cross-section.
df.xs(('stock1', 'velocity'), level=(1, 2))

#OR: build one boolean mask per level and combine them; the full index is kept.
on_stock1 = df.index.isin(['stock1'], level=1)
on_velocity = df.index.isin(['velocity'], level=2)
df.iloc[on_stock1 & on_velocity]

# The mask approach extends naturally to several labels on a level.
on_stock1_or_stock3 = df.index.isin(['stock1', 'stock3'], level=1)
df.iloc[on_stock1_or_stock3 & on_velocity]
Worried Wren