“Удалить выбросы Python Pandas” Ответ

Удалить выбросы Python Pandas

#------------------------------------------------------------------------------
# accept a dataframe, remove outliers, return cleaned data in a new dataframe
# see http://www.itl.nist.gov/div898/handbook/prc/section1/prc16.htm
#------------------------------------------------------------------------------
def remove_outlier(df_in, col_name):
    q1 = df_in[col_name].quantile(0.25)
    q3 = df_in[col_name].quantile(0.75)
    iqr = q3-q1 #Interquartile range
    fence_low  = q1-1.5*iqr
    fence_high = q3+1.5*iqr
    df_out = df_in.loc[(df_in[col_name] > fence_low) & (df_in[col_name] < fence_high)]
    return df_out
Handsome Hawk

Удалить выбросы в пандах

cols = ['col_1', 'col_2'] # one or more

Q1 = df[cols].quantile(0.25)
Q3 = df[cols].quantile(0.75)
IQR = Q3 - Q1

df = df[~((df[cols] < (Q1 - 1.5 * IQR)) |(df[cols] > (Q3 + 1.5 * IQR))).any(axis=1)]
Nice Nightingale

Удалить выбросы в DataFrame

# Solution is based on this article: 
# http://www.itl.nist.gov/div898/handbook/prc/section1/prc16.htm

import pandas as pd
import numpy as np

def remove_outliers_from_series(series):
    q1 = series.quantile(0.25)
    q3 = series.quantile(0.75)
    intraquartile_range = q3 - q1
    fence_low  = q1 - 1.5 * intraquartile_range
    fence_high = q3 + 1.5 * intraquartile_range
    return series[(series > fence_low) & (series < fence_high)]


def remove_outliers_from_dataframe(self, df, col):
    q1 = df[col].quantile(0.25)
    q3 = df[col].quantile(0.75)
    intraquartile_range = q3 - q1
    fence_low  = q1 - 1.5 * intraquartile_range
    fence_high = q3 + 1.5 * intraquartile_range
    return df.loc[(df[col] > fence_low) & (df[col] < fence_high)]


def remove_outliers_from_np_array(self, arr):
    q1 = np.percentile(arr, 25)
    q3 = np.percentile(arr, 75)
    intraquartile_range = q3 - q1
    fence_low  = q1 - 1.5 * intraquartile_range
    fence_high = q3 + 1.5 * intraquartile_range
    return arr[(arr > fence_low) & (arr < fence_high)]


def remove_outliers_from_python_list(self, _list):
    arr = np.array(_list)
    return list(remove_outliers_from_np_array(arr))


def remove_outliers(*args, **kwargs):
        if isinstance(args[0], pd.DataFrame):
            return remove_outliers_from_dataframe(*args, **kwargs)
        elif isinstance(args[0], pd.Series):
            return remove_outliers_from_series(*args, **kwargs)
        elif isinstance(args[0], np.ndarray):
            return remove_outliers_from_np_array(*args, **kwargs)
        elif isinstance(args[0], list):
            return remove_outliers_from_python_list(*args, **kwargs)
        else:
            raise TypeError(f'{type(args[0])} is not supported.')
Wrong Whale

удалить выбросы Python DataFrame

cols = ['col_1', 'col_2'] # one or more

Q1 = df[cols].quantile(0.25)
Q3 = df[cols].quantile(0.75)
IQR = Q3 - Q1

df = df[~((df[cols] < (Q1 - 1.5 * IQR)) |(df[cols] > (Q3 + 1.5 * IQR))).any(axis=1)]
Bored Butterfly

Удалить выбросы Python Pandas

df = pd.DataFrame(np.random.randn(100, 3))

from scipy import stats
df[(np.abs(stats.zscore(df)) < 3).all(axis=1)]
Frantic Fox

Панды удаляют выбросы

df = pd.DataFrame(np.random.randn(100, 3))

from scipy import stats
df[(np.abs(stats.zscore(df)) < 3).all(axis=1)]
Real Raccoon

Ответы похожие на “Удалить выбросы Python Pandas”

Вопросы похожие на “Удалить выбросы Python Pandas”

Больше похожих ответов на “Удалить выбросы Python Pandas” по Python

Смотреть популярные ответы по языку

Смотреть другие языки программирования