I am trying to get my function to output the outlier in the array "data". I have created a graph that shows the outlier; however, I also want my function to output the actual value. Basically, I want the value 220 to be printed by my code. How can I do this? What am I doing wrong in my code? I think something is off with my distance.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
# Use matplotlib's 'ggplot' style sheet for the figures drawn by this script.
style.use('ggplot')
# Sample web-traffic figures, one entry per day. The last bounce rate (220)
# is far above the others and is the value the outlier search should find.
web_stats = {
    "Day": [1, 2, 3, 4, 5, 6],
    "Visitors": [43, 53, 34, 45, 64, 34],
    "Bounce_Rate": [65, 72, 62, 64, 54, 220],
}
df = pd.DataFrame(web_stats)

# Pull the bounce-rate column out as an independent NumPy array.
data = df["Bounce_Rate"].to_numpy(copy=True)
def find_outlier(data, q1, q3):
    """Tell whether ``data`` lies on or outside the 1.5 * IQR fences.

    The fences are ``q1 - 1.5 * (q3 - q1)`` and ``q3 + 1.5 * (q3 - q1)``.
    A value is flagged when it is ``<=`` the lower fence or ``>=`` the
    upper fence.

    Args:
        data: A single number, or a sequence/array of numbers that is
            tested element-wise.
        q1: First quartile used to build the fences.
        q3: Third quartile used to build the fences.

    Returns:
        A NumPy boolean for scalar ``data``; a boolean array with one
        entry per element for array-like ``data``.
    """
    iqr = q3 - q1
    lower = q1 - 1.5 * iqr
    upper = q3 + 1.5 * iqr
    values = np.asarray(data)
    # Use ``|`` rather than ``or``: ``or`` needs a single truth value and
    # raises ValueError for arrays holding more than one element.
    return (values <= lower) | (values >= upper)
def find_indices(data):
    """Return the positions of the elements of ``data`` flagged as outliers.

    The 25th and 75th percentiles of ``data`` are computed with
    ``np.percentile`` and every element is then checked with
    ``find_outlier`` against those two quartiles.

    Args:
        data: Sequence or array of numbers, examined element by element.

    Returns:
        A list of integer positions, in ascending order, of the elements
        that ``find_outlier`` flags. The list is empty when ``data`` is
        empty or when nothing is flagged.
    """
    # Quartiles are undefined for empty input, so there is nothing to flag.
    # ``len`` is used instead of truthiness because the truth value of a
    # multi-element NumPy array is ambiguous.
    if len(data) == 0:
        return []

    q1 = np.percentile(data, 25)
    q3 = np.percentile(data, 75)
    return [
        position
        for position, value in enumerate(data)
        if find_outlier(value, q1, q3)
    ]
# ``dist`` is another name for the bounce-rate array; no copy is made.
dist = data

# Keep the result under its own name. Rebinding ``find_indices`` to the
# returned list would shadow the function, so calling it a second time (for
# example when re-running this section) fails with
# "'list' object is not callable".
outlier_indices = find_indices(dist)

# Report the outlying values themselves, not only their positions.
print('Outlier values:', data[outlier_indices])

fig = plt.figure()
ax = fig.add_subplot(111)  # 1x1 grid, first subplot
ax.plot(dist, 'b-', label='distances')
# Overlay a red marker on each outlier at its position in the series.
ax.plot(
    outlier_indices,
    data[outlier_indices],
    'ro',
    markersize=7,
    label='outliers',
)
ax.legend(loc='best')