Outlier filtering in statistics.
In addition to, or as an alternative to, the simple dataset cleaning technique described in the Machine learning and Data science section, a more advanced method can be used that filters outliers by their Z-score.

The Z-score method can be applied to data that is approximately normally distributed; for other types of data distribution, the modified Z-score method should be applied.
Creating and plotting the data:
import numpy as np
import matplotlib.pyplot as plt
from statsmodels import robust
import scipy.stats as stats
# Simulate N standard-normal samples, then distort both tails so that a
# few values stand out as likely outliers.
N = 40
data = np.random.randn(N)

low_tail = data < -1
data[low_tail] += 2

# This mask is computed after the low-tail shift, exactly as before.
high_tail = data > 2
data[high_tail] = data[high_tail] ** 2  # try to force a few outliers

# Rescale so the raw values look nothing like z-scores.
data = data * 200 + 50

# Standardize: subtract the mean, divide by the (population) std.
dataZ = (data - data.mean()) / data.std()

#### specify the z-score threshold
zscorethresh = 3

# Two stacked panels: raw values on top, z-scores below.
fig, ax = plt.subplots(2, 1, figsize=(8, 6))
marker_style = dict(markerfacecolor='w', markersize=12)

raw_panel, z_panel = ax

raw_panel.plot(data, 'k^', **marker_style)
raw_panel.set_xticks([])
raw_panel.set_xlabel('Data index')
raw_panel.set_ylabel('Orig. scale')

z_panel.plot(dataZ, 'k^', **marker_style)
# Dashed red line marks the (upper) z-score threshold.
z_panel.plot([0, N], [zscorethresh, zscorethresh], 'r--')
z_panel.set_xlabel('Data index')
z_panel.set_ylabel('Z distance')

plt.show()

Identifying the outliers:
# Indices of the points whose absolute z-score exceeds the threshold.
outliers = np.flatnonzero(np.abs(dataZ) > zscorethresh)

# Cross out each flagged point with a large red 'x' in both panels
# (raw values in the first panel, z-scores in the second).
cross_style = dict(color='r', markersize=20)
for panel, values in zip(ax, (data, dataZ)):
    panel.plot(outliers, values[outliers], 'x', **cross_style)

# Bare expression: in a notebook cell this re-displays the updated figure.
fig

Iterative method:
Sometimes one round of dataset cleaning is not enough, and the Z-score method can be applied over several iterations. IMPORTANT: choose the threshold wisely depending on the data (usually between 2 and 3).
# Iterative z-score cleaning: repeatedly re-standardize the surviving points
# and drop everything beyond the threshold until a pass removes nothing.

# choose the threshold
zscorethresh = 2
dataZ = (data - np.mean(data)) / np.std(data)

colorz = 'brkm'  # one plot color per iteration (reused cyclically)
numiters = 0  # iteration counter

while True:

    # convert to z, ignoring points already removed (stored as NaN)
    datamean = np.nanmean(dataZ)
    datastd = np.nanstd(dataZ)
    dataZ = (dataZ - datamean) / datastd

    # find data values to remove; the test is two-sided so that extreme
    # negative values are caught too. NaN compares False, so points removed
    # in earlier rounds are never flagged again.
    toremove = np.abs(dataZ) > zscorethresh

    # break out of while loop if no points to remove
    if not toremove.any():
        break

    # wrap around the palette so a fifth (or later) round cannot raise
    # IndexError
    color = colorz[numiters % len(colorz)]

    # otherwise, mark the outliers in the plot and drop them
    plt.plot(np.where(toremove)[0], dataZ[toremove], '%sx' % color, markersize=12)
    dataZ[toremove] = np.nan

    # replot the surviving points for this iteration
    plt.plot(dataZ, 'k^', markersize=12, markerfacecolor=color, label='iteration %g' % numiters)
    numiters = numiters + 1

plt.xticks([])
plt.ylabel('Z-score')
plt.xlabel('Data index')
plt.legend()
plt.show()

#### the data points to be removed
removeFromOriginal = np.where(np.isnan(dataZ))[0]
print(removeFromOriginal)

Modified Z for non-normal distributions:
For data that is not normally distributed, the modified Z-score method should be applied.
# compute modified z:  Mz = norm.ppf(.75) * (x - median) / MAD
dataMed = np.median(data)
# statsmodels' robust.mad divides the median absolute deviation by
# c = norm.ppf(.75) by default. Pass c=1 to get the raw MAD so that the
# ~0.6745 factor below is applied exactly once rather than twice.
dataMAD = robust.mad(data, c=1)
dataMz = stats.norm.ppf(.75) * (data - dataMed) / dataMAD

# plot the data
fig, ax = plt.subplots(2, 1, figsize=(8, 6))
ax[0].plot(data, 'k^', markerfacecolor='w', markersize=12)
ax[0].set_xticks([])
ax[0].set_xlabel('Data index')
ax[0].set_ylabel('Orig. scale')

# then plot the modified z-scores, with the threshold currently held in
# zscorethresh drawn as a dashed red line
ax[1].plot(dataMz, 'k^', markerfacecolor='w', markersize=12)
ax[1].plot([0, N], [zscorethresh, zscorethresh], 'r--')
ax[1].set_xlabel('Data index')
ax[1].set_ylabel('Median dev. units (Mz)')
plt.show()
