第一和第三四分位数的计算

Question

我正在尝试计算适合直方图的对数正态分布的均值、标准差、中位数、第一四分位数和第三四分位数。到目前为止，我只能根据我在维基百科上找到的公式计算均值、标准差和中位数，但我不知道如何计算第一个四分位数和第三个四分位数。我如何根据对数正态分布在 Python 中计算第一个四分位数和第三个四分位数？

import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import lognorm
import matplotlib.ticker as tkr
import scipy, pylab
import locale
import matplotlib.gridspec as gridspec
#from scipy.stats import lognorm
locale.setlocale(locale.LC_NUMERIC, "de_DE")
plt.rcParams['axes.formatter.use_locale'] = True

from scipy.optimize import curve_fit

x=np.asarray([0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 1.00, 1.10, 1.20, 1.30, 1.40,
   1.50, 1.60, 1.70, 1.80, 1.90, 2.00, 2.10, 2.20, 2.30, 2.40, 2.50, 2.60, 2.70, 2.80,
   2.90, 3.00, 3.10, 3.20, 3.30, 3.40, 3.50, 3.60, 3.70, 3.80, 3.90, 4.00, 4.10, 4.20,
   4.30, 4.40, 4.50, 4.60, 4.70, 4.80, 4.90, 5.00, 5.10, 5.20, 5.30, 5.40, 5.50, 5.60,
   5.70, 5.80, 5.90, 6.00, 6.10, 6.20, 6.30, 6.40, 6.50, 6.60, 6.70, 6.80, 6.90, 7.00,
   7.10, 7.20, 7.30, 7.40, 7.50, 7.60, 7.70, 7.80, 7.90, 8.00], dtype=np.float64)

frequencia_relativa=np.asarray([0.000, 0.000, 0.038, 0.097, 0.091, 0.118, 0.070, 0.124, 0.097, 0.059, 0.059, 0.048, 0.054, 0.043,
                     0.032, 0.005, 0.027, 0.016, 0.005, 0.000, 0.005, 0.000, 0.005, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.005, 0.000, 0.000], dtype=np.float64)

plt.rcParams["figure.figsize"] = [18,8]
f, (ax,ax2) = plt.subplots(1,2, sharex=True, sharey=True, facecolor='w')

def fun(y, mu, sigma):
    return 1.0/(np.sqrt(2.0*np.pi)*sigma*y)*np.exp(-(np.log(y)-mu)**2/(2.0*sigma*sigma))

step = 0.1

xx = x-step*0.99

nrm = np.sum(frequencia_relativa*step) # normalization integral
print(nrm)

frequencia_relativa /= nrm # normalize frequences histogram

print(np.sum(frequencia_relativa*step)) # check normalizatio

params, extras = curve_fit(fun, xx, frequencia_relativa)

print(params)

axes = f.add_subplot(111, frameon=False)

ax.spines['top'].set_color('none')
ax2.spines['top'].set_color('none')
gs = gridspec.GridSpec(1,2,width_ratios=[3,1])
ax = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1])
ax.axvspan(0.243, 1.481, label='Média $\pm$ desvio padrão', ymin=0.0, ymax=1.0, alpha=0.2, color='Plum') #lognormal distribution
ax.yaxis.tick_left()
ax.xaxis.tick_bottom()
ax2.xaxis.tick_bottom()
ax.tick_params(labeltop='off') # don't put tick labels at the top
ax2.yaxis.tick_right()
ax.bar(xx, height=frequencia_relativa, label='Frequência relativa normalizada do tamanho triangular', alpha=0.5, width=0.1, align='edge', edgecolor='black', hatch="///")
ax2.bar(xx, height=frequencia_relativa, alpha=0.5, width=0.1, align='edge', edgecolor='black', hatch="///")

xxx = np.linspace (0.001, 8, 1000)
ax.plot(xxx, fun(xxx, params[0], params[1]), "r-", label='Distribuição log-normal', linewidth=3)
ax2.plot(xxx, fun(xxx, params[0], params[1]), "r-", linewidth=3)

ax.tick_params(axis = 'both', which = 'major', labelsize = 18)
ax.tick_params(axis = 'both', which = 'minor', labelsize = 18)
ax2.tick_params(axis = 'both', which = 'major', labelsize = 18)
ax2.tick_params(axis = 'both', which = 'minor', labelsize = 18)
ax2.xaxis.set_ticks(np.arange(7.0, 8.5, 0.5))
ax2.xaxis.set_major_formatter(tkr.FormatStrFormatter('%0.1f'))

plt.subplots_adjust(wspace=0.04)
ax.set_xlim(0,2.5)
ax.set_ylim(0,1.4)
ax2.set_xlim(7.0,8.0)
def func(x, pos):  # formatter function takes tick label and tick position
    s = str(x)
    ind = s.index('.')
    return s[:ind] + ',' + s[ind+1:]   # change dot to comma
x_format = tkr.FuncFormatter(func)
ax.xaxis.set_major_formatter(x_format)
ax2.xaxis.set_major_formatter(x_format)
# hide the spines between ax and ax2
ax.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)

d = .015 # how big to make the diagonal lines in axes coordinates
# arguments to pass plot, just so we don't keep repeating them
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
ax.plot((1-d/3,1+d/3), (-d,+d), **kwargs)
ax.plot((1-d/3,1+d/3),(1-d,1+d), **kwargs)

kwargs.update(transform=ax2.transAxes)  # switch to the bottom axes
ax2.plot((-d,+d), (1-d,1+d), **kwargs)
ax2.plot((-d,+d), (-d,+d), **kwargs)
ax2.tick_params(labelright=False)
ax.tick_params(labeltop=False)
ax.tick_params(axis='x', which='major', pad=15)
ax2.tick_params(axis='x', which='major', pad=15)
ax2.set_yticks([])
f.text(0.5, -0.04, 'Tamanho lateral do triângulo ($\mu m$)', ha='center', fontsize=22)
f.text(-0.02, 0.5, 'Frequência relativa normalizada', va='center', rotation='vertical', fontsize=22)

ax.axvline(0.862, color='k', linestyle='-', linewidth=1.3) #lognormal distribution
ax.axvline(0.243, color='k', linestyle='--', linewidth=1)  #lognormal distribution
ax.axvline(1.481, color='k', linestyle='--', linewidth=1)  #lognormal distribution
f.legend(loc=9, 
          bbox_to_anchor=(.77,.99),
          labelspacing=1.5,
          numpoints=1,
          columnspacing=0.2,
          ncol=1, fontsize=18,
          frameon=False)

ax.text(0.86*0.63, 1.4*0.92, 'tamanho = (0,86 $\pm$ 0,62) $\mu m$', fontsize=20) #Excel
mu = params[0]
sigma = params[1]

# calculate mean value
print(np.exp(mu + sigma*sigma/2.0))

# calculate stddev
print(np.sqrt((np.exp(sigma*sigma)-1)*np.exp(mu+sigma*sigma/2.0)))

# calculate median value
print(np.exp(mu))

f.tight_layout()
#plt.show()
plt.savefig('output.png', dpi=500, bbox_inches='tight')

图表：

Answer 1

给定一个对数正态分布，我们要计算它的分位数。此外，对数正态分布的参数是根据数据估计的。

下面的脚本使用 OpenTURNS 创建使用 LogNormal class 的分布。它将基础正态分布的均值和标准差作为输入参数。然后我们可以使用computeQuantile()方法来计算分位数。

import openturns as ot

distribution = ot.LogNormal(-0.33217492, 0.6065058)
mean = distribution.getMean()
std = distribution.getStandardDeviation()
print("Mean=", mean, ", Std=", std)
q25 = distribution.computeQuantile(0.25)
q75 = distribution.computeQuantile(0.75)
print("Quantiles = ", q25, q75)

请注意，前面脚本中的常量值可以替换为任何估计值，例如使用基于拟合 PDF 和直方图的估计程序。

之前的脚本打印：

Mean= [0.862215] , Std= [0.574926]
Quantiles =  [0.476515] [1.07994]

我们可以用脚本绘制PDF：

import openturns.viewer as otv
graph = distribution.drawPDF()
otv.View(graph)

哪个地块：

Answer 2

欢迎回来

好的，对于 log-normal distribution，可以使用 wiki 页面中的表达式计算分位数

import numpy as np
from scipy import special

SQRT2 = 1.41421356237

def LNquantile(μ, σ, p):
    """
    Compute quantile function for log-normal distribution
    """
    nq = SQRT2 * special.erfinv(2.0*p - 1.0) # N(0,1) normal quantile
    t = μ +  σ * nq # N(μ, σ) quantile
    return np.exp(t) # LN(μ, σ) quantile

μ = 0.0
σ = 1.0

q1 = LNquantile(μ, σ, 0.25)
q3 = LNquantile(μ, σ, 0.75)

print(q1)
print(q3)

打印合理

0.5094162838640294
1.9630310841553595

您可以在 https://planetcalc.com/7263/

输入任何值并与在线计算器进行比较

第一和第三四分位数的计算

Calculation of the first and third quartile

python

statistics

histogram

probability-distribution