ValueError: scale < 0 during normalization by using gaussian distribution function

Question

我正在尝试读取我的文本文件并提取 3 个主要参数并将它们放在单独的列表中，并在分配高斯分布函数后对参数列表（温度、速度、加速度）应用归一化。为了获得好的结果，我将每个参数列表的正数和负数分开并应用高斯分布函数并选择负数的平均值作为实际最小值并选择正数的平均值作为实际最大值而不是直接找到最小值和最大值这些参数的主要列表中的值可能会重复几次，因为它们不在所需的置信区间内。问题是我遇到了我已经避免的 RunTimeWarning 错误，但我仍然有以下错误，我不知道如何解决它们包括 ValueError: scale <0 ，希望有人对使用高斯分布函数应用归一化的错误解决方案有好的想法感谢您的关注：

File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd_launcher.py", line 45, in <module>
main(ptvsdArgs)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\__main__.py", line 265, in main
wait=args.wait)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\__main__.py", line 258, in handle_args
debug_main(addr, name, kind, *extra, **kwargs)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_local.py", line 45, in debug_main
run_file(address, name, *extra, **kwargs)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_local.py", line 79, in run_file
run(argv, addr, **kwargs)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_local.py", line 140, in _run
_pydevd.main()
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_vendored\pydevd\pydevd.py", line 1925, in main
debugger.connect(host, port)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_vendored\pydevd\pydevd.py", line 1283, in run
return self._exec(is_module, entry_point_fn, module_name, file, globals, locals)
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_vendored\pydevd\pydevd.py", line 1290, in _exec
pydev_imports.execfile(file, globals, locals)  # execute the script
File "c:\Users\majm\.vscode\extensions\ms-python.python-2018.11.0\pythonFiles\experimental\ptvsd\ptvsd\_vendored\pydevd\_pydev_imps\_pydev_execfile.py", line 25, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "p:\Desktop\correctt\news.py", line 142, in <module>
plotgaussianfunction(t_p_mean, t_sigma_Positive)
File "p:\Desktop\correctt\news.py", line 58, in plotgaussianfunction
s = np.random.normal(mu, sigma,1000)
File "mtrand.pyx", line 1656, in mtrand.RandomState.normal
ValueError: scale < 0

所以我的代码是：

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
import warnings
warnings.filterwarnings("ignore",category =RuntimeWarning)

df = pd.read_csv('D:/me.txt', header=None)
id_set = df[df.index % 4 == 0].astype('int').values
speed = df[df.index % 4 == 1].values
acceleration = df[df.index % 4 == 2].values
temperature = df[df.index % 4 == 3].values

m_data={'p_Speed': s_p_results[:,0],'n_Speed': s_n_results[:,0], 'p_Acceleration': a_p_results[:,0],'n_Acceleration': a_n_results[:,0], 'p_Temperature': t_p_results[:,0],'n_Temperature': t_n_results[:,0]}
m_main_data = pd.DataFrame(data, columns=['Speed','Acceleration','Temperature'], index = id_set[:,0])

data = {'Speed': speed[:,0], 'Acceleration': acceleration[:,0], 'Temperature': temperature[:,0]}
main_data = pd.DataFrame(data, columns=['Speed','Acceleration','Temperature'], index = id_set[:,0])
main_data = main_data.replace([np.inf, -np.inf], np.nan)

def normalize(value, min_value, max_value, min_norm, max_norm):
new_value = ((max_norm - min_norm)*((value - min_value)/(max_value - min_value))) + min_norm
return new_value

def createpositiveandnegativelist(listtocreate):
l_negative = []
l_positive = []
for value in listtocreate:
    if (value < 0):
        l_negative.append(value)
    elif (value > 0):
        l_positive.append(value)
#print(t_negative)
#print(t_positive)
return l_negative,l_positive

def calculatemean(listtocalculate):
return sum(listtocalculate)/len(listtocalculate)

def plotgaussianfunction(mu,sigma):
s = np.random.normal(mu, sigma,1000)
abs(mu - np.mean(s))<0.01
abs(sigma - np.std(s,ddof=1))<0.01
#count, bins, ignored = plt.hist(s,30,density=True)
#plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) * np.exp(-(bins-mu)**2/(2*sigma**2)),linewidth=2, color= 'r')
#plt.show()
return


def plotboundedCI(s, mu, sigma, lists):
plt.figure()

count, bins, ignored = plt.hist(s,30,density=True)
plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) * np.exp(-(bins-mu)**2/(2*sigma**2)),linewidth=2, color= 'r')
#confidential interval calculation
ci = scipy.stats.norm.interval(0.68, loc = mu, scale = sigma)
#confidence interval for left line
one_x12, one_y12 = [ci[0],ci[0]], [0,3]
#confidence interval for right line
two_x12, two_y12 = [ci[1],ci[1]], [0,3]

plt.title("Gaussian 68% Confidence Interval", fontsize=12, color='black', loc='left', style='italic')
plt.plot(one_x12, one_y12, two_x12, two_y12, marker = 'o')
plt.show()


results = []
for value in lists:
    if(ci[0]< value <ci[1]):
        results.append(value)
    else:
        #print("NOT WANTED: ",value)
        pass

return results


t_negative, t_positive = createpositiveandnegativelist(temperature)
a_negative, a_positive = createpositiveandnegativelist(acceleration)
s_negative, s_positive = createpositiveandnegativelist(speed)

#calculating the mean value
t_p_mean = calculatemean(t_positive)
a_p_mean = calculatemean(a_positive)
s_p_mean = calculatemean(s_positive)
t_n_mean = calculatemean(t_negative)
a_n_mean = calculatemean(a_negative)
s_n_mean = calculatemean(s_negative)

#calculating the sigma value
t_sigma_Negative = np.std(t_negative)
t_sigma_Positive = np.std(t_positive)
a_sigma_Negative = np.std(t_negative)
a_sigma_Positive = np.std(t_positive)
s_sigma_Negative = np.std(t_negative)
s_sigma_Positive = np.std(t_positive)

#plot the gaussian function with histograms
plotgaussianfunction(t_p_mean, t_sigma_Positive)
plotgaussianfunction(t_n_mean, t_sigma_Negative)
plotgaussianfunction(a_p_mean, a_sigma_Positive)
plotgaussianfunction(a_n_mean, a_sigma_Negative)
plotgaussianfunction(s_p_mean, s_sigma_Positive)
plotgaussianfunction(s_n_mean, s_sigma_Negative)

#normalization
t_p_s = np.random.normal(t_p_mean, t_sigma_Positive,1000)
t_n_s = np.random.normal(t_n_mean, t_sigma_Negative,1000)
a_p_s = np.random.normal(a_p_mean, a_sigma_Positive,1000)
a_n_s = np.random.normal(a_n_mean, a_sigma_Negative,1000)
s_p_s = np.random.normal(s_p_mean, s_sigma_Positive,1000)
s_n_s = np.random.normal(s_n_mean, s_sigma_Negative,1000)

#histograms minus the outliers
t_p_results = plotboundedCI(t_p_s, t_p_mean, t_sigma_Positive, t_positive)
t_n_results = plotboundedCI(t_n_s, t_n_mean, t_sigma_Negative, t_negative)
a_p_results = plotboundedCI(a_p_s, a_p_mean, a_sigma_Positive, a_positive)
a_n_results = plotboundedCI(a_n_s, a_n_mean, a_sigma_Negative, a_negative)
s_p_results = plotboundedCI(s_p_s, s_p_mean, s_sigma_Positive, s_positive)
s_n_results = plotboundedCI(s_n_s, s_n_mean, s_sigma_Negative, s_negative)

Note: I have some missing data(nan or inf) in my list of values which are already replaced by zero! but considering that when I have no missing values in my list of parameters , the code works!

Answer 1

来自 numpy.random.normal 的文档：

Parameters:

loc : float or array_like of floats

Mean (“centre”) of the distribution.

scale : float or array_like of floats Standard deviation (spread or “width”) of the distribution.

size : int or tuple of ints, optional Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples are drawn. If size is None (default), a single value is returned if loc and scale are both scalars. Otherwise, np.broadcast(loc, scale).size samples are drawn.

尺度是分布的标准差，因此不能为负。因此你会得到错误：ValueError: scale < 0

您可能需要检查此参数的符号。尝试一下：

s = np.random.normal(mu, np.abs(sigma),1000)

ValueError: scale < 0 during normalization by using gaussian distribution function

ValueError: scale < 0 during normalization by using gaussian distribution function

python

gaussian

normalization

valueerror