将分布拟合到直方图

Fitting a distribution to a histogram

我有一些用 sns.distplot 绘制的数据,x 轴是球员受伤的天数:

我想得到这个分布的 CDF。它不必非常精确,但我想运行一些模拟,实际上我需要的是这份数据的分布,记为 X。然后我想运行

X.rvs(1000)

这会给我一个包含 1000 个随机数的数组,代表球员受伤的天数。例如,如果它返回

array(2,35,140,3,4,6,7,23,55,63,...,87)

那些应该代表球员受伤的天数。

我真的不知道该怎么做,因为我手头只有这个直方图背后的数据,而这个直方图只是用 sns.distplot(data,kde=True) 绘制的。

希望有人能帮忙

数据:

data = pd.DataFrame([78,
58,
124,
62,
30,
46,
31,
34,
94,
15,
41,
18,
63,
15,
63,
31,
35,
23,
19,
19,
47,
154,
113,
29,
35,
58,
62,
93,
93,
93,
37,
31,
16,
17,
16,
17,
62,
31,
145,
116,
183,
183,
183,
93,
148,
183,
13,
160,
183,
183,
68,
15,
183,
57,
91,
183,
86,
133,
20,
183,
89,
183,
43,
30,
183,
183,
136,
183,
183,
12,
183,
60,
161,
67,
183,
40,
121,
52,
58,
183,
183,
9,
151,
183,
183,
183,
116,
9,
95,
183,
27,
16,
183,
52,
167,
12,
183,
183,
94,
65,
183,
30,
183,
19,
14,
183,
54,
37,
183,
152,
33,
22,
67,
183,
40,
17,
183,
50,
7,
183,
106,
72,
183,
183,
22,
80,
183,
183,
58,
183,
183,
183,
183,
183,
15,
183,
183,
183,
183,
127,
156,
183,
26,
183,
183,
59,
9,
183,
183,
55,
183,
183,
183,
28,
18,
51,
18,
11,
18,
26,
77,
65,
61,
19,
61,
61,
61,
30,
30,
30,
182,
54,
182,
22,
121,
26,
64,
91,
91,
15,
18,
60,
17,
16,
60,
29,
15,
31,
181,
15,
16,
120,
24,
26,
30,
28,
90,
28,
90,
27,
25,
27,
32,
28,
28,
28,
28,
180,
150,
28,
47,
51,
60,
25,
43,
9,
16,
24,
89,
15,
13,
58,
106,
16,
59,
29,
19,
22,
16,
16,
51,
52,
33,
26,
178,
148,
148,
42,
72,
28,
86,
17,
17,
56,
18,
25,
17,
28,
41,
37,
15,
81,
25,
147,
147,
8,
36,
32,
18,
37,
42,
23,
86,
38,
36,
55,
24,
15,
60,
54,
41,
18,
15,
17,
31,
146,
115,
64,
115,
25,
32,
85,
85,
38,
15,
23,
175,
175,
175,
84,
145,
18,
22,
38,
35,
21,
22,
53,
43,
22,
21,
37,
15,
19,
49,
25,
52,
28,
21,
21,
15,
172,
11,
109,
20,
22,
32,
29,
168,
168,
15,
28,
166,
16,
31,
86,
165,
15,
25,
48,
163,
56,
15,
162,
38,
19,
17,
40,
95,
160,
56,
56,
27,
48,
158,
32,
157,
157,
157,
15,
27,
37,
16,
46,
141,
141,
15,
31,
19,
52,
43,
15,
51,
39,
74,
119,
23,
15,
134,
43,
17,
15,
54,
33,
79,
133,
133,
21,
72,
17,
118,
26,
24,
15,
106,
30,
15,
53,
16,
21,
127,
127,
21,
126,
126,
33,
112,
17,
93,
33,
21,
17,
43,
36,
15,
44,
53,
110,
17,
17,
109,
17,
47,
38,
12,
16,
33,
62,
16,
77,
78,
88,
52,
69,
24,
63,
104,
28,
104,
15,
101,
59,
42,
99,
68,
74,
41,
33,
97,
96,
96,
20,
29,
30,
58,
41,
15,
95,
15,
33,
34,
25,
78,
51,
77,
15,
74,
76,
27,
76,
76,
31,
20,
47,
75,
35,
15,
74,
60,
73,
72,
39,
45,
35,
39,
70,
70,
38,
44,
51,
15,
17,
68,
68,
37,
18,
15,
15,
66,
49,
20,
65,
64,
64,
24,
43,
42,
23,
19,
20,
50,
49,
20,
49,
18,
41,
45,
15,
47,
15,
20,
15,
47,
47,
46,
15,
15,
45,
15,
16,
7,
44,
13,
32,
21,
17,
17,
25,
16,
26,
31,
41,
40,
19,
18,
39,
38,
15,
15,
37,
17,
20,
35,
35,
17,
33,
20,
20,
20,
19,
18,
17,
17,
16,
15,
15,
11,
11,
10,
7,
6,
5,
41,
15,
33,
28,
59,
182,
28,
15,
20,
49,
161,
157,
22,
8,
56,
33,
182,
26,
54,
46,
23,
27,
153,
28,
28,
29,
21,
45,
30,
60,
6,
182,
17,
83,
16,
22,
120,
30,
17,
20,
17,
19,
15,
67,
20,
9,
172,
182,
182,
76,
88,
55,
161,
154,
182,
25,
66,
16,
18,
38,
15,
141,
182,
19,
10,
23,
27,
145,
179,
46,
67,
84,
18,
17,
50,
32,
46,
16,
4,
62,
29,
47,
33,
16,
20,
141,
77,
30,
47,
77,
15,
62,
88,
50,
19,
45,
142,
22,
42,
12,
33,
60,
17,
26,
7,
12,
182,
33,
182,
15,
18,
85,
182,
31,
75,
18,
98,
23,
37,
39,
104,
182,
30,
51,
149,
47,
172,
39,
21,
43,
26,
24,
25,
56,
27,
24,
158,
38,
26,
66,
40,
38,
63,
8,
48,
10,
131,
16,
20,
14,
38,
49,
58,
130,
39,
110,
136,
40,
67,
63,
30,
27,
41,
33,
174,
34,
15,
19,
102,
28,
22,
47,
10,
18,
28,
69,
37,
16,
31,
27,
28,
32,
42,
81,
38,
26,
24,
15,
15,
37,
14,
149,
7,
64,
133,
99,
17,
39,
18,
11,
40,
26,
34,
134,
76,
13,
162,
39,
34,
41,
47,
182,
15,
36,
47,
80,
15,
15,
32,
16,
41,
182,
49,
27,
46,
48,
16,
38,
40,
35,
76,
15,
17,
39,
107,
143,
182,
15,
84,
19,
24,
87,
79,
16,
41,
20,
42,
74,
23,
8,
73,
21,
16,
93,
23,
171,
53,
93,
49,
15,
26,
23,
34,
167,
18,
90,
38,
12,
13,
15,
6,
180,
18,
36,
22,
59,
61,
91,
35,
19,
65,
110,
41,
91,
77,
125,
33,
93,
34,
15,
24,
32,
35,
88,
31,
27,
28,
182,
18,
16,
29,
50,
46,
182,
120,
33,
7,
117,
15,
11,
13,
182,
31,
41,
112,
110,
17,
69,
27,
41,
43,
40,
44,
37,
33,
8,
57,
106,
20,
22,
115,
31,
102,
39,
17,
50,
182,
9,
16,
32,
182,
50,
38,
15,
16,
31,
109,
26,
159,
182,
38,
16,
103,
32,
40,
106,
22,
105,
90,
78,
16,
88,
18,
65,
90,
38,
47,
36,
88,
61,
64,
52,
19,
46,
42,
27,
20,
147,
41,
15,
29,
26,
16,
19,
182,
38,
86,
34,
15,
13,
66,
34,
122,
182,
43,
41,
73,
41,
89,
23,
30,
53,
182,
7,
36,
90,
30,
127,
90,
43,
105,
36,
19,
158,
28,
41,
20,
29,
20,
150,
27,
23,
116,
67,
38,
20,
53,
36,
15,
15,
61,
91,
69,
48,
143,
15,
16,
20,
52,
17,
51,
86,
182,
40,
24,
111,
182,
56,
18,
40,
15,
63,
24,
34,
33,
35,
57,
15,
40,
50,
12,
17,
16,
182,
118,
23,
36,
98,
22,
156,
27,
124,
15,
61,
38,
40,
51,
18,
50,
43,
129,
182,
18,
91,
15,
30,
182,
31,
63,
31,
94,
31,
82,
34,
66,
42,
36,
42,
7,
20,
25,
26,
182,
58,
15,
115,
182,
15,
15,
87,
15,
93,
25,
66,
18,
16,
160,
91,
39,
47,
17,
54,
91,
20,
40,
40,
33,
105,
26,
28,
52,
56,
11,
52,
182,
23,
100,
15,
56,
9,
24,
145,
174,
55,
13,
39,
23,
9,
16,
182,
60,
81,
19,
182,
15,
98,
67,
7,
39,
15,
40,
182,
16,
9,
31,
8,
16,
29,
55,
53,
123,
43,
50,
28,
23,
18,
80,
15,
16,
35,
15,
98,
15,
36,
63,
23,
25,
20,
15,
63,
92,
34,
40,
152,
13,
51,
60,
36,
17,
145,
39,
24,
46,
9,
178,
21,
7,
26,
182,
22,
19,
182,
43,
71,
32,
15,
141,
50,
6,
15,
182,
11,
15,
74,
182,
19,
30,
30,
18,
25,
17,
15,
182,
38,
19,
15,
17,
77,
40,
92,
83,
16,
21,
142,
135,
19,
13,
53,
159,
39,
101,
34,
47,
17,
128,
36,
70,
74,
99,
11,
128,
48,
100,
15,
182,
28,
22,
182,
59,
12,
25,
36,
81,
21,
16,
15,
27,
57,
7,
93,
51,
37,
31,
17,
75,
41,
77,
182,
32,
24,
17,
54,
29,
23,
55,
40,
48,
15,
118,
150,
14,
65,
138,
27,
30,
46,
182,
15,
15,
98,
150,
182,
7,
182,
152,
24,
31,
154,
20,
18,
182,
7,
19,
33,
168,
29,
27,
41,
36,
24,
24,
24,
32,
33,
182,
15,
178,
55,
20,
35,
182,
85,
4,
44,
36,
15,
28,
159,
15,
16,
24,
15,
75,
76,
54,
43,
63,
59,
35,
22,
84,
32,
11,
17,
7,
30,
35,
18,
29,
182,
62,
37,
48,
31,
58,
38,
32,
19,
110,
14,
47,
20,
26,
15,
25,
34,
40,
43,
27,
27,
8,
26,
15,
182,
20,
58,
182,
7,
20,
20,
76,
32,
50,
174,
182,
113,
82,
15,
15,
57,
122,
5,
31,
32,
50,
15,
26,
15,
81,
15,
16,
6,
32,
39,
16,
162,
15,
94,
75,
182,
36,
21,
68,
33,
53,
182,
181,
80,
163,
115,
84,
150,
65,
15,
28,
70,
141,
39,
39,
40,
18,
99,
15,
93,
13,
56,
182,
162,
37,
66,
163,
15,
36,
43,
64,
15,
24,
15,
15,
21,
15,
36,
36,
23,
23,
151,
15,
18,
182,
39,
15,
34,
185,
40,
25,
182,
15,
15,
41,
18,
18,
56,
37,
32,
26,
36,
23,
17,
141,
21,
34,
18,
21,
45,
65,
98,
11,
21,
119,
34,
53,
59,
16,
15,
48,
110,
15,
33,
9,
102,
66,
60,
15,
64,
26,
59,
56,
31,
108,
17,
7,
71,
22,
19,
23,
41,
33,
16,
50,
74,
40,
15,
40,
114,
80,
71,
29,
19,
36,
15,
21,
24,
182,
19,
28,
60,
24,
56,
37,
25,
85,
78,
36,
15,
71,
54,
182,
155,
141,
2,
49,
15,
23,
131,
15,
66,
15,
22,
15,
66,
59,
51,
15,
64,
21,
182,
19,
20,
36,
55,
51,
44,
39,
16,
47,
41,
98,
127,
125,
24,
32,
182,
20,
104,
20,
48,
16,
12,
57,
55,
13,
32,
15,
52,
25,
15,
119,
18,
15,
15,
182,
53,
66,
24,
15,
172,
15,
120,
16,
45,
15,
32,
8,
22,
132,
31,
69,
13,
89,
40,
63,
53,
36,
96,
156,
39,
59,
24,
40,
24,
118,
109,
29,
21,
119,
120,
39,
36,
175,
15,
134,
26,
15,
15,
29,
26,
182,
36,
17,
71,
40,
31,
104,
43,
66,
45,
30,
37,
26,
74,
182,
35,
32,
15,
64,
152,
182,
52,
28,
182,
16,
76,
182,
8,
35,
134,
36,
15,
182,
126,
15,
15,
182,
15,
17,
15,
182,
35,
16,
39,
33,
125,
27,
29,
51,
52,
125,
63,
161,
24,
75,
52,
9,
109,
70,
100,
65,
48,
13,
182,
16,
6,
28,
16,
52,
23,
15,
111,
28,
47,
61,
61,
15,
20,
33,
166,
10,
20,
19,
31,
29,
20,
68,
138,
22,
56,
26,
15,
182,
17,
169,
33,
28,
49,
24,
20,
33,
38,
94,
15,
43,
16,
88,
15,
53,
18,
18,
50,
15,
15,
80,
53,
69,
34,
22,
105,
131,
31,
32,
32,
182,
12,
147,
157,
15,
24,
15,
30,
18,
16,
50,
63,
60,
70,
89,
19,
18,
38,
38,
167,
15,
125,
182,
178,
78,
84,
33,
41,
95,
44,
40,
17,
182,
64,
70,
68,
133,
79,
10,
60,
48,
29,
160,
20,
117,
42,
50,
128,
182,
51,
61,
35,
45,
33,
37,
35,
25,
20,
27,
15,
35,
182,
15,
15,
32,
87,
28,
19,
67,
15,
15,
43,
49,
15,
86,
6,
38,
5,
17,
77,
51,
15,
57,
30,
41,
20,
37,
75,
58,
149,
111,
51,
60,
64,
17,
99,
22,
182,
18,
15,
34,
71,
32,
182,
182,
50,
71,
17,
84,
35,
77,
29,
32,
31,
30,
13,
35,
105,
36,
60,
45,
62,
15,
88,
101,
15,
111,
85,
28,
23,
74,
61,
41,
129,
55,
42,
23,
182,
26,
16,
39,
33,
63,
16,
16,
118,
19,
34,
115,
15,
46,
20,
20,
182,
27,
28,
35,
98,
62,
56,
42,
175,
40,
86,
15,
154,
57,
15,
62,
30,
101,
58,
47,
15,
182,
182,
30,
17,
36,
82,
78,
21,
84,
24,
93,
15,
36,
19,
62,
12,
58,
48,
145,
16,
20,
20,
182,
18,
21,
10,
17,
58,
76,
25,
79,
36,
15,
54,
38,
30,
182,
30,
20,
131,
118,
18,
15,
29,
52,
15,
45,
22,
182,
24,
15,
15,
37,
33,
27,
60,
15,
27,
24,
8,
12,
15,
85,
21,
182,
37,
27,
49,
26,
7,
16,
14,
19,
33,
30,
4,
28,
60,
15,
162,
40,
39,
14,
22,
7,
47,
121,
40,
39,
15,
8,
30,
13,
150,
16,
7,
78,
30,
41,
26,
21,
65,
155,
54,
44,
85,
54,
72,
19,
21,
26,
127,
17,
4,
75,
23,
69,
44,
71,
102,
23,
21,
17,
61,
155,
27,
38,
27,
30,
107,
33,
24,
103,
61,
98,
6,
60,
22,
51,
]

鉴于原始数据,很容易提供合理的近似值。

from scipy import stats

# Observed count for each injury duration; position i holds the count for
# day i + 1.  A few zero counts were replaced by values close to their
# neighbours, and the last entry lumps together every duration from
# roughly 180 days onwards.
p_data = [
    1, 1, 2, 4, 3, 8, 19, 11, 12, 7, 11, 12, 16, 7, 176, 62, 55, 44, 38, 48,
    29, 28, 29, 35, 22, 32, 30, 36, 22, 33, 30, 30, 35, 20, 24, 33, 22, 28, 27, 32,
    28, 13, 20, 9, 11, 12, 21, 13, 12, 18, 18, 16, 14, 12, 10, 15, 8, 14, 11, 19,
    14, 11, 14, 12, 10, 12, 8, 7, 7, 7, 8, 5, 3, 10, 8, 10, 10, 8, 4, 6,
    5, 3, 2, 8, 8, 8, 3, 8, 5, 7, 9, 2, 12, 5, 4, 3, 1, 9, 5, 3,
    4, 4, 2, 5, 5, 5, 2, 1, 5, 6, 4, 2, 2, 1, 6, 3, 2, 6, 4, 5,
    3, 2, 1, 2, 5, 3, 6, 3, 2, 1, 4, 1, 5, 4, 1, 2, 1, 2, 2, 2,
    8, 2, 2, 4, 6, 1, 4, 3, 3, 6, 2, 4, 1, 4, 3, 3, 5, 3, 3, 4,
    4, 5, 3, 2, 1, 2, 3, 3, 1, 1, 1, 4, 3, 3, 5, 4, 4, 4, 1, 148,
]
thesum = sum(p_data)

# Normalise the counts so they sum to 1 and can be used as probabilities.
p_data = [count / thesum for count in p_data]

# Support of the distribution: days 1 through len(p_data), inclusive.
x_k = range(1, len(p_data) + 1)
custom = stats.rv_discrete(values=(x_k, p_data), name='custm')

# Draw 1000 simulated injury durations and show the first 20 of them.
R = custom.rvs(size=1000)

print(R[:20])

首先,我使用 collections 中的 Counter 来统计每个持续时间出现的次数。正如所料,有些持续时间在数据中没有出现,计数为零。考虑到这里的关键词是“近似”(approximate),我认为用与相邻值接近的数字替换这少数几个零值是合理的。结果就是您在 p_data 中看到的数值。然后我对其进行归一化(使各值之和为 1,以便作为概率使用)。

在右端,从大约 180 天起有若干取值。我把这些取值的概率质量全部归到 180 天。第 0 天没有概率质量。这就是 x_k 取 1 到 180(含)的原因。

最后我使用了 rv_discrete 对象,其 rvs 方法被证明相当快。