ks.test 与左截断威布尔分布

ks.test with left truncated weibull

我假设我的数据所服从的分布是左截断威布尔分布。我已经知道在使用 ptrunc 命令时该分布的参数:截断点 a、形状(shape)和尺度(scale):

# Attach truncdist, which provides ptrunc(). library() stops with an error when
# the package is missing, whereas require() only returns FALSE and lets the
# script fail later at the first ptrunc() call.
library(truncdist)

# CDF of a Weibull distribution truncated on the left at `a`.
# NOTE(review): `x`, `b` and `c` are not assigned in this snippet; they appear
# to be placeholders for the quantiles, the scale and the shape.
ptrunc(x, "weibull", a = a, scale = b, shape = c)

所以我希望 ks.test 命令(见下文)使用上面描述的左截断威布尔分布,而不是普通的(未截断的)威布尔分布。

# Observed sample (42 values). The original line contained an empty argument
# ("36.1,,29.9"), which makes c() stop with an "argument is empty" error.
myvalues <- c(
  37.5, 35.4, 27.1, 32.9, 35.9, 35.1, 34.1, 32.5, 35.5, 31.5,
  38.2, 36.1, 29.9, 30.1, 34.7, 38.7, 32.3, 38.0, 34.9, 44.2,
  35.8, 30.8, 39.3, 26.0, 34.2, 40.0, 36.1, 41.5, 32.8, 31.9,
  41.3, 30.5, 39.9, 35.0, 31.2, 35.0, 30.3, 29.0, 34.4, 35.7,
  34.1, 35.4
)

# Parameters of the left-truncated Weibull.
a <- 7             # left truncation point
scale <- 36.37516  # Weibull scale
shape <- 9.437013  # Weibull shape

我知道在这个例子中其实没有必要进行左截断,但在其他情况下是需要的。

# One-sample KS test of the data against the ordinary (untruncated) Weibull CDF.
# The parameters are stored in `scale` and `shape`; `b` is never assigned and
# `c` is the base combine function, so the original `scale=b, shape=c` call
# could not be evaluated.
ks.test(myvalues, "pweibull", scale = scale, shape = shape)

但是

# Attempted KS test against the left-truncated Weibull; this is the call the
# question reports as giving a wrong result.
# NOTE(review): ptrunc(...) is evaluated before ks.test() sees it, so the second
# argument is ptrunc's return value rather than a CDF name as in the "pweibull"
# call. `x`, `b` and `c` are not assigned anywhere in the visible snippet (the
# parameters are stored in `scale` and `shape`) -- confirm what was intended.
ks.test(myvalues,ptrunc(x,"weibull",a=a,scale=b,shape=c)) # for left-truncated

给出错误的结果。

(我猜)您对 ptrunc 函数的用法有误,它需要输入一个分位数向量。下面我根据您的 scale 和 shape 参数计算出威布尔分布的均值和标准差,然后在均值上下各 5 个标准差的范围内取点,得到一个用于比较的集合。

# library() fails immediately if truncdist is not installed; require() would
# only return FALSE and defer the failure to the first ptrunc() call.
library(truncdist)

# Observed sample and Weibull parameters used throughout this script.
myvalues <- c(
  37.5, 35.4, 27.1, 32.9, 35.9, 35.1, 34.1, 32.5, 35.5, 31.5,
  38.2, 36.1, 29.9, 30.1, 34.7, 38.7, 32.3, 38.0, 34.9, 44.2,
  35.8, 30.8, 39.3, 26.0, 34.2, 40.0, 36.1, 41.5, 32.8, 31.9,
  41.3, 30.5, 39.9, 35.0, 31.2, 35.0, 30.3, 29.0, 34.4, 35.7,
  34.1, 35.4
)
a <- 7
scale <- 36.37516
shape <- 9.437013

# Mean and standard deviation of the (untruncated) Weibull, from its
# gamma-function moment formulas.
weib_mean <- scale * gamma(1 + 1 / shape)
weib_sd <- sqrt(scale^2 * (gamma(1 + 2 / shape) - gamma(1 + 1 / shape)^2))

# Evaluation grid: 1e5 equally spaced points covering five standard deviations
# on either side of the Weibull mean.
quant <- seq(
  from = weib_mean - 5 * weib_sd,
  to = weib_mean + 5 * weib_sd,
  length.out = 1E5
)

# ptrunc() is the truncated distribution function, so despite its name this
# vector holds cumulative probabilities at the grid points, not random draws.
weibull_samp <- ptrunc(
  quant,
  spec = "weibull",
  a = a,
  scale = scale,
  shape = shape
)

# Visual check of the resulting curve over the grid.
plot(weibull_samp ~ quant)

# Use with test
> ks.test(sort(myvalues), weibull_samp)
       Two-sample Kolmogorov-Smirnov test

data:  sort(myvalues) and weibull_samp
D = 1, p-value < 2.2e-16
alternative hypothesis: two-sided

首先,ptrunc 应该换成 rtrunc。ptrunc 给出的是概率值向量,但是根据 ks.test 的文档,我们需要的是一个样本,而这正是 rtrunc 提供的。如果把 rtrunc 的参数 a 设置为 -Inf,就没有截断;并且 a=-Inf 的结果确实与 a=7 的结果相同:

library(truncdist)

# Observed sample (42 values).
myvalues <- c(
  37.5, 35.4, 27.1, 32.9, 35.9, 35.1, 34.1, 32.5, 35.5, 31.5,
  38.2, 36.1, 29.9, 30.1, 34.7, 38.7, 32.3, 38.0, 34.9, 44.2,
  35.8, 30.8, 39.3, 26.0, 34.2, 40.0, 36.1, 41.5, 32.8, 31.9,
  41.3, 30.5, 39.9, 35.0, 31.2, 35.0, 30.3, 29.0, 34.4, 35.7,
  34.1, 35.4
)

# Left truncation point and Weibull parameters.
a <- 7
scale <- 36.37516
shape <- 9.437013

# rtrunc()'s first argument is the number of draws. The original passed the
# data vector itself, which only works because a vector-valued count is
# interpreted through its length; ask for that length explicitly instead.

# Reference sample from the untruncated Weibull (a = -Inf means no truncation).
set.seed(1)
y1 <- rtrunc(length(myvalues), "weibull", a = -Inf, scale = scale, shape = shape)

# Reference sample from the Weibull truncated on the left at `a`, drawn with
# the same seed so the two samples are directly comparable.
set.seed(1)
y2 <- rtrunc(length(myvalues), "weibull", a = a, scale = scale, shape = shape)

# ks.test() does not use the random number generator with these arguments, so
# the set.seed() calls that used to precede each test were redundant.

# One-sample test against the theoretical (untruncated) Weibull CDF.
ks0 <- ks.test(myvalues, "pweibull", scale = scale, shape = shape)

# Two-sample tests against the simulated reference samples.
ks1 <- ks.test(myvalues, y1)
ks2 <- ks.test(myvalues, y2)

.

> ks1

    Two-sample Kolmogorov-Smirnov test

data:  myvalues and y1
D = 0.21429, p-value = 0.2898
alternative hypothesis: two-sided

> ks2

    Two-sample Kolmogorov-Smirnov test

data:  myvalues and y2
D = 0.21429, p-value = 0.2898
alternative hypothesis: two-sided

但是ks.test( myvalues, "pweibull",scale=scale,shape=shape )的结果还是不一样:

> ks0

    One-sample Kolmogorov-Smirnov test

data:  myvalues
D = 0.15612, p-value = 0.2576
alternative hypothesis: two-sided

原因是 myvalues 的样本量太小了。如果我们在调用 rtrunc(而不是 ks.test)时把样本量变大,ks0、ks1 和 ks2 的结果就几乎一样了:

library(truncdist)

# Observed sample (42 values).
myvalues <- c(
  37.5, 35.4, 27.1, 32.9, 35.9, 35.1, 34.1, 32.5, 35.5, 31.5,
  38.2, 36.1, 29.9, 30.1, 34.7, 38.7, 32.3, 38.0, 34.9, 44.2,
  35.8, 30.8, 39.3, 26.0, 34.2, 40.0, 36.1, 41.5, 32.8, 31.9,
  41.3, 30.5, 39.9, 35.0, 31.2, 35.0, 30.3, 29.0, 34.4, 35.7,
  34.1, 35.4
)

# Every observation shifted by each of the offsets 0, 1e-5, ..., 0.09999,
# flattened to a plain vector of 10000 * 42 = 420000 values. It is passed to
# rtrunc() below in place of the small sample.
myManyValues <- as.vector(outer((0:9999) / 1e5, myvalues, FUN = "+"))

# Left truncation point and Weibull parameters.
a <- 7
scale <- 36.37516
shape <- 9.437013

# rtrunc()'s first argument is the number of draws. The original passed the
# long helper vector itself, which only works because a vector-valued count is
# interpreted through its length; ask for that length explicitly instead.

# Large reference sample from the untruncated Weibull (a = -Inf: no truncation).
set.seed(1)
y1 <- rtrunc(
  length(myManyValues), "weibull",
  a = -Inf, scale = scale, shape = shape
)

# Large reference sample from the Weibull truncated on the left at `a`, drawn
# with the same seed so the two samples are directly comparable.
set.seed(1)
y2 <- rtrunc(
  length(myManyValues), "weibull",
  a = a, scale = scale, shape = shape
)

# ks.test() does not use the random number generator with these arguments, so
# the set.seed() calls that used to precede each test were redundant.

# One-sample test against the theoretical (untruncated) Weibull CDF.
ks0 <- ks.test(myvalues, "pweibull", scale = scale, shape = shape)

# Two-sample tests of the observed data against the simulated samples.
ks1 <- ks.test(myvalues, y1)
ks2 <- ks.test(myvalues, y2)

.

> ks0

    One-sample Kolmogorov-Smirnov test

data:  myvalues
D = 0.15612, p-value = 0.2576
alternative hypothesis: two-sided

> ks1

    Two-sample Kolmogorov-Smirnov test

data:  myvalues and y1
D = 0.15655, p-value = 0.2548
alternative hypothesis: two-sided

> ks2

    Two-sample Kolmogorov-Smirnov test

data:  myvalues and y2
D = 0.15655, p-value = 0.2548
alternative hypothesis: two-sided

现在让我们看看当我们真正对分布进行截断时(这里取 a = 29)会发生什么:

library(truncdist)

# Observed sample (42 values).
myvalues <- c(
  37.5, 35.4, 27.1, 32.9, 35.9, 35.1, 34.1, 32.5, 35.5, 31.5,
  38.2, 36.1, 29.9, 30.1, 34.7, 38.7, 32.3, 38.0, 34.9, 44.2,
  35.8, 30.8, 39.3, 26.0, 34.2, 40.0, 36.1, 41.5, 32.8, 31.9,
  41.3, 30.5, 39.9, 35.0, 31.2, 35.0, 30.3, 29.0, 34.4, 35.7,
  34.1, 35.4
)

# Every observation shifted by each of the offsets 0, 1e-5, ..., 0.09999,
# flattened to a plain vector of 10000 * 42 = 420000 values. It is passed to
# rtrunc() below in place of the small sample.
myManyValues <- as.vector(outer((0:9999) / 1e5, myvalues, FUN = "+"))

# Truncation point moved up to 29, which lies inside the range of the data.
a <- 29
scale <- 36.37516
shape <- 9.437013

# rtrunc()'s first argument is the number of draws. The original passed the
# long helper vector itself, which only works because a vector-valued count is
# interpreted through its length; ask for that length explicitly instead.

# Large reference sample from the untruncated Weibull (a = -Inf: no truncation).
set.seed(1)
y1 <- rtrunc(
  length(myManyValues), "weibull",
  a = -Inf, scale = scale, shape = shape
)

# Large reference sample from the Weibull truncated on the left at `a`, drawn
# with the same seed so the two samples are directly comparable.
set.seed(1)
y2 <- rtrunc(
  length(myManyValues), "weibull",
  a = a, scale = scale, shape = shape
)

# ks.test() does not use the random number generator with these arguments, so
# the set.seed() calls that used to precede each test were redundant.

# One-sample test against the theoretical (untruncated) Weibull CDF.
ks0 <- ks.test(myvalues, "pweibull", scale = scale, shape = shape)

# Two-sample tests of the observed data against the untruncated (ks1) and
# truncated (ks2) simulated samples.
ks1 <- ks.test(myvalues, y1)
ks2 <- ks.test(myvalues, y2)

.

> ks0

    One-sample Kolmogorov-Smirnov test

data:  myvalues
D = 0.15612, p-value = 0.2576
alternative hypothesis: two-sided

> ks1

    Two-sample Kolmogorov-Smirnov test

data:  myvalues and y1
D = 0.15655, p-value = 0.2548
alternative hypothesis: two-sided

> ks2

    Two-sample Kolmogorov-Smirnov test

data:  myvalues and y2
D = 0.2059, p-value = 0.05683
alternative hypothesis: two-sided