在 C 中生成幂律分布并使用 python 对其进行测试
Generating a power-law distribution in C and testing it with python
我知道,给定一个生成均匀分布随机数的 rng,获得类似幂的数据的一种方法是,遵循 Wolfram Mathworld 以下内容:设 y 是均匀分布在 (0, 1) 和 x 另一个随机变量分布为 P(x) = C*x**n(对于 (xmin,xmax) 中的 x)。我们有
x=[ (xmax**(n+1) - xmin**(n-1))y+xmin**(n+1) ]**(1/(n+1))
所以我用 C 编写了这个程序,它生成从 1 到 100 的 50k 个数字,这些数字应该分配为 x^(-2) 并将结果的频率打印在文件上 DATA.txt:
void random_powerlike(int *k, int dim, double degree, int xmin, int xmax, unsigned int *seed)
{
int i;
double aux;
for(i=0; i<dim; i++)
{
aux=(powq(xmax, degree +1 ) - powq(xmin, degree +1 ))*((double)rand_r(seed)/RAND_MAX)+ powq(xmin, degree +1);
k[i]=(int) powq(aux, 1/(degree+1));
}
}
int main()
{
unsigned int seed = 1934123471792583;
FILE *tmp;
char stringa[50];
sprintf(stringa, "Data.txt");
tmp=fopen(stringa, "w");
int dim=50000;
int *k;
k= (int *) malloc(dim*sizeof(int));
int degree=-2;
int freq[100];
random_powerlike(k,dim, degree, 1,100,&seed);
fprintf(tmp, "#degree = %d x=[%d,%d]\n",degree,1,100);
for(int j=0; j< 100;j++)
{
freq[j]=0;
for(int i = 0; i< dim; ++i)
{
if(k[i]==j+1)
freq[j]++;
}
fprintf(tmp, "%d %d\n", j+1, freq[j]);
}
fflush(tmp);
fclose(tmp);
return 0;
}
我决定用 pylab 拟合这些数字,看看适合它们的最佳幂律是否是 a*x**b,其中 b = -2。我在 python:
中写了这个程序
import numpy
from scipy.optimize import curve_fit
import pylab
num, freq = pylab.loadtxt("Data.txt", unpack=True)
freq=freq/freq[0]
def funzione(num, a,b):
return a*num**(b)
pars, covm = curve_fit(funzione, num, freq, absolute_sigma=True)
xx=numpy.linspace(1, 99)
pylab.plot(xx, funzione(xx, pars[0],pars[1]), color='red')
pylab.errorbar(num, freq, linestyle='', marker='.',color='black')
pylab.show()
print pars
问题是当我拟合数据时,我得到的指数值为 ~-1.65。
我想我在某个地方犯了错误,但我想不通在哪里。
我认为你必须制作直方图。我只是稍微重写了您的代码,现在非常适合
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
double rndm() {
return (double)rand()/(double)RAND_MAX;
}
double power_sample(double xmin, double xmax, int degree) {
double pmin = pow(xmin, degree + 1);
double pmax = pow(xmax, degree + 1);
double v = pmin + (pmax - pmin)*rndm();
return pow(v, 1.0/(degree + 1));
}
int main() {
unsigned int seed = 32345U;
srand(seed);
int xmin = 1;
int xmax = 100;
double* hist = malloc((xmax-xmin + 1)*sizeof(double));
memset(hist, 0, (xmax-xmin + 1)*sizeof(double));
// sampling
int nsamples = 100000000;
for(int k = 0; k != nsamples; ++k) {
double v = power_sample(xmin, xmax, 2);
int idx = (int)v;
hist[idx] += 1.0;
}
// normalization
for(int k = xmin; k != xmax; ++k) {
hist[k] /= (double)nsamples;
}
// output
for(int k = xmin; k != xmax; ++k) {
double x = k + 0.5;
printf(" %e %e\n", x, hist[k]);
}
free(hist); // cleanup
return 0;
}
和配件代码
import numpy
from scipy.optimize import curve_fit
import pylab
def funzione(x, a,b):
return a * numpy.power(x, b)
num, freq = pylab.loadtxt("q.dat", unpack=True)
pars, covm = curve_fit(funzione, num, freq, absolute_sigma=True)
pylab.plot(num, funzione(num, pars[0], pars[1]), color='red')
pylab.errorbar(num, freq, linestyle='', marker='.',color='black')
pylab.show()
print(pars)
它产生了
[ 3.00503372e-06 1.99961571e+00]
非常接近
我知道,给定一个生成均匀分布随机数的 rng,获得类似幂的数据的一种方法是,遵循 Wolfram Mathworld 以下内容:设 y 是均匀分布在 (0, 1) 和 x 另一个随机变量分布为 P(x) = C*x**n(对于 (xmin,xmax) 中的 x)。我们有
x=[ (xmax**(n+1) - xmin**(n-1))y+xmin**(n+1) ]**(1/(n+1))
所以我用 C 编写了这个程序,它生成从 1 到 100 的 50k 个数字,这些数字应该分配为 x^(-2) 并将结果的频率打印在文件上 DATA.txt:
void random_powerlike(int *k, int dim, double degree, int xmin, int xmax, unsigned int *seed)
{
int i;
double aux;
for(i=0; i<dim; i++)
{
aux=(powq(xmax, degree +1 ) - powq(xmin, degree +1 ))*((double)rand_r(seed)/RAND_MAX)+ powq(xmin, degree +1);
k[i]=(int) powq(aux, 1/(degree+1));
}
}
int main()
{
unsigned int seed = 1934123471792583;
FILE *tmp;
char stringa[50];
sprintf(stringa, "Data.txt");
tmp=fopen(stringa, "w");
int dim=50000;
int *k;
k= (int *) malloc(dim*sizeof(int));
int degree=-2;
int freq[100];
random_powerlike(k,dim, degree, 1,100,&seed);
fprintf(tmp, "#degree = %d x=[%d,%d]\n",degree,1,100);
for(int j=0; j< 100;j++)
{
freq[j]=0;
for(int i = 0; i< dim; ++i)
{
if(k[i]==j+1)
freq[j]++;
}
fprintf(tmp, "%d %d\n", j+1, freq[j]);
}
fflush(tmp);
fclose(tmp);
return 0;
}
我决定用 pylab 拟合这些数字,看看适合它们的最佳幂律是否是 a*x**b,其中 b = -2。我在 python:
中写了这个程序import numpy
from scipy.optimize import curve_fit
import pylab
num, freq = pylab.loadtxt("Data.txt", unpack=True)
freq=freq/freq[0]
def funzione(num, a,b):
return a*num**(b)
pars, covm = curve_fit(funzione, num, freq, absolute_sigma=True)
xx=numpy.linspace(1, 99)
pylab.plot(xx, funzione(xx, pars[0],pars[1]), color='red')
pylab.errorbar(num, freq, linestyle='', marker='.',color='black')
pylab.show()
print pars
问题是当我拟合数据时,我得到的指数值为 ~-1.65。
我想我在某个地方犯了错误,但我想不通在哪里。
我认为你必须制作直方图。我只是稍微重写了您的代码,现在非常适合
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
double rndm() {
return (double)rand()/(double)RAND_MAX;
}
double power_sample(double xmin, double xmax, int degree) {
double pmin = pow(xmin, degree + 1);
double pmax = pow(xmax, degree + 1);
double v = pmin + (pmax - pmin)*rndm();
return pow(v, 1.0/(degree + 1));
}
int main() {
unsigned int seed = 32345U;
srand(seed);
int xmin = 1;
int xmax = 100;
double* hist = malloc((xmax-xmin + 1)*sizeof(double));
memset(hist, 0, (xmax-xmin + 1)*sizeof(double));
// sampling
int nsamples = 100000000;
for(int k = 0; k != nsamples; ++k) {
double v = power_sample(xmin, xmax, 2);
int idx = (int)v;
hist[idx] += 1.0;
}
// normalization
for(int k = xmin; k != xmax; ++k) {
hist[k] /= (double)nsamples;
}
// output
for(int k = xmin; k != xmax; ++k) {
double x = k + 0.5;
printf(" %e %e\n", x, hist[k]);
}
free(hist); // cleanup
return 0;
}
和配件代码
import numpy
from scipy.optimize import curve_fit
import pylab
def funzione(x, a,b):
return a * numpy.power(x, b)
num, freq = pylab.loadtxt("q.dat", unpack=True)
pars, covm = curve_fit(funzione, num, freq, absolute_sigma=True)
pylab.plot(num, funzione(num, pars[0], pars[1]), color='red')
pylab.errorbar(num, freq, linestyle='', marker='.',color='black')
pylab.show()
print(pars)
它产生了
[ 3.00503372e-06 1.99961571e+00]
非常接近