骨架代码:对字符指针的困惑
Skeleton code: struggles with character pointers
所以这是读取此 csv 文件的骨架代码的一部分:
year,month,day,location,mintemp,maxtemp
2015,4,28,18,6.7,12.9
2015,4,28,22,12.7,19.1
2015,4,29,18,7.6,15.3
2015,4,29,22,13.4,21.9
2015,4,30,18,7.3,21.8
2015,4,30,22,13.2,23.2
2015,5,1,18,9.4,15.9
2015,5,1,22,16.1,27.2
2015,5,2,18,8.7,16.3
2015,5,2,22,14.2,21.4
此函数将文件作为字符串读取并以某种方式将其分开并将其放入类型为 csv_t 的名为 'D' 的结构中。
结构及其他:
/* Buffer type for one line of input: room for LINELEN characters plus
   one more byte for the string terminator. */
typedef char input_line_t[LINELEN+1];
/* In-memory form of a CSV file, as filled in by read_csv_file(). */
typedef struct {
/* the raw header row; read_csv_file() replaces each comma in it with
   a null so that every column label becomes its own C string */
input_line_t labelstring;
/* labs[k] points at the start of the k-th label inside labelstring;
   the entry after the last label is set to NULL */
char *labs[MAXCOLS+1];
/* number of data rows stored in vals */
int nrows;
/* number of columns, counted from the header row */
int ncols;
/* vals[r][c] is the number parsed from data row r, column c, or NaN
   when that field was empty or non-numeric */
double vals[MAXROWS][MAXCOLS];
} csv_t;
函数:
/*
 * Read the CSV file called fname into the structure D points at.
 *
 * The first line of the file is the header: it is copied into
 * D->labelstring, its commas are replaced by '\0', and D->labs[] is
 * filled with a pointer to the start of each label, followed by NULL.
 * Every following line is a data row: it must contain the same number
 * of comma-separated fields as the header, and each field is parsed as
 * a double into D->vals[row][col]. Fields that are empty or not
 * numeric are stored as NaN and counted. At most MAXROWS data rows are
 * read. On return D->nrows and D->ncols hold the table dimensions.
 *
 * A summary is printed to stdout. Any fatal problem (no file name, file
 * cannot be opened, too many columns, over-long line, wrong number of
 * fields in a row) prints a message and calls exit(EXIT_FAILURE).
 */
void
read_csv_file(char *fname, csv_t *D) {
    FILE *fp;	/* used to read from a named file */
    input_line_t line;
    int cols=0, rows=0, bytes=0;
    int c, i, j, chr, ncommas, empties=0;
    double x;
    double nan = 0.0/0.0;

    /* first argument on commandline should be the data file name */
    if (fname==NULL) {
        /* and it wasn't there... */
        printf("No csv file specified on commandline\n");
        exit(EXIT_FAILURE);
    }
    /* try and open the named file for reading */
    if ((fp=fopen(fname,"r")) == NULL) {
        printf("Error: unable to open %s\n", fname);
        exit(EXIT_FAILURE);
    }

    /* file is open, can now use fp to access CSV data, start by
       reading the bytes of the header row; stop at EOF as well as at
       newline so a file without a newline cannot loop forever */
    while ((chr=getc(fp)) != EOF && chr != '\n') {
        if (bytes==LINELEN) {
            /* one byte must stay free for the terminator */
            printf("Header line too long, limit is %d characters\n",
                LINELEN);
            exit(EXIT_FAILURE);
        }
        D->labelstring[bytes++] = chr;
    }
    D->labelstring[bytes] = '\0';

    /* now process line again, breaking in to separate labels by
       replacing commas by nulls, and tracking the start of each of
       the column headings; the scan starts at index 0 so that a
       leading comma (an empty first label) is also recognised */
    D->labs[cols++] = D->labelstring;
    for (i=0; i<bytes; i++) {
        if (D->labelstring[i]==COMMA) {
            if (cols==MAXCOLS) {
                printf("Too many columns, limit is %d\n",
                    MAXCOLS);
                exit(EXIT_FAILURE);
            }
            D->labelstring[i] = '\0';
            /* the next label begins just after the comma */
            D->labs[cols++] = D->labelstring+i+1;
        }
    }
    D->labs[cols] = NULL;

    /* ok, that's the labels sorted, now for the data */
    while ((chr=getc(fp)) != EOF) {
        /* there is another row, because first character of it
           just got read, next step is to get the rest of them */
        i = 0;
        line[i++] = chr;
        ncommas = (chr==COMMA);
        while (((chr=getc(fp))!=EOF) && (chr!='\n')) {
            if (i==LINELEN) {
                /* one byte must stay free for the terminator */
                printf("Data line %d too long, limit is %d characters\n",
                    rows+2, LINELEN);
                exit(EXIT_FAILURE);
            }
            line[i++] = chr;
            ncommas += (chr==COMMA);
        }
        line[i] = '\0';
        if (ncommas!=cols-1) {
            printf("Data input error line %d\n", rows+2);
            exit(EXIT_FAILURE);
        }

        /* then process the line from the right end */
        j = i-1;
        for (c=cols-1; c>=0; c--) {
            /* look for next previous comma */
            while (j>=0 && line[j]!=COMMA) {
                j--;
            }
            /* access the value, which starts just after the comma
               (or at the start of the line when j is -1) */
            if (sscanf(line+j+1, "%lf", &x) == 1) {
                D->vals[rows][c] = x;
            } else {
                D->vals[rows][c] = nan;
                empties++;
            }
            /* mark the new end of the string; for the first column
               there is no comma, and j is -1, so nothing to cut */
            if (j>=0) {
                line[j--] = '\0';
            }
        }
        rows++;
        /* check to make sure don't overflow array */
        if (rows==MAXROWS) {
            /* time to stop reading data */
            printf("Too many rows, truncated at %d\n", MAXROWS);
            break;
        }
        /* if not full, go round and see if there is another data row */
    }
    /* either input has all been read or array is full */
    fclose(fp);
    printf("file %s:\n    %d columns and %d rows of data\n",
        fname, cols, rows);
    if (empties) {
        printf("    %d entries were empty or non-numeric\n",
            empties);
    }
    /* finish building the structure */
    D->nrows = rows;
    D->ncols = cols;
    return;
}
除了从这一点开始,尤其是这一点,我基本上明白发生了什么:
D->labs[cols++] = D->labelstring;
for (i=1; i<bytes; i++) {
if (D->labelstring[i]==COMMA/*=','*/) {
D->labelstring[i] = '\0';
D->labs[cols++] = D->labelstring+i+1;
}
最后这段代码让我非常困惑。labelstring 是一个字符串,却可以给它加上一个数字,这样做有什么作用?它又是如何得出列数的?只要弄懂这一点,我就能理解整段代码了。感谢帮助。
非常感谢!!
其实很简单。假设你有 labelstring
包含字符串 "Foo"
,那么它在内存中看起来像这样
+-----+-----+-----+------+
| 'F' | 'o' | 'o' | '\0' |
+-----+-----+-----+------+
^
|
+-------------+
| labelstring |
+-------------+
也就是说,labelstring
指向字符串中的第一个字符。
如果你再做 labelstring + 1
你会得到一个指向*第二个*字符的指针:
+-----+-----+-----+------+
| 'F' | 'o' | 'o' | '\0' |
+-----+-----+-----+------+
      ^
      |
      +-----------------+
      | labelstring + 1 |
      +-----------------+
这叫做指针运算。
是的,您也可以在数组上使用它,因为数组会衰减到指向其第一个元素的指针。
了解了以上内容后,也很容易理解array[X]
(对于任何数组或指针array
以及对于任何X
)与*(array + X)
是一样的.
真正让你大吃一惊的是,由于加法满足交换律(commutative property of addition),上面最后这一点意味着 *(array + X)
等于 *(X + array)
,这导致 array[X]
等于 X[array]
.
所以这是读取此 csv 文件的骨架代码的一部分:
year,month,day,location,mintemp,maxtemp
2015,4,28,18,6.7,12.9
2015,4,28,22,12.7,19.1
2015,4,29,18,7.6,15.3
2015,4,29,22,13.4,21.9
2015,4,30,18,7.3,21.8
2015,4,30,22,13.2,23.2
2015,5,1,18,9.4,15.9
2015,5,1,22,16.1,27.2
2015,5,2,18,8.7,16.3
2015,5,2,22,14.2,21.4
此函数将文件作为字符串读取并以某种方式将其分开并将其放入类型为 csv_t 的名为 'D' 的结构中。
结构及其他:
/* Buffer type for one line of input: room for LINELEN characters plus
   one more byte for the string terminator. */
typedef char input_line_t[LINELEN+1];
/* In-memory form of a CSV file, as filled in by read_csv_file(). */
typedef struct {
/* the raw header row; read_csv_file() replaces each comma in it with
   a null so that every column label becomes its own C string */
input_line_t labelstring;
/* labs[k] points at the start of the k-th label inside labelstring;
   the entry after the last label is set to NULL */
char *labs[MAXCOLS+1];
/* number of data rows stored in vals */
int nrows;
/* number of columns, counted from the header row */
int ncols;
/* vals[r][c] is the number parsed from data row r, column c, or NaN
   when that field was empty or non-numeric */
double vals[MAXROWS][MAXCOLS];
} csv_t;
函数:
/*
 * Read the CSV file called fname into the structure D points at.
 *
 * The first line of the file is the header: it is copied into
 * D->labelstring, its commas are replaced by '\0', and D->labs[] is
 * filled with a pointer to the start of each label, followed by NULL.
 * Every following line is a data row: it must contain the same number
 * of comma-separated fields as the header, and each field is parsed as
 * a double into D->vals[row][col]. Fields that are empty or not
 * numeric are stored as NaN and counted. At most MAXROWS data rows are
 * read. On return D->nrows and D->ncols hold the table dimensions.
 *
 * A summary is printed to stdout. Any fatal problem (no file name, file
 * cannot be opened, too many columns, over-long line, wrong number of
 * fields in a row) prints a message and calls exit(EXIT_FAILURE).
 */
void
read_csv_file(char *fname, csv_t *D) {
    FILE *fp;	/* used to read from a named file */
    input_line_t line;
    int cols=0, rows=0, bytes=0;
    int c, i, j, chr, ncommas, empties=0;
    double x;
    double nan = 0.0/0.0;

    /* first argument on commandline should be the data file name */
    if (fname==NULL) {
        /* and it wasn't there... */
        printf("No csv file specified on commandline\n");
        exit(EXIT_FAILURE);
    }
    /* try and open the named file for reading */
    if ((fp=fopen(fname,"r")) == NULL) {
        printf("Error: unable to open %s\n", fname);
        exit(EXIT_FAILURE);
    }

    /* file is open, can now use fp to access CSV data, start by
       reading the bytes of the header row; stop at EOF as well as at
       newline so a file without a newline cannot loop forever */
    while ((chr=getc(fp)) != EOF && chr != '\n') {
        if (bytes==LINELEN) {
            /* one byte must stay free for the terminator */
            printf("Header line too long, limit is %d characters\n",
                LINELEN);
            exit(EXIT_FAILURE);
        }
        D->labelstring[bytes++] = chr;
    }
    D->labelstring[bytes] = '\0';

    /* now process line again, breaking in to separate labels by
       replacing commas by nulls, and tracking the start of each of
       the column headings; the scan starts at index 0 so that a
       leading comma (an empty first label) is also recognised */
    D->labs[cols++] = D->labelstring;
    for (i=0; i<bytes; i++) {
        if (D->labelstring[i]==COMMA) {
            if (cols==MAXCOLS) {
                printf("Too many columns, limit is %d\n",
                    MAXCOLS);
                exit(EXIT_FAILURE);
            }
            D->labelstring[i] = '\0';
            /* the next label begins just after the comma */
            D->labs[cols++] = D->labelstring+i+1;
        }
    }
    D->labs[cols] = NULL;

    /* ok, that's the labels sorted, now for the data */
    while ((chr=getc(fp)) != EOF) {
        /* there is another row, because first character of it
           just got read, next step is to get the rest of them */
        i = 0;
        line[i++] = chr;
        ncommas = (chr==COMMA);
        while (((chr=getc(fp))!=EOF) && (chr!='\n')) {
            if (i==LINELEN) {
                /* one byte must stay free for the terminator */
                printf("Data line %d too long, limit is %d characters\n",
                    rows+2, LINELEN);
                exit(EXIT_FAILURE);
            }
            line[i++] = chr;
            ncommas += (chr==COMMA);
        }
        line[i] = '\0';
        if (ncommas!=cols-1) {
            printf("Data input error line %d\n", rows+2);
            exit(EXIT_FAILURE);
        }

        /* then process the line from the right end */
        j = i-1;
        for (c=cols-1; c>=0; c--) {
            /* look for next previous comma */
            while (j>=0 && line[j]!=COMMA) {
                j--;
            }
            /* access the value, which starts just after the comma
               (or at the start of the line when j is -1) */
            if (sscanf(line+j+1, "%lf", &x) == 1) {
                D->vals[rows][c] = x;
            } else {
                D->vals[rows][c] = nan;
                empties++;
            }
            /* mark the new end of the string; for the first column
               there is no comma, and j is -1, so nothing to cut */
            if (j>=0) {
                line[j--] = '\0';
            }
        }
        rows++;
        /* check to make sure don't overflow array */
        if (rows==MAXROWS) {
            /* time to stop reading data */
            printf("Too many rows, truncated at %d\n", MAXROWS);
            break;
        }
        /* if not full, go round and see if there is another data row */
    }
    /* either input has all been read or array is full */
    fclose(fp);
    printf("file %s:\n    %d columns and %d rows of data\n",
        fname, cols, rows);
    if (empties) {
        printf("    %d entries were empty or non-numeric\n",
            empties);
    }
    /* finish building the structure */
    D->nrows = rows;
    D->ncols = cols;
    return;
}
除了从这一点开始,尤其是这一点,我基本上明白发生了什么:
D->labs[cols++] = D->labelstring;
for (i=1; i<bytes; i++) {
if (D->labelstring[i]==COMMA/*=','*/) {
D->labelstring[i] = '\0';
D->labs[cols++] = D->labelstring+i+1;
}
最后这段代码让我非常困惑。labelstring 是一个字符串,却可以给它加上一个数字,这样做有什么作用?它又是如何得出列数的?只要弄懂这一点,我就能理解整段代码了。感谢帮助。非常感谢!!
其实很简单。假设你有 labelstring
包含字符串 "Foo"
,那么它在内存中看起来像这样
+-----+-----+-----+------+
| 'F' | 'o' | 'o' | '\0' |
+-----+-----+-----+------+
^
|
+-------------+
| labelstring |
+-------------+
也就是说,labelstring
指向字符串中的第一个字符。
如果你再做 labelstring + 1
你会得到一个指向*第二个*字符的指针:
+-----+-----+-----+------+
| 'F' | 'o' | 'o' | '\0' |
+-----+-----+-----+------+
      ^
      |
      +-----------------+
      | labelstring + 1 |
      +-----------------+
这叫做指针运算。
是的,您也可以在数组上使用它,因为数组会衰减到指向其第一个元素的指针。
了解了以上内容后,也很容易理解array[X]
(对于任何数组或指针array
以及对于任何X
)与*(array + X)
是一样的.
真正让你大吃一惊的是,由于加法满足交换律(commutative property of addition),上面最后这一点意味着 *(array + X)
等于 *(X + array)
,这导致 array[X]
等于 X[array]
.