格式化 x、y、z 坐标的 numpy 数组并将其保存到文本文件的快速方法

Fast way to format and save a numpy array of x, y, z coordinates to a text file

我需要将大量顶点写入特定格式 (.obj wavefront) 的文本文件。所以我正在测试方法。

import numpy as np

def write_test(vertices, file_path, overwrite=True):
    """loop through each vertex, format and write"""
    if overwrite:
        with open(file_path, 'w') as obj_file:
            obj_file.write('')
    with open(file_path, 'a') as test_file:
        for v in vertices:
            test_file.write('v %s %s %s\n' % (v[0], v[1], v[2]))


def write_test2(vertices, file_path, overwrite=True):
    """use np.savetxt"""
    if overwrite:
        with open(file_path, 'w') as obj_file:
            obj_file.write('')
    with open(file_path, 'a') as test_file:
        np.savetxt(test_file, vertices, 'v %s %s %s\n', delimiter='', newline='')


def write_test3(vertices, file_path, overwrite=True):
    """avoid writing in a loop by creating a template for the entire array, and format at once"""
    if overwrite:
        with open(file_path, 'w') as obj_file:
            obj_file.write('')
    with open(file_path, 'a') as test_file:
        temp = 'v %s %s %s\n' * len(vertices)
        test_file.write(temp % tuple(vertices.ravel()))


def write_test4(vertices, file_path, overwrite=True):
    """write only once, use join to concatenate string in memory"""
    if overwrite:
        with open(file_path, 'w') as obj_file:
            obj_file.write('')
    with open(file_path, 'a') as test_file:
        test_file.write('v ' + '\nv '.join(' '.join(map(str, v)) for v in vertices))

事实证明,令我惊讶的是 write_testwrite_test2 更快,其中 write_test3 是最快的

In [2]: a=np.random.normal(0, 1, (1234567, 3))

In [3]: %timeit write_test(a, 'test.obj')
2.6 s ± 94.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

In [4]: %timeit write_test2(a, 'test.obj')
3.6 s ± 30 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

In [5]: %timeit write_test3(a, 'test.obj')
2.23 s ± 7.29 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

In [6]: %timeit write_test4(a, 'test.obj')
3.49 s ± 19.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

目前,写入文本文件是我矢量化代码中的瓶颈。

按照 rchome 的建议查看 np.savetxt 代码 savetxt 似乎正在做大量的通用格式化工作,并且可能在 python 中循环,所以难怪它是比 write_test.

中的简单 python 循环慢

所以我现在的问题是有没有更快的方法来完成这个?

这是一种首先将数据转换为数据框的方法:

#! import pandas as pd


def write_test5(vertices, file_path):
    df = pd.DataFrame(data=a)
    df.insert(loc=0, column='v', value="v")
    df.to_csv(file_path, index=False, sep=" ", header=False)

这可能会有帮助。

我终于写了一个 C 扩展,因为似乎没有任何方法可以从 python/numpy 实现中获得更多性能。

首先我使用 sprintf 进行格式化并得到了这些结果 -

In [7]: a = np.random.normal(0, 1, (1234567, 3))

In [8]: %timeit ObjWrite.write(a, 'a.txt')
1.21 s ± 17.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

与大约 2.5 秒相比,这是一个改进,但还不足以证明编写扩展程序的合理性

由于几乎所有时间都花在格式化字符串上,我写了一个 sprintf 替换只是为了格式化双精度数(精确到值 b/w [=16 的小数点后 15-17 位) =] 和 10^7,这对我的用例来说是可以接受的)

In [9]: %timeit ObjWrite.writeFast(a, 'a-fast.txt')
302 ms ± 22.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

~300ms - 不错!

这是模块 -

ObjWrite.c

#include <stdio.h>
#include <Python.h>
#include <numpy/arrayobject.h>

#define CHUNK_SIZE 32768

/*
Write vertices to given file, use sprintf for formatting
    python-interface: ObjWrite.write(arr: ndarray, filepath: string)
*/
static PyObject* methodWriteIter(PyObject *self, PyObject *args) {
    // Parse arguments
    PyArrayObject *arr;
    char *filepath = NULL;
    if (!PyArg_ParseTuple(args, "O!s", &PyArray_Type, &arr, &filepath)) return PyLong_FromLong(-1);

    npy_intp size = PyArray_SIZE(arr);
    // Handle zero-sized arrays specially, if size is not a multiple of 3, exit
    if (size == 0 || size % 3 != 0) return PyLong_FromLong(-1);

    // get iterator
    NpyIter* iter;
    NpyIter_IterNextFunc *iternext;
    PyArray_Descr *dtype;
    dtype = PyArray_DescrFromType(NPY_DOUBLE);
    iter = NpyIter_New(arr, NPY_ITER_READONLY, NPY_KEEPORDER, NPY_NO_CASTING, dtype);
    if (iter == NULL) return PyLong_FromLong(-1);

    // get iternext function for fast access
    iternext = NpyIter_GetIterNext(iter, NULL);
    if (iternext == NULL) {
        NpyIter_Deallocate(iter);
        return PyLong_FromLong(-1);
    }

    // get data pointer, this will get updated by the iterator
    double **dataptr;
    dataptr = (double **) NpyIter_GetDataPtrArray(iter);

    // open file, exit if null
    FILE *fp = fopen(filepath, "w");
    if (fp == NULL) {
        NpyIter_Deallocate(iter);
        return PyLong_FromLong(-1);
    }

    // init file buffer, writing in chunks does not seem to offer any significant benefit
    // but it should still will be useful when disk utilization is high
    char fileBuffer[CHUNK_SIZE + 128];
    int bufferCount = 0;

    double x, y, z;
    do {
        // get 3 doubles from array
        x = **dataptr;
        iternext(iter);
        y = **dataptr;
        iternext(iter);
        z = **dataptr;
        // use sprintf to format and write to buffer
        bufferCount += sprintf(&fileBuffer[bufferCount], "v %.17f %.17f %.17f\n", x, y, z);
        // if the chunk is big enough, write it.
        if (bufferCount >= CHUNK_SIZE) {
            fwrite(fileBuffer, bufferCount, 1, fp);
            bufferCount = 0;
       }
    } while (iternext(iter));
    // write remainder
    if (bufferCount > 0) fwrite(fileBuffer, 1, bufferCount, fp);

    // clean-up and exit with success
    NpyIter_Deallocate(iter);
    fclose(fp);
    return PyLong_FromLong(0);
}

/*
Turns out that maximum proportion of time is taken by sprintf call in the above implementation
So, the next part is basically implementing a faster way to format doubles
*/

static const char DIGITS[] = "0123456789";  // digit-char lookup table

/* get powers of 10, can overflow but we only need this for digits <= 9 */
int powOf10(int digits) {
    int res = 1;
    while (digits > 0) {
        res *= 10;
        digits--;
    }
    return res;
}

/* a fast way to get number of digits in a positive integer */
int countDigitsPosInt(int n) {
    if (n < 100000) {  // 5 or less
        if (n < 100) {  // 1 or 2
            if (n < 10) { return 1; } else { return 2; }
        } else {  // 3 or 4 or 5
            if (n < 1000) { return 3; }
            else {  // 4 or 5
                if (n < 10000) { return 4; } else { return 5; }
            }
        }
    } else {  // 6 or more
        if (n < 10000000) {  // 6 or 7
            if (n < 1000000) { return 6; } else { return 7; }
        } else {  // 8 to 10
            if (n < 100000000) { return 8; }
            else {  // 9 or 10
                if (n < 1000000000) { return 9; } else { return 10; }
            }
        }
    }
}

/* format positive integers into `digits` length strings, zero-pad if number of digits too high
if number digits are greater then `digits`, it will get truncated, so watch out */
int posIntToStringDigs(char *s, int n, int digits) {
    int q = n;
    int r;
    int i = digits - 1;
    while (i >= 0 && q > 0) {  // assign digits from last to first
        r = q % 10;
        q = q / 10;
        *(s + i) = DIGITS[r];  // get char from lookup table
        i--;
    }
    while (i >= 0) {  // we are here because q=0 and still some digits remain
        *(s + i) = '0';  // 0 pad these
        i--;
    }
    return digits;
}

/* format positive integers - no zero padding */
int posIntToString(char *s, int n) {
    if (n == 0) { // handle 0 case, no need of counting digits in this case
        *s = '0';
        return 1;
    }
    // call posIntToStringDigs with exactly the number of digits as in the integer
    return posIntToStringDigs(s, n, countDigitsPosInt(n));
}


static const int MAX_D = 8;  // max number of digits we'll break things into
static const int _10D = 100000000;  // 10 ^ MAX_D

/*
format positive doubles

accurate to 15-17th digit for numbers that are not huge (< 10^7), fairly accurate for huge numbers
I personally do not need this to be crazy accurate, for the range of numbers I am expecting, this will do just fine
*/
int posDoubleToString(char *s, double f, int precision) {

    // length of the generated string
    int len = 0;

    // to make big numbers int friendly, divide by 10 ^ MAX_D until the whole part would fit in an int
    int steps = 0;
    while (f > _10D) {
        f /= _10D;
        steps++;
    }
    int intPart = (int) f;
    double decPart = f - intPart;
    // add the first whole part to the string, we have no idea how many digits would be there
    len += posIntToString(&s[len], intPart);

    // if the number was bigger then 10 ^ MAX_D, we need to return it to its former glory, i.e. add rest to integer string
    while (steps > 0) {
        decPart = decPart * _10D;
        intPart = (int) decPart;
        decPart = decPart - intPart;
        len += posIntToStringDigs(&s[len], intPart, MAX_D);  // appending 0's important here
        steps--;
    }

    // add the decimal point
    s[len++] = '.';

    // after the decimal, piggy back int-to-string function to `precision` number of digits
    while (precision > 0) {
        if (precision > MAX_D) {
            decPart = decPart * _10D;
            intPart = (int) decPart;
            decPart = decPart - intPart;
            len += posIntToStringDigs(&s[len], intPart, MAX_D);
            precision -= MAX_D;
        } else {
            decPart = decPart * powOf10(precision);
            intPart = (int) decPart;
            decPart = decPart - intPart;
            if (decPart > 0.5) intPart += 1;  // round of
            len += posIntToStringDigs(&s[len], intPart, precision);
            precision = 0;
        }
    }

    // truncate following zeros, loop on string in reverse
    /* commented to mimic sprintf
    int index = len - 1;
    while (index > 0) {
        if (s[index] != '0') break;  // if last char is not 0 our work is done, nothing more to do
        if (s[index - 1] == '.') break;  // if char is 0 but its the last 0 before decimal point, stop
        len--;
        index--;
    }*/

    return len;
}

/* format positive or negative doubles */
int doubleToString(char *s, double f, int pre) {
    // handle negatives
    int len = 0;
    if (f < 0) {
        *s = '-';
        len++;
        f *= -1;  // change to positive
    }
    len += posDoubleToString(&s[len], f, pre);
    return len;
}


/*
Write vertices to given file, use our doubleToString for formatting
    python-interface: ObjWrite.writeFast(arr: ndarray, filepath: string)
*/
static PyObject* methodWriteIterFast(PyObject *self, PyObject *args) {
    // Parse arguments
    PyArrayObject *arr;
    char *filepath = NULL;
    if (!PyArg_ParseTuple(args, "O!s", &PyArray_Type, &arr, &filepath)) return PyLong_FromLong(-1);

    npy_intp size = PyArray_SIZE(arr);
    // Handle zero-sized arrays specially, if size is not a multiple of 3, exit
    if (size == 0 || size % 3 != 0) return PyLong_FromLong(-1);

    // get iterator
    NpyIter* iter;
    NpyIter_IterNextFunc *iternext;
    PyArray_Descr *dtype;
    dtype = PyArray_DescrFromType(NPY_DOUBLE);
    iter = NpyIter_New(arr, NPY_ITER_READONLY, NPY_KEEPORDER, NPY_NO_CASTING, dtype);
    if (iter == NULL) return PyLong_FromLong(-1);

    // get iternext function for fast access
    iternext = NpyIter_GetIterNext(iter, NULL);
    if (iternext == NULL) {
        NpyIter_Deallocate(iter);
        return PyLong_FromLong(-1);
    }

    // get data pointer, this will get updated by the iterator
    double **dataptr;
    dataptr = (double **) NpyIter_GetDataPtrArray(iter);

    // open file, exit if null
    FILE *fp = fopen(filepath, "w");
    if (fp == NULL) {
        NpyIter_Deallocate(iter);
        return PyLong_FromLong(-1);
    }

    // init file buffer, writing in chunks does not seem to offer any significant benefit
    // but it should still will be useful when disk utilization is high
    char fileBuffer[CHUNK_SIZE + 128];
    int bufferCount = 0;

    double x, y, z;
    do {
        // get 3 doubles from array
        x = **dataptr;
        iternext(iter);
        y = **dataptr;
        iternext(iter);
        z = **dataptr;

        // use doubleToString to format and write to buffer
        fileBuffer[bufferCount++] = 'v';
        fileBuffer[bufferCount++] = ' ';
        bufferCount += doubleToString(&fileBuffer[bufferCount], x, 17);
        fileBuffer[bufferCount++] = ' ';
        bufferCount += doubleToString(&fileBuffer[bufferCount], y, 17);
        fileBuffer[bufferCount++] = ' ';
        bufferCount += doubleToString(&fileBuffer[bufferCount], z, 17);
        fileBuffer[bufferCount++] = '\n';

        // if the chunk is big enough, write it.
        if (bufferCount >= CHUNK_SIZE) {
            fwrite(fileBuffer, bufferCount, 1, fp);
            bufferCount = 0;
       }
    } while (iternext(iter));
    // write remainder
    if (bufferCount > 0) fwrite(fileBuffer, 1, bufferCount, fp);

    // clean-up and exit with success
    NpyIter_Deallocate(iter);
    fclose(fp);
    return PyLong_FromLong(0);
}


/* Set up the methods table */
static PyMethodDef objWriteMethods[] = {
    {"write", methodWriteIter, METH_VARARGS, "write numpy array to a text file in .obj format"},
    {"writeFast", methodWriteIterFast, METH_VARARGS, "write numpy array to a text file in .obj format"},
    {NULL, NULL, 0, NULL}  /* Sentinel - marks the end of this structure */
};

/* Set up module definition */
static struct PyModuleDef objWriteModule = {
    PyModuleDef_HEAD_INIT,
    "ObjWrite",
    "write numpy array to a text file in .obj format",
    -1,
    objWriteMethods
};

/* module init function */
PyMODINIT_FUNC PyInit_ObjWrite(void) {
    import_array();
    return PyModule_Create(&objWriteModule);
}

setup.py

from distutils.core import setup, Extension
import numpy

def main():
    setup(
        name="ObjWrite",
        version="1.0.0",
        description="Python interface for the function to write numpy array to a file",
        author="Shobhit Vashistha",
        author_email="shobhit.v87@gmail.com",
        ext_modules=[
            Extension("ObjWrite", ["ObjWrite.c"], include_dirs=[numpy.get_include()])
        ]
    )

if __name__ == "__main__":
    main()

我知道这可能有点矫枉过正,但我​​在深入研究 C 和 Python/Numpy C 扩展世界时玩得很开心,希望将来其他人会发现它有用。