Python 中配对关系的一致性

Consistency of pairs in Python

我有几个长度相同的列表。对于其中任意两个列表,我想知道在所有可能的索引对中,有多大比例的索引对在这两个列表中具有相同的关系——在我的例子中是“大于”关系(也就是 sign(list[index_1] - list[index_2]) 在两个列表中相同)。

这是我的缓慢实现:

from itertools import combinations
import random
import numpy as np

# Four lists of 50 floats each, drawn with random.random().
values_lists = [[random.random() for _ in range(50)] for _ in range(4)]

total = 0.
n = 0

# Walk every pair of lists and, inside it, every pair of positions; count
# the cases where both lists order the two positions the same way.
for first, second in combinations(values_lists, 2):
    for lo, hi in combinations(range(len(first)), 2):
        n += 1
        sign_first = np.sign(first[hi] - first[lo])
        sign_second = np.sign(second[hi] - second[lo])
        if sign_first == sign_second:
            total += 1

# Fraction of (list pair, index pair) combinations on which the lists agree.
print(total / n)

我想知道是否有人有更快的解决方案,因为这需要一些时间。

预先计算每个列表的符号(不使用 NumPy)可使速度提高约 14 倍(solution2),而把同样的预计算大部分交给 NumPy 完成,可使速度提高约 48 倍(solution3)。

11.52 ms  solution1
 0.82 ms  solution2
 0.25 ms  solution3

11.35 ms  solution1
 0.81 ms  solution2
 0.24 ms  solution3

11.42 ms  solution1
 0.83 ms  solution2
 0.26 ms  solution3

代码(Try it online!):

def solution1(values_lists):
    """Return the fraction of index pairs ordered identically in two lists.

    Every unordered pair of lists from ``values_lists`` is compared over
    every unordered pair of positions. A position pair counts as a match
    when ``np.sign`` of the element difference is equal in both lists.

    Raises ``ZeroDivisionError`` when nothing is compared (fewer than two
    lists, or lists with fewer than two elements).
    """
    matches = 0.
    compared = 0

    for first, second in combinations(values_lists, 2):
        for lo, hi in combinations(range(len(first)), 2):
            compared += 1
            sign_first = np.sign(first[hi] - first[lo])
            sign_second = np.sign(second[hi] - second[lo])
            if sign_first == sign_second:
                matches += 1

    return matches / compared

def solution2(values_lists):
    """Return the fraction of index pairs ordered identically in two lists.

    The pairwise ordering signs of each list are computed once, in pure
    Python, and then compared element by element for every pair of lists.

    Raises ``ZeroDivisionError`` when nothing is compared.
    """

    def pair_signs(values):
        # -1 / 1 / 0 for each unordered pair, in combinations() order.
        signs = []
        for a, b in combinations(values, 2):
            if a < b:
                signs.append(-1)
            elif b < a:
                signs.append(1)
            else:
                signs.append(0)
        return signs

    signs_lists = [pair_signs(lst) for lst in values_lists]

    matches = 0.
    compared = 0
    for left, right in combinations(signs_lists, 2):
        compared += len(left)
        matches += sum(1 for x, y in zip(left, right) if x == y)
    return matches / compared

def solution3(values_lists):
    """Return the fraction of index pairs ordered identically in two lists.

    For each list the signs of all pairwise differences are computed with
    NumPy, and only the entries strictly above the diagonal (one per
    unordered index pair) are kept. The resulting sign arrays are then
    compared for every pair of lists.

    The upper-triangle indices are derived from the length of the first
    list only.
    """
    rows, cols = np.triu_indices(len(values_lists[0]), 1)

    signs_arrays = []
    for lst in values_lists:
        arr = np.array(lst)
        # Entry [i, j] is sign(arr[i] - arr[j]).
        diff_signs = np.sign(np.subtract.outer(arr, arr))
        signs_arrays.append(diff_signs[rows, cols])

    total = 0
    n = 0
    for left, right in combinations(signs_arrays, 2):
        n += left.size
        total += (left == right).sum()
    return total / n

# Implementations to run, in this order.
funcs = (solution1, solution2, solution3)

from timeit import repeat
from itertools import combinations
from operator import eq
import random
import numpy as np

# Benchmark input: 4 lists of 50 floats drawn with random.random().
values_lists = [[random.random() for _ in range(50)] for _ in range(4)]

# Print each implementation's result so the outputs can be compared by eye.
for func in funcs:
    result = func(values_lists)
    print(result)

# Three timing rounds. Each measurement runs the function 10 times; the
# fastest measurement divided by 10 is reported in milliseconds per call.
for _ in range(3):
    print()
    for func in funcs:
        measurements = repeat(lambda: func(values_lists), number=10)
        t = min(measurements) / 10
        print('%5.2f ms ' % (t * 1e3), func.__name__)

我认为我能提供的最好帮助是在格式和组织方面。您应该把代码拆分成可以各自独立运行的函数。

我还用列表推导式简化了您生成嵌套列表的代码。

这并没有解决您代码的速度问题,不过我运行问题中的脚本时,并没有发现明显慢的地方……

from itertools import combinations
import random
import numpy as np
from loguru import logger

@logger.catch
def build_nested_lists(inner_list_size=50, outer_list_size=4):
    """Build a list of lists filled with ``random.random()`` values.

    Args:
        inner_list_size: Number of floats in each inner list.
        outer_list_size: Number of inner lists.

    Returns:
        A list containing ``outer_list_size`` lists, each holding
        ``inner_list_size`` floats.
    """
    return [
        [random.random() for _ in range(inner_list_size)]
        for _ in range(outer_list_size)
    ]


@logger.catch
def handle_nested_lists(
    values_lists=None,
):
    """Return the fraction of index pairs ordered identically in two lists.

    Every unordered pair of lists from ``values_lists`` is compared over
    every unordered pair of positions. A position pair counts as a match
    when ``np.sign`` of the element difference is equal in both lists.

    Args:
        values_lists: A list or tuple of equally long numeric sequences.

    Returns:
        ``matches / comparisons`` as a float.

    Raises:
        TypeError: If ``values_lists`` is not a list or tuple.
        ZeroDivisionError: If nothing is compared (fewer than two lists,
            or lists with fewer than two elements).

    NOTE(review): the ``logger.catch`` decorator presumably intercepts these
    exceptions before the caller sees them -- confirm against loguru's docs.
    """
    # Explicit check instead of ``assert``: assertions are removed when
    # Python runs with -O, which would silently disable the validation.
    if not isinstance(values_lists, (list, tuple)):
        raise TypeError(
            "values_lists must be a list or tuple, "
            f"got {type(values_lists).__name__}"
        )

    total = 0.0
    n = 0

    for list_1, list_2 in combinations(values_lists, 2):
        for index_1, index_2 in combinations(range(len(list_1)), 2):
            sign_1 = np.sign(list_1[index_2] - list_1[index_1])
            sign_2 = np.sign(list_2[index_2] - list_2[index_1])
            if sign_1 == sign_2:
                total += 1
            n += 1
    return total / n

if __name__ == "__main__":
    # 4 lists of 50 random values each, then print their agreement ratio.
    nested_values = build_nested_lists(inner_list_size=50, outer_list_size=4)
    print(handle_nested_lists(nested_values))

扩展@Kelly Bundy 的答案,您可以通过对生成器表达式求和而不是嵌套的 for 循环来使 solution2 更快:

def generatorway():
    """Return the fraction of index pairs ordered identically in two lists.

    Reads the module-level ``values_lists``. The pairwise ordering signs of
    each list are computed once and then compared for every pair of lists.

    Raises ``ZeroDivisionError`` when nothing is compared.
    """

    def order(a, b):
        # -1 when a < b, 1 when b < a, 0 otherwise.
        if a < b:
            return -1
        if b < a:
            return 1
        return 0

    per_list_signs = [
        [order(a, b) for a, b in combinations(values, 2)]
        for values in values_lists
    ]

    matches = 0
    compared = 0
    for left, right in combinations(per_list_signs, 2):
        compared += len(left)
        matches += sum(1 for x, y in zip(left, right) if x == y)
    return matches / compared

使用 values_lists = [[random.random() for _ in range(500)] for _ in range(4)] 时的速度比较:

print(generatorway())
print(solution2())
%timeit generatorway()
%timeit solution2()

输出:

0.4931048764195057
0.4931048764195057
66.9 ms ± 58.5 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
68.6 ms ± 38.6 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)