这段代码的性能瓶颈是什么?

What are the performance bottlenecks in this code?

我有以下 Clojure 代码:

(ns smallest-sum.core)

(defn next-transformation
  [arr]
  (loop [i 0
         j 0]
    (let [
          arr-len (count arr)
          ]
      (if (and (< i arr-len)
               (< j arr-len))
        (let [
                xi (nth arr i)
                xj (nth arr j)
                j-plus-1 (+ j 1)
                i-plus-1 (+ i 1)
                new-i (if (< j-plus-1 arr-len)
                       i
                       (+ i 1))
                new-j (if (< j-plus-1 arr-len)
                       (+ j 1)
                       0)
              ]
          (if (> xi xj)
              ;; We found it
              [i j] 
              ;; We haven't found it, recur 
              (recur new-i new-j)
            )
          )
          nil ; We are at the end of the  loop
        ) ; if 
      )
    ) ; loop 
  ) ; defn

(defn solution
  [arr]
  (loop [state {
                :arr arr 
                :sum 0
                }]
      (let [
            cur-arr (get state :arr) 
            trx (next-transformation cur-arr) 
            ]
         ; (println (str "========"))
         ; (println (str "cur-arr: " cur-arr))
         (if (not (= trx nil))
           ;; trx is not nil -- recur
           (let [
                  i (nth trx 0)
                  j (nth trx 1)
                  xi (nth cur-arr i)
                  xj (nth cur-arr j)
                  diff (- xi xj) 
                 ]
              (recur (assoc state :arr (assoc cur-arr
                                              i
                                              diff
                                 )
                           )
                    )            
             )
           ;; Here we need the sum
           (reduce + cur-arr)
           )
        )   
    )
)

此代码必须能够在 120000 毫秒内处理大量输入(示例见下文)。

我假设一个问题是两个循环(在solutionnext-transformation中)可以统一为一个。

这段代码还有其他性能瓶颈吗?

这是失败的测试,因为 solution 花费超过 120000 毫秒到 运行:

(ns smallest-sum.test
  (:require [smallest-sum.core :refer :all]
            [clojure.test :refer :all]))
            
(deftest sample-test-cases
  (is (< 0 (solution [
             91293
             38437
             40626
             173
             76990
             17858
             43446
             25050
             10791
             68990
             52403
             21503
             52331
             51909
             73488
             91293
             38437
             40626
             173
             76990
             17858
             43446
             25050
             10791
             68990
             52403
             21503
             52331
             51909
             73488
             91293
             38437
             40626
             173
             76990
             17858
             43446
             25050
             10791
             68990
             52403
             21503
             52331
             51909
             73488
             91293
             38437
             40626
             173
             76990
             17858
             43446
             25050
             10791
             68990
             52403
             21503
             52331
             51909
             73488
                      ])  ))       
)

更新: 阅读答案以及 this 问题后,我重写了代码如下。现在它似乎可以工作了(足够快)。

(defn gcd
  [a b]
  (if (= b 0)
    a
    (gcd b (mod a b)))
  )

(defn gcd-arr
  [arr]
  (reduce gcd arr) 
  )

(defn solution
  [arr]
  (let [
        greatest-common-divisor (gcd-arr arr)
        arr-len (count arr)
        ]
      (* arr-len greatest-common-divisor) 
    )
  )

我发现这段代码很难推理。如果您能提供代码试图做什么的描述,可能会给出更好的建议。

但是根据我所看到的 运行 这两个函数 solution 接受一个向量并以某种方式减小值,直到它们都是相同的值。然后 returns 向量的大小乘以相同的值。 (或减少值的总和)next-transformation从向量中挑选出两个具有不同值的索引,solution然后将较大的值减少差值。


下一次转换 return 的索引之一始终为零。

next-transformation的结果是:

  1. nil 当向量中的所有元素都相同时。
  2. [0 first-index-of-element-less-than-first] 当向量的一个或多个元素小于第一个元素时。
  3. 其他[first-index-of-element-greater-than-first 0].

以上结果可以单次计算,而不是嵌套循环。类似于:

(defn nt-1 [arr]
  (let [f (first arr)]
    (loop [i 1
           bigger nil]
      (cond (>= i (count arr)) (if (nil? bigger) 
                                 nil 
                                 [bigger 0])
            (> f (arr i)) [0 i]
            :else (recur (inc i)
                         (cond (not (nil? bigger)) bigger
                               (< f (arr i)) i
                               :else nil))))))

我不太了解 solution 以下内容是否正确,但另一种可能性 可能是 以第一个不相等的元素退出:

(defn nt-2 [arr]
  (let [f (first arr)]
    (loop [i 1]
      (cond (>= i (count arr)) nil
            (< f (arr i)) [i 0]
            (> f (arr i)) [0 i]
            :else (recur (inc i))))))

时间:

;; Using original next-transformation
user> (time (solution (into [] (repeat 100000 10000))))
"Elapsed time: 269826.483125 msecs"
1000000000
user> (def next-transformation nt-1)
#'user/next-transformation
user> (time (solution (into [] (repeat 1000000 10000))))
"Elapsed time: 116.421625 msecs"
10000000000
user> (def next-transformation nt-2)
#'user/next-transformation
user> (time (solution (into [] (repeat 1000000 10000))))
"Elapsed time: 87.211333 msecs"
10000000000

使用您的测试数据:

;; Original next-transformation
user> (time (solution [91293 38437 40626 173 76990 17858 43446 25050 10791
                 68990 52403 21503 52331 51909 73488 91293 38437 40626
                 173 76990 17858 43446 25050 10791 68990 52403 21503
                 52331 51909 73488 91293 38437 40626 173 76990 17858
                 43446 25050 10791 68990 52403 21503 52331 51909 73488
                 91293 38437 40626 173 76990 17858 43446 25050 10791
                 68990 52403 21503 52331 51909 73488]))
"Elapsed time: 112973.434458 msecs"
60
user> (def next-transformation nt-1)
#'user/next-transformation
user> (time (solution [91293 38437 40626 173 76990 17858 43446 25050 10791
                 68990 52403 21503 52331 51909 73488 91293 38437 40626
                 173 76990 17858 43446 25050 10791 68990 52403 21503
                 52331 51909 73488 91293 38437 40626 173 76990 17858
                 43446 25050 10791 68990 52403 21503 52331 51909 73488
                 91293 38437 40626 173 76990 17858 43446 25050 10791
                 68990 52403 21503 52331 51909 73488]))
"Elapsed time: 1630.157375 msecs"
60

零和负数似乎导致无限循环

在至少一个正数和至少一个非正数的情况下solution出现了进入死循环。我假设使用的数据是正整数。


一旦(arr 0)为一,结果为(count arr)。

我的直觉告诉我,通常其中一个元素会被减少为一个,导致大多数时候结果为 (count arr)。我的直觉还说这是一个无趣的功能,大多数情况下,但并非总是如此,return (count arr) 所以我可能是错的。

在结果为 (count arr) 的情况下,一旦 (= 1 (arr 0)) 退出将提供性能提升。

(defn s-1 [arr]
  (loop [state {:arr arr :sum 0}]
      (let [cur-arr (get state :arr) 
            trx (next-transformation cur-arr)]
        (cond (nil? trx) (* (count cur-arr)
                            (nth cur-arr 0 0))
              (= 1 (first cur-arr)) (count arr)
              :else (let [i (nth trx 0)
                          j (nth trx 1)
                          xi (nth cur-arr i)
                          xj (nth cur-arr j)
                          diff (- xi xj)]
                      (recur (assoc state :arr (assoc cur-arr
                                                      i
                                                      diff))))))))

时间:

user> (def solution s-1)
;; next-transformation has been restored to original
user> (time (solution [91293 38437 40626 173 76990 17858 43446 25050 10791
                 68990 52403 21503 52331 51909 73488 91293 38437 40626
                 173 76990 17858 43446 25050 10791 68990 52403 21503
                 52331 51909 73488 91293 38437 40626 173 76990 17858
                 43446 25050 10791 68990 52403 21503 52331 51909 73488
                 91293 38437 40626 173 76990 17858 43446 25050 10791
                 68990 52403 21503 52331 51909 73488]))
"Elapsed time: 9.182584 msecs"
60
#'user/next-transformation
user> (time (solution [91293 38437 40626 173 76990 17858 43446 25050 10791
                 68990 52403 21503 52331 51909 73488 91293 38437 40626
                 173 76990 17858 43446 25050 10791 68990 52403 21503
                 52331 51909 73488 91293 38437 40626 173 76990 17858
                 43446 25050 10791 68990 52403 21503 52331 51909 73488
                 91293 38437 40626 173 76990 17858 43446 25050 10791
                 68990 52403 21503 52331 51909 73488]))
"Elapsed time: 2.379 msecs"
60

根据我的喜好清理 solution

;; :sum is never used. So there is no reason
;; to have `state` with two entries.
;; (Not that there ever was:
;;    `(loop [arr arr sum 0]...)`)
;; would have been fine.
;; With only `arr` changing, we can
;; recur to the function top, without
;; `loop`
(defn s-2 [arr]
  (let [trx (next-transformation arr)]
    (cond (nil? trx) (* (count arr)
                        (nth arr 0 0)) ; Last `0` provides a default
          (= 1 (first arr)) (count arr)
          :else (let [[i j] trx ;destructuring
                      diff (- (arr i) (arr j))]
                  (recur (assoc arr i diff))))))

时间:

user> (def solution s-2)
#'user/solution
user> (time (solution [91293 38437 40626 173 76990 17858 43446 25050 10791
                 68990 52403 21503 52331 51909 73488 91293 38437 40626
                 173 76990 17858 43446 25050 10791 68990 52403 21503
                 52331 51909 73488 91293 38437 40626 173 76990 17858
                 43446 25050 10791 68990 52403 21503 52331 51909 73488
                 91293 38437 40626 173 76990 17858 43446 25050 10791
                 68990 52403 21503 52331 51909 73488]))
"Elapsed time: 2.116625 msecs"
60

使用一次 time 并不是很好的基准测试,但考虑到差异的大小,我相信在这种情况下它仍然是定向的。

您实施的这个算法实际上 f(data) = gcd(data) * length(data) 用于任何 data 正整数集合。

虽然您选择的递归减法算法不是最优的,但仍然可以改进,例如

(defn solve2 [data]
  (cond (some #(= 1 %) data) (count data)             ;; if any of numbers is 1, gcd is obviously 1
        (apply = data) (* (first data) (count data))  ;; if all of them are equal => gcd is found 
        :else (let [res (map #(reduce (fn [acc x]
                                        (if (< x acc)
                                          ;; the next line is identical to substracting x from acc while substraction result is > x
                                          (- acc (long (* x (dec (Math/ceil (/ acc x))))))
                                          acc))
                                      %
                                      data)
                             data)]
                ;; repeating until gcd is not found
                (recur res))))

user> (solve2 [4 8 2 6 10 12])
;;=> 12

user> (solve2 [4 8 2 6 10 21])
;;=> 6

或供您输入:

user> (time (solve2 [91293 38437 40626 173 76990 17858 43446 25050 10791
                     68990 52403 21503 52331 51909 73488 91293 38437 40626
                     173 76990 17858 43446 25050 10791 68990 52403 21503
                     52331 51909 73488 91293 38437 40626 173 76990 17858
                     43446 25050 10791 68990 52403 21503 52331 51909 73488
                     91293 38437 40626 173 76990 17858 43446 25050 10791
                     68990 52403 21503 52331 51909 73488]))
;; "Elapsed time: 1.518815 msecs"
;;=> 60

但实际上你需要的只是组成一些gcd函数并用它来减少输入。像这样:

;; binary gcd algorithm impl
(defn gcd
  ([m n] (gcd 1 m n))
  ([mul m n]
   (let [m (long m)
         n (long n)]
     (cond (zero? m) (* n mul)
           (zero? n) (* m mul)
           (== m n) (* m mul)
           (or (== 1 m) (== 1 n)) mul
           (and (even? m) (even? n)) (recur (* mul 2) (/ m 2.0) (/ n 2.0))
           (even? m) (recur mul (/ m 2.0) n)
           (even? n) (recur mul m (/ n 2.0))
           (> n m) (recur mul (/ (- n m) 2.0) m)
           :else (recur mul (/ (- m n) 2.0) n)))))

user> (reduce gcd [2 4 8 8 10 22])
;; 2
user> (reduce gcd [2 4 8 11 10 22])
;; 1

user> (time (reduce gcd [91293 38437 40626 173 76990 17858 43446 25050 10791
                         68990 52403 21503 52331 51909 73488 91293 38437 40626
                         173 76990 17858 43446 25050 10791 68990 52403 21503
                         52331 51909 73488 91293 38437 40626 173 76990 17858
                         43446 25050 10791 68990 52403 21503 52331 51909 73488
                         91293 38437 40626 173 76990 17858 43446 25050 10791
                         68990 52403 21503 52331 51909 73488]))
;;"Elapsed time: 0.079517 msecs"
;;=> 1

user> (time (let [data [91293 38437 40626 173 76990 17858 43446 25050 10791
                        68990 52403 21503 52331 51909 73488 91293 38437 40626
                        173 76990 17858 43446 25050 10791 68990 52403 21503
                        52331 51909 73488 91293 38437 40626 173 76990 17858
                        43446 25050 10791 68990 52403 21503 52331 51909 73488
                        91293 38437 40626 173 76990 17858 43446 25050 10791
                        68990 52403 21503 52331 51909 73488]]
              (* (count data) (reduce gcd data))))
;; "Elapsed time: 0.085898 msecs"
;;=> 60

正如在另一个答案中观察到的,目标似乎与最大公约数有关:

(defn gcd [x y]
  (if (zero? y)
    x
    (recur y (mod x y))))

(defn solution [arr]
  (* (count arr) (reduce gcd arr)))