尝试并行化,但速度提升不够明显

Attempt to parallelize not fast enough

我了解了 Go 的并发模型,也了解了并发和并行之间的区别。为了测试并行执行,我写了下面的程序

package main

import (
    "fmt"
    "runtime"
    "time"
)

// count is the number of elements in buffer; main also uses it to derive the
// index ranges it hands to worker.
const count = 1e8

// buffer is the package-level array that task doubles in place, one element
// per call. It is declared at package scope rather than as a local variable.
var buffer [count]int

func main() {
    fmt.Println("GOMAXPROCS: ", runtime.GOMAXPROCS(0))

    // Initialise with dummy value
    for i := 0; i < count; i++ {
        buffer[i] = 3
    }

    // Sequential operation
    now := time.Now()
    worker(0, count-1)
    fmt.Println("sequential operation: ", time.Since(now))

    // Attempt to parallelize
    ch := make(chan int, 1)
    now = time.Now()
    go func() {
        worker(0, (count/2)-1)
        ch <- 1
    }()
    worker(count/2, count-1)
    <-ch
    fmt.Println("parallel operation: ", time.Since(now))
}

// worker calls task once for every index in the inclusive range [start, end],
// in ascending order. It does nothing when start is greater than end.
func worker(start int, end int) {
    idx := start
    for idx <= end {
        task(idx)
        idx++
    }
}

// task doubles the element of buffer at the given index, in place.
func task(index int) {
    buffer[index] *= 2
}

但问题是:结果不是很令人满意。

GOMAXPROCS:  8
sequential operation:  206.85ms
parallel operation:  169.028ms

使用 goroutine 确实可以加快速度,但还不够。我原本预计速度会接近原来的两倍。我的代码和/或我的理解有什么问题?我怎样才能接近两倍的速度?

并行化很强大,但在计算量这么小的情况下很难看出效果。下面是一段示例代码,其结果差异更明显:

package main

import (
    "fmt"
    "math"
    "runtime"
    "time"
)

// calctest approximates the integral of sqrt(x) over [0, 1] with the midpoint
// rule, using a fixed number of sample points spread over nCPU goroutines. It
// prints the goroutine count, the elapsed wall-clock time and the resulting
// approximation.
//
// Values of nCPU below 1 are treated as 1.
func calctest(nCPU int) {
    // A zero count would launch a goroutine with an infinite upper bound and
    // a negative count would make the channel allocation panic, so clamp.
    if nCPU < 1 {
        nCPU = 1
    }
    fmt.Println("Routines:", nCPU)

    // samples is the total number of midpoint-rule sample points.
    const samples = 100000

    ch := make(chan float64, nCPU)
    startTime := time.Now()
    a := 0.0
    b := 1.0
    deltax := (b - a) / samples

    // Partition the sample indices with integer arithmetic. Accumulating a
    // fractional step in floating point gives chunk bounds that are not whole
    // numbers when nCPU does not divide samples, which shifts and duplicates
    // sample points and does not guarantee exactly nCPU goroutines. Integer
    // bounds give exactly nCPU contiguous, non-overlapping chunks that
    // together cover [0, samples).
    for k := 0; k < nCPU; k++ {
        start := int64(k) * samples / int64(nCPU)
        stop := int64(k+1) * samples / int64(nCPU)
        go f(float64(start), float64(stop), a, deltax, ch)
    }

    // Exactly one partial sum arrives per goroutine launched above.
    integral := 0.0
    for i := 0; i < nCPU; i++ {
        integral += <-ch
    }

    fmt.Println(time.Since(startTime))
    fmt.Println(deltax * integral)
}

// f sums math.Sqrt(a + deltax*(i+0.5)) for i = start, start+1, ... while
// i < stop, and sends the total on ch. When start is not less than stop it
// sends 0.
func f(start, stop, a, deltax float64, ch chan float64) {
    var sum float64
    i := start
    for i < stop {
        // Evaluate the integrand at the midpoint of the i-th step.
        x := a + deltax*(i+0.5)
        sum += math.Sqrt(x)
        i++
    }
    ch <- sum
}

// main runs calctest twice so the timings can be compared: first with one
// goroutine per CPU reported by runtime.NumCPU, then with a single goroutine.
// A blank line separates the two reports.
func main() {
    calctest(runtime.NumCPU())
    fmt.Println("")
    calctest(1)
}

这是我得到的结果:

Routines: 8
853.181µs

Routines: 1
2.031358ms