time.Now() 在 goroutine 中的意外行为

Unexpected behaviour of time.Now() in goroutine

作为尝试熟悉 Go 的一种方式,我正在尝试构建一个(完全不可靠的)随机数生成器。这个想法是对某些 url 发出 100 个 GET 请求,对结果进行处理并产生一个“随机”数字。
我很想知道在工作组中使用 goroutines 执行请求时代码是否会 运行 更快。答案似乎是肯定的,但是当打印出各个请求的计时结果时,goroutine 调用的计时显示出一个有趣的结果。
GET 请求的顺序计时(以微秒为单位):
[25007 30502 25594 40417 31505 18502 20503 19034 19473 18001 36507 25004 28005 19004 20502 20503 20503 20504 20002 19003 20511 18494 20003 21004 20003 20502 20504 19002 19004 21506 29501 30005 31005 21504 20054 22452 19503 19503 20003 19503 21004 18501 18003 20003 20003 19003 19503 20003 23504 18003 20003 19503 19502 19003 20003 20003 20040 21010 18959 20503 34251 27260 30504 25004 22004 20502 20003 19503 20502 20504 19503 22003 19003 19003 20003 20002 18003 19503 19003 18503 20504 18552 18953 18002 20003 19004 21002 18503 20503 19503 20504 20003 20003 21003 46050 19504 18503 19503 19503 19002]

GET 请求的 Goroutine 计时(以微秒为单位):
[104518 134570 157528 187533 193535 193535 208036 211041 220039 220242 252044 252044 258045 258045 258045 258045 271047 282050 282050 282050 286050 287050 289051 296052 297552 300052 300678 305553 307053 308054 310556 311069 312055 312555 324056 329558 334559 339559 346061 353562 360563 369564 375065 377566 384067 393569 397069 402570 410072 416572 420573 425574 431076 437576 443078 446577 453579 458580 465081 474583 480584 488085 496122 505588 510589 515590 520591 526592 533593 538596 544595 549596 555097 563098 569600 575100 584101 589604 595604 604106 610606 620609 634111 640611 645613 653119 656616 663116 669117 674118 681119 696122 709123 723627 735629 747631 757632 769635 779137 785139]
goroutine 调用的计时是递增的,而常规的顺序计时则符合预期。我怀疑这可能与 time.Now() 对所有 goroutines 只求值一次有关,但打乱调用顺序并没有改变结果。
这是我到目前为止所拥有的,我知道熵不是衡量随机性的好方法,但由于某些原因我还是将它包括在内:)
首先运行 goroutines 版本,接下来运行顺序版本。最后,打印计时和其他一些内容。

package main

import (
    "fmt"
    "log"
    "math/rand"
    "net/http"
    "sync"
    "time"

    "github.com/montanaflynn/stats"
)

func doGet(address string, channel chan int, wg *sync.WaitGroup) {
    // doGet times a single GET request to address and sends the elapsed
    // time in microseconds on channel. It signals wg when it finishes.
    defer wg.Done()
    startTime := time.Now()
    resp, err := http.Get(address)
    if err != nil {
        log.Fatalln(err)
    }
    // Close the response body so the transport can reuse the connection;
    // the original discarded the response and leaked the body.
    resp.Body.Close()
    // get time since start
    delta := int(time.Since(startTime).Microseconds())
    channel <- delta
}

func doGetNoWg(address string) int {
    // doGetNoWg times a single GET request to address (no waitgroup or
    // channel involved) and returns the elapsed time in microseconds.
    start := time.Now()
    resp, err := http.Get(address)
    if err != nil {
        log.Fatalln(err)
    }
    // Close the response body to avoid leaking the connection; the
    // original never closed it.
    resp.Body.Close()
    return int(time.Since(start).Microseconds())
}

func main() {
    // Compare 100 concurrent GET requests against 100 sequential ones,
    // then report per-request timings and the entropy of the results.
    var wg sync.WaitGroup
    // Result arrays for the two passes.
    var timingsParallel [100]int
    var timingsSequential [100]int
    // Small uniform set as an entropy baseline.
    zeroes := []int{1, 1, 1}
    // Random set as an entropy baseline.
    var randnrs [100]int
    for i := range randnrs {
        randnrs[i] = rand.Intn(250)
    }
    // Concurrent pass.
    start := time.Now()
    ch := make(chan int, 100)
    url := "https://www.nu.nl"
    wg.Add(100)
    // A bare "for range" needs no index variable at all, so no dummy
    // assignment or explicit counter is required.
    for range timingsParallel {
        go doGet(url, ch, &wg)
    }
    wg.Wait()
    close(ch)
    // Drain the (closed) channel into the result array.
    count := 0
    for ret := range ch {
        timingsParallel[count] = ret
        count++
    }
    // Total running time for the concurrent pass.
    timeParallel := time.Since(start).Milliseconds()

    // Sequential pass.
    start = time.Now()
    for i := range timingsSequential {
        timingsSequential[i] = doGetNoWg(url)
    }
    timeSequential := time.Since(start).Milliseconds()

    // Entropy of the measured timings versus the two baselines.
    entropy, _ := stats.Entropy(stats.LoadRawData(timingsParallel[:]))
    entropyEqual, _ := stats.Entropy(stats.LoadRawData(zeroes))
    entropyRandom, _ := stats.Entropy(stats.LoadRawData(randnrs[:]))

    // Report.
    fmt.Print("Parallel: ")
    fmt.Printf("%v\n", timingsParallel)
    fmt.Print("Sequential: ")
    fmt.Printf("%v\n", timingsSequential)
    fmt.Printf("Entropy equal: %v\n", entropyEqual)
    fmt.Printf("Entropy random: %v\n", entropyRandom)
    fmt.Printf("Entropy: %v\n", entropy)
    fmt.Printf("Time elapsed parallel: %v\n", timeParallel)
    fmt.Printf("Time elapsed sequential: %v", timeSequential)
}

示例输出(没有计时数组):

Entropy equal: 1.0986122886681096
Entropy random: 4.39737296171013
Entropy: 4.527705829831552
Time elapsed parallel: 786
Time elapsed sequential: 2160

所以 goroutines 部分似乎快了很多,而个别时间似乎高得多。有没有人知道如何正确安排时间(或者为什么他们被期望如此)?

=====更新
goroutines 的最后一个计时几乎总是等于 Time elapsed parallel 中测量的总时间,或比它低大约一毫秒。

=====更新2 问题似乎是第一次调用 time.Now() 总是产生相同的时间,而第二个 time.Now() 工作正常。至少这解释了结果:

GOstart: 2022-04-05 18:47:06.3117452 +0200 CEST m=+0.004000601
GOstop: 2022-04-05 18:47:06.4736105 +0200 CEST m=+0.165865901
GOstart: 2022-04-05 18:47:06.3117452 +0200 CEST m=+0.004000601
GOstop: 2022-04-05 18:47:06.4736105 +0200 CEST m=+0.165865901
...
GOstart: 2022-04-05 18:47:06.3117452 +0200 CEST m=+0.004000601
GOstop: 2022-04-05 18:47:06.6234215 +0200 CEST m=+0.315676901

导致此行为的原因在于 Go 的调度程序(shorter version of this question at golang-nuts)。上面的 goroutines 都在同一时间点开始执行(计时显示了这一点;另外检查 startTime 变量的内存位置也证明时间对象不是被“回收”的),但一旦它们调用 http.Get(),计时就开始递增,因为 http.Get() 造成了瓶颈,不允许生成的这么多 goroutine 并发执行。似乎这里使用了某种 FIFO 队列。
推荐观看和阅读:
Explaining the Golang I/O multiplexing netpoller model
Queues, Fairness and the Go scheduler

研究等待组的大小,我发现一些值显示出更加一致的计时(而不是递增的计时)。所以我想知道等待组大小对总时间和单个请求时间的影响是什么。我把上面的代码重构为一个程序:它对给定范围内的每个 waitgroup 大小进行多次实验,并把每次运行的总计时和单独计时持久化到 sqlite 数据库。生成的数据集可以很容易地用于例如 Jupyter 笔记本中的分析。不幸的是,在当前设置下,我只能发出大约 40K 个请求,之后就会受到限制。如果您有兴趣但不想等待数据,请参阅 my github 获取一些现成的数据集,因为完整运行需要很长时间。有趣的结果是,对于较小的 waitgroup 大小,concurrent/sequential 的比率急剧下降,并且可以看到最后连接开始受到限制。该次运行当时已被手动中止。
并发运行时间 / 顺序运行时间与等待组大小的关系:

不同等待组大小的个别计时图。

package main

import (
    "database/sql"
    "fmt"
    "log"
    "net/http"
    "os"
    "path/filepath"
    "runtime"
    "sync"
    "time"

    _ "github.com/mattn/go-sqlite3"
)

///// global vars — experiment configuration
const REQUESTS int = 100           // Single run size, performed two times (concurrent and sequential)
const URL string = "SET_YOUR_OWN" // Some file on a CDN somewhere; used for the GET requests
const DBNAME string = "netRand.db" // Name of the db file. Saved next to the executable
const WGMIN int = 1                // Start range for waitgroup size (inclusive)
const WGMAX int = 101              // Stop range for waitgroup size (exclusive)
const NREPEAT int = 10             // Number of times to repeat a run for a specific waitgroup size

//// types

// timingResult is a container for collecting the results of one run
// (one concurrent pass and one sequential pass) before persisting to DB.
type timingResult struct {
    WaitgroupSize       int             // batch size used in the concurrent pass
    ConcurrentTimingsMs [REQUESTS]int64 // per-request durations (ms), concurrent pass
    ConcurrentTotalMs   int64           // total wall time (ms) of the concurrent pass
    SequentialTimingsMs [REQUESTS]int64 // per-request durations (ms), sequential pass
    SequentialTotalMs   int64           // total wall time (ms) of the sequential pass
}

//// main
func main() {
    // Open the results database, then sweep the whole experiment matrix:
    // every waitgroup size in [WGMIN, WGMAX), repeated NREPEAT times.
    db := setupDb()
    defer db.Close()
    for size := WGMIN; size < WGMAX; size++ {
        for rep := 1; rep <= NREPEAT; rep++ {
            // One full run (concurrent + sequential), persisted immediately.
            result := requestTimes(size)
            persistTimings(result, db)
            fmt.Printf("\n======== %v of %v ============\n", rep, NREPEAT)
            fmt.Printf("current waitgroup size: %v\n", size)
            fmt.Printf("max waitgroup size: %v\n", WGMAX-1)
        }
    }
}

func requestTimes(waitgroupSize int) timingResult {
    // requestTimes performs one full experiment run: REQUESTS concurrent
    // requests (bounded by waitgroupSize) followed by REQUESTS sequential
    // requests, and bundles all timings into a single result value.
    concurrent, concurrentTotal := concurrentRequests(waitgroupSize)
    sequential, sequentialTotal := sequentialRequests()

    result := timingResult{
        WaitgroupSize:       waitgroupSize,
        ConcurrentTimingsMs: concurrent,
        ConcurrentTotalMs:   concurrentTotal,
        SequentialTimingsMs: sequential,
        SequentialTotalMs:   sequentialTotal,
    }
    return result
}
func persistTimings(timings timingResult, db *sql.DB) {
    // Persist the run row first, then attach both timing sets to the
    // id of the run that was just inserted.
    persistRun(timings, db)
    runId := getCurrentRunId(db)
    persistConcurrentTimings(runId, timings, db)
    persistSequentialTimings(runId, timings, db)
}
func concurrentRequests(waitgroupSize int) ([REQUESTS]int64, int64) {
    // concurrentRequests launches REQUESTS GET requests in goroutines,
    // bounding concurrency in batches via waitgroupSize. It returns the
    // per-request durations (ms, in completion order) and the total wall
    // time (ms) of the whole pass.
    start := time.Now()

    var wg sync.WaitGroup
    var timings [REQUESTS]int64
    // Buffered so every goroutine can send its result without blocking,
    // even though the channel is only drained after wg.Wait().
    ch := make(chan int64, REQUESTS)

    for i := range timings {
        wg.Add(1)
        go func() {
            defer wg.Done()
            doGetChannel(URL, ch)
        }()
        // waitgroupsize is controlled using modulo
        // making sure experiment size is always NTIMES
        // independent of waitgroupsize
        // NOTE(review): because i starts at 0, the very first "batch"
        // waits after a single request; only subsequent batches contain
        // waitgroupSize requests — confirm this is intended.
        if i%waitgroupSize == 0 {
            wg.Wait()
        }
    }
    wg.Wait()
    close(ch)

    // Drain the closed channel; timings[k] is the k-th request to
    // FINISH, not the k-th one launched.
    count := 0
    for ret := range ch {
        timings[count] = ret
        count++
    }

    return timings, time.Since(start).Milliseconds()
}
func doGetChannel(address string, channel chan int64) {
    // doGetChannel times a single GET request to address and sends the
    // elapsed time in milliseconds on channel.
    startSub := time.Now()
    resp, err := http.Get(address)
    if err != nil {
        log.Fatalln(err)
    }
    // Close the response body so the transport can reuse the connection;
    // the original discarded the response and leaked the body.
    resp.Body.Close()
    // time.Since uses the monotonic clock; subtracting two UnixMilli
    // wall-clock readings (as the original did) is not robust against
    // clock adjustments.
    channel <- time.Since(startSub).Milliseconds()
}
func sequentialRequests() ([REQUESTS]int64, int64) {
    // sequentialRequests performs REQUESTS GET requests one after the
    // other, returning each request's duration (ms) and the total wall
    // time (ms) of the pass.
    begin := time.Now()
    var perRequest [REQUESTS]int64
    for idx := 0; idx < REQUESTS; idx++ {
        perRequest[idx] = doGetReturn(URL)
    }
    total := time.Since(begin).Milliseconds()
    return perRequest, total
}
func doGetReturn(address string) int64 {
    // doGetReturn times a single GET request to address (no waitgroup or
    // channel involved) and returns the elapsed time in milliseconds.
    start := time.Now()
    resp, err := http.Get(address)
    if err != nil {
        log.Fatalln(err)
    }
    // Close the response body to avoid leaking the connection; the
    // original never closed it.
    resp.Body.Close()
    return time.Since(start).Milliseconds()
}

//// DB
func setupDb() *sql.DB {
    // setupDb opens (creating if necessary) the SQLite database next to
    // the executable and ensures the schema exists. Layout:
    //
    //      __________________________runs____________________
    //     |                                                  |
    // concurrent_timings(fk: run_id)         sequential_timings(fk: run_id)
    //
    ddl := []string{
        `
    CREATE TABLE IF NOT EXISTS runs (
    run_id INTEGER NOT NULL PRIMARY KEY,
    time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    waitgroup_size INTEGER,
    concurrent_total_ms INTEGER,
    sequential_total_ms INTEGER,
    concurrent_sequential_ratio REAL
    );`,
        `
    CREATE TABLE IF NOT EXISTS sequential_timings (
    run INTEGER,
    call_number INTEGER,
    timing_ms INTEGER,
    FOREIGN KEY(run) REFERENCES runs(run_id)
    );`,
        `
    CREATE TABLE IF NOT EXISTS concurrent_timings (
    run INTEGER,
    channel_position INTEGER,
    timing_ms INTEGER,
    FOREIGN KEY(run) REFERENCES runs(run_id)
    );`,
    }

    // retrieve platform appropriate connection string
    db, err := sql.Open("sqlite3", getConnectionString(DBNAME))
    if err != nil {
        log.Fatalln(err)
    }
    // Apply each CREATE TABLE statement in turn.
    for _, stmt := range ddl {
        if _, err := db.Exec(stmt); err != nil {
            log.Fatalln(err)
        }
    }
    return db
}
func getConnectionString(dbName string) string {
    // getConnectionString generates a platform appropriate connection
    // string; the db file is placed in the same directory as the
    // currently running executable.

    // retrieve the path to the currently executed executable
    ex, err := os.Executable()
    if err != nil {
        panic(err)
    }
    // retrieve path to containing dir
    dbDir := filepath.Dir(ex)

    // Append platform appropriate separator and dbName.
    // BUG FIX: the original used "\" — the backslash escapes the closing
    // quote, making the string literal unterminated (a compile error).
    // The backslash must itself be escaped as "\\".
    if runtime.GOOS == "windows" {
        dbDir = dbDir + "\\" + dbName
    } else {
        dbDir = dbDir + "/" + dbName
    }
    return dbDir
}
func persistRun(timings timingResult, db *sql.DB) {
    // persistRun inserts one row into runs containing the waitgroup size,
    // both totals, and the concurrent/sequential ratio.
    tx, err := db.Begin()
    if err != nil {
        log.Fatalln(err)
    }

    // Prepare on the transaction directly. The original prepared on db
    // and then called tx.Stmt(insertRun) twice, which created two
    // transaction-bound statements and never closed the db-level one.
    insertRun, err := tx.Prepare(`INSERT INTO runs(
        waitgroup_size, 
        sequential_total_ms, 
        concurrent_total_ms, 
        concurrent_sequential_ratio) 
        VALUES(?, ?, ?, ?)`)
    if err != nil {
        log.Fatalln(err)
    }
    defer insertRun.Close()

    _, err = insertRun.Exec(
        timings.WaitgroupSize,
        timings.SequentialTotalMs,
        timings.ConcurrentTotalMs,
        float32(timings.ConcurrentTotalMs)/float32(timings.SequentialTotalMs),
    )
    if err != nil {
        log.Fatalln(err)
    }
    if err := tx.Commit(); err != nil {
        log.Fatalln(err)
    }
}

func getCurrentRunId(db *sql.DB) int {
    // getCurrentRunId returns the highest run_id in runs, i.e. the id of
    // the row persistRun just inserted.
    var runId int
    // QueryRow is the idiomatic way to read a single aggregate value: it
    // cannot leak a Rows handle and cannot silently skip a scan error,
    // both of which the original Query+loop version risked.
    if err := db.QueryRow("SELECT MAX(run_id) FROM runs").Scan(&runId); err != nil {
        log.Fatalln(err)
    }
    return runId
}
func persistConcurrentTimings(runId int, timings timingResult, db *sql.DB) {
    // persistConcurrentTimings stores every concurrent-pass timing,
    // keyed by run id and channel position (completion order).
    tx, err := db.Begin()
    if err != nil {
        log.Fatalln(err)
    }

    // Prepare once on the transaction. The original called
    // tx.Stmt(insertTiming) inside the loop, creating a fresh
    // transaction-bound statement per row and closing none of them.
    insertTiming, err := tx.Prepare(`INSERT INTO concurrent_timings(
        run, 
        channel_position, 
        timing_ms) 
        VALUES(?, ?, ?)`)
    if err != nil {
        log.Fatalln(err)
    }
    defer insertTiming.Close()

    for i, timing := range timings.ConcurrentTimingsMs {
        if _, err := insertTiming.Exec(runId, i, timing); err != nil {
            log.Fatalln(err)
        }
    }
    if err := tx.Commit(); err != nil {
        log.Fatalln(err)
    }
}
func persistSequentialTimings(runId int, timings timingResult, db *sql.DB) {
    // persistSequentialTimings stores every sequential-pass timing,
    // keyed by run id and call number (launch order).
    tx, err := db.Begin()
    if err != nil {
        log.Fatalln(err)
    }

    // Prepare once on the transaction. The original called
    // tx.Stmt(insertTiming) inside the loop, creating a fresh
    // transaction-bound statement per row and closing none of them.
    insertTiming, err := tx.Prepare(`INSERT INTO sequential_timings(
        run, 
        call_number, 
        timing_ms) 
        VALUES(?, ?, ?)`)
    if err != nil {
        log.Fatalln(err)
    }
    defer insertTiming.Close()

    for i, timing := range timings.SequentialTimingsMs {
        if _, err := insertTiming.Exec(runId, i, timing); err != nil {
            log.Fatalln(err)
        }
    }
    if err := tx.Commit(); err != nil {
        log.Fatalln(err)
    }
}