Go 中的切片分块
Slice chunking in Go
我有一个包含约 210 万个日志字符串的切片,我想把它拆分成一个由切片组成的切片(`[][]string`),使这些字符串尽可能均匀地分布在各个子切片中。
这是我目前的情况:
// logs is a slice with ~2.1 million strings in it.
// The snippet below is the question's attempt at splitting logs into NumCPU
// chunks; the NOTE(review) comments mark why it does not work as intended.
var divided = make([][]string, 0)
NumCPU := runtime.NumCPU()
// Integer division truncates, so NumCPU chunks of ChunkSize elements can leave
// up to NumCPU-1 trailing elements of logs outside every chunk.
ChunkSize := len(logs) / NumCPU
for i := 0; i < NumCPU; i++ {
temp := make([]string, 0)
idx := i * ChunkSize
end := i * ChunkSize + ChunkSize
// NOTE(review): x is an index into the sub-slice logs[idx:end], i.e. it runs
// from 0 to ChunkSize-1, so logs[x] always reads from the start of logs rather
// than from idx. Every chunk therefore ends up as a copy of the first one.
for x := range logs[idx:end] {
temp = append(temp, logs[x])
}
// NOTE(review): this condition is never true inside the loop, because the loop
// only runs while i < NumCPU; the remainder is never appended.
if i == NumCPU {
for x := range logs[idx:] {
temp = append(temp, logs[x])
}
}
divided = append(divided, temp)
}
`idx := i * ChunkSize` 会给出当前块在 `logs` 中的起始索引("chunk start"),`end := i * ChunkSize + ChunkSize` 会给出 "chunk end",即该块范围的结束位置。我找不到任何关于如何在 Go 中对切片进行分块/拆分(chunk/split)或只迭代有限范围的文档或示例,所以这就是我想出的办法。但是,它只是把第一个块复制了多次,所以并不能正常工作。
我如何(尽可能均匀地)在 Go 中对切片进行分块?
您不需要创建新的切片,只需将 `logs` 的子切片追加到 `divided` 切片即可。
http://play.golang.org/p/vyihJZlDVy
// Sub-slices of logs are appended directly, so no element is copied.
var divided [][]string

// Ceiling division: the chunk size is rounded up so that at most numCPU
// chunks are needed to cover all of logs.
chunkSize := (len(logs) + numCPU - 1) / numCPU

// Peel chunkSize elements off the front of the remaining view until it is empty.
for rest := logs; len(rest) > 0; {
	n := chunkSize
	if n > len(rest) {
		// The final chunk takes whatever is left.
		n = len(rest)
	}
	divided = append(divided, rest[:n])
	rest = rest[n:]
}

fmt.Printf("%#v\n", divided)
另一种变体。它的运行速度比 JimB 提出的方案快 2.5 倍。测试和基准测试见此处:
https://play.golang.org/p/WoXHqGjozMI
// chunks splits xs into consecutive sub-slices of chunkSize elements; only
// the last chunk may be shorter.
//
// The chunks share xs's backing array (no element is copied), but each chunk
// has its capacity clipped to its length, so appending to one chunk cannot
// overwrite the elements of the chunk that follows it.
//
// An empty xs yields nil. For a non-empty xs, chunks panics if chunkSize is
// less than 1.
func chunks(xs []string, chunkSize int) [][]string {
	if len(xs) == 0 {
		return nil
	}
	if chunkSize < 1 {
		panic("chunks: chunkSize must be at least 1")
	}

	// Exact chunk count (ceiling division), so the result is allocated once.
	n := len(xs) / chunkSize
	if len(xs)%chunkSize != 0 {
		n++
	}
	divided := make([][]string, n)

	prev := 0
	for i := range divided {
		// Compare against the remaining length instead of computing
		// prev+chunkSize first, which could overflow for a huge chunkSize.
		next := len(xs)
		if chunkSize < next-prev {
			next = prev + chunkSize
		}
		divided[i] = xs[prev:next:next]
		prev = next
	}
	return divided
}
使用反射,适用于任意 `[]T`:
https://github.com/kirito41dd/xslice
package main
import (
"fmt"
"github.com/kirito41dd/xslice"
)
// main splits a ten-element int slice into chunks of three using
// xslice.SplitToChunks and prints the result.
func main() {
	nums := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
	// The call returns an interface value, so assert it back to [][]int.
	chunked := xslice.SplitToChunks(nums, 3).([][]int)
	fmt.Println(chunked) // [[0 1 2] [3 4 5] [6 7 8] [9]]
}
https://github.com/kirito41dd/xslice/blob/e50d91fa75241a3a03d262ad51c8e4cb2ea4b995/split.go#L12
// SplitToChunks splits slice, which must be a value of some slice type []T,
// into consecutive chunks of chunkSize elements (only the last chunk may be
// shorter) and returns them as a [][]T wrapped in an interface value; callers
// type-assert the result, e.g. SplitToChunks(s, 3).([][]int).
//
// The chunks are produced with reflect's Value.Slice, so they alias the
// input's backing array. An empty input yields an empty, non-nil [][]T.
//
// SplitToChunks panics if slice is not a slice (including an untyped nil) or
// if chunkSize is less than 1.
func SplitToChunks(slice interface{}, chunkSize int) interface{} {
	sliceVal := reflect.ValueOf(slice)
	// Validate the kind before calling Len: Len panics inside reflect for
	// kinds such as int, and the zero Value produced by a nil argument has
	// kind Invalid, so checking first guarantees the message below.
	if sliceVal.Kind() != reflect.Slice {
		panic("parameter must be []T")
	}
	if chunkSize < 1 {
		panic("chunkSize must be at least 1")
	}

	length := sliceVal.Len()
	// Exact chunk count (ceiling division), used as the result's capacity.
	n := length / chunkSize
	if length%chunkSize > 0 {
		n++
	}
	chunks := reflect.MakeSlice(reflect.SliceOf(sliceVal.Type()), 0, n)

	for st := 0; st < length; {
		ed := length
		if chunkSize < length-st {
			ed = st + chunkSize
		}
		chunks = reflect.Append(chunks, sliceVal.Slice(st, ed))
		st = ed
	}
	return chunks.Interface()
}
// chunkSlice splits items into consecutive chunks of chunkSize elements; only
// the final chunk may be shorter.
//
// The chunks alias items' backing array (nothing is copied), but each chunk
// has its capacity clipped to its length, so appending to one chunk cannot
// overwrite the chunk that follows it.
//
// An empty items yields nil (no chunks). For a non-empty items, chunkSlice
// panics if chunkSize is less than 1; without this check a chunkSize of 0
// would never shrink items and the loop would not terminate.
func chunkSlice(items []int32, chunkSize int32) (chunks [][]int32) {
	if len(items) == 0 {
		return nil
	}
	if chunkSize < 1 {
		panic("chunkSlice: chunkSize must be at least 1")
	}

	// Work in int: converting len(items) to int32 would wrap for slices
	// longer than the int32 range.
	size := int(chunkSize)
	n := len(items) / size
	if len(items)%size != 0 {
		n++
	}
	chunks = make([][]int32, 0, n)

	// While more than one chunk's worth remains, cut a full chunk off the front.
	for size < len(items) {
		chunks = append(chunks, items[:size:size])
		items = items[size:]
	}
	// Whatever is left (between 1 and size elements) is the last chunk.
	return append(chunks, items[:len(items):len(items)])
}
视觉示例
假设我们要将一个数组按每块 3 个元素进行分块
items: [1,2,3,4,5,6,7]
chunks: []
items: [1,2,3,4,5,6,7]
chunks: [[1,2,3]]
items: [4,5,6,7]
chunks: [[1,2,3]]
items: [4,5,6,7]
chunks: [[1,2,3],[4,5,6]]
items: [7]
chunks: [[1,2,3],[4,5,6]]
items: [7]
chunks: [[1,2,3],[4,5,6],[7]]
return
以最小分配进行批处理
Useful if you want to do batch processing on large slices.
actions := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
batchSize := 3

// Reserve room for ceil(len(actions)/batchSize) batches up front so the
// appends below do not have to grow the outer slice.
batches := make([][]int, 0, (len(actions)+batchSize-1)/batchSize)

// While more than one batch's worth remains, cut a full batch off the front.
for len(actions) > batchSize {
	// The three-index slice caps the batch at batchSize elements.
	head := actions[:batchSize:batchSize]
	actions = actions[batchSize:]
	batches = append(batches, head)
}

// The remaining elements form the final batch.
batches = append(batches, actions)
产生以下结果:
[[0 1 2] [3 4 5] [6 7 8] [9]]
总结:
// ChunkStringSlice divides s into consecutive chunks of chunkSize elements;
// only the last chunk may be shorter.
//
// The chunks alias s's backing array (nothing is copied), but each chunk has
// its capacity clipped to its length, so appending to one chunk cannot
// overwrite the chunk that follows it.
//
// An empty s yields nil. For a non-empty s, ChunkStringSlice panics if
// chunkSize is less than 1.
func ChunkStringSlice(s []string, chunkSize int) [][]string {
	if len(s) == 0 {
		return nil
	}
	if chunkSize < 1 {
		panic("ChunkStringSlice: chunkSize must be at least 1")
	}

	// Integer ceiling division; no float round-trip is needed.
	chunkNum := len(s) / chunkSize
	if len(s)%chunkSize != 0 {
		chunkNum++
	}
	res := make([][]string, 0, chunkNum)

	for start := 0; start < len(s); {
		end := len(s)
		if chunkSize < end-start {
			end = start + chunkSize
		}
		res = append(res, s[start:end:end])
		start = end
	}
	return res
}
// ChunkStringSlice2 divides s into exactly chunkNum consecutive chunks.
//
// Each chunk takes ceil(len(s)/chunkNum) elements until s is exhausted; any
// chunks after that point are shorter or empty, so the result always has
// chunkNum entries even when chunkNum exceeds len(s). Slice bounds are clamped
// to len(s), which keeps inputs such as len(s)=5, chunkNum=4 from slicing out
// of range.
//
// The chunks alias s's backing array, with each chunk's capacity clipped to
// its length. ChunkStringSlice2 panics if chunkNum is less than 1.
func ChunkStringSlice2(s []string, chunkNum int) [][]string {
	if chunkNum < 1 {
		panic("ChunkStringSlice2: chunkNum must be at least 1")
	}

	// Integer ceiling division; no float round-trip is needed.
	chunkSize := len(s) / chunkNum
	if len(s)%chunkNum != 0 {
		chunkSize++
	}
	res := make([][]string, 0, chunkNum)

	start := 0
	for i := 0; i < chunkNum; i++ {
		// Clamp the end of the chunk to the end of s.
		end := len(s)
		if chunkSize < end-start {
			end = start + chunkSize
		}
		res = append(res, s[start:end:end])
		start = end
	}
	return res
}
使用泛型(Go 版本 >=1.18):
// chunkBy splits items into consecutive chunks of chunkSize elements; only
// the final chunk may be shorter.
//
// The chunks alias items' backing array (nothing is copied), but each chunk
// has its capacity clipped to its length, so appending to one chunk cannot
// overwrite the chunk that follows it.
//
// An empty items yields nil (no chunks). For a non-empty items, chunkBy
// panics if chunkSize is less than 1.
func chunkBy[T any](items []T, chunkSize int) (chunks [][]T) {
	if len(items) == 0 {
		return nil
	}
	if chunkSize < 1 {
		panic("chunkBy: chunkSize must be at least 1")
	}

	// Exact chunk count (ceiling division), so the outer slice never regrows
	// and is not over-allocated when len(items) is a multiple of chunkSize.
	n := len(items) / chunkSize
	if len(items)%chunkSize != 0 {
		n++
	}
	chunks = make([][]T, 0, n)

	// While more than one chunk's worth remains, cut a full chunk off the front.
	for chunkSize < len(items) {
		chunks = append(chunks, items[:chunkSize:chunkSize])
		items = items[chunkSize:]
	}
	// Whatever is left (between 1 and chunkSize elements) is the last chunk.
	return append(chunks, items[:len(items):len(items)])
}
我有一个包含约 210 万个日志字符串的切片,我想把它拆分成一个由切片组成的切片(`[][]string`),使这些字符串尽可能均匀地分布在各个子切片中。
这是我目前的情况:
// logs is a slice with ~2.1 million strings in it.
// The snippet below is the question's attempt at splitting logs into NumCPU
// chunks; the NOTE(review) comments mark why it does not work as intended.
var divided = make([][]string, 0)
NumCPU := runtime.NumCPU()
// Integer division truncates, so NumCPU chunks of ChunkSize elements can leave
// up to NumCPU-1 trailing elements of logs outside every chunk.
ChunkSize := len(logs) / NumCPU
for i := 0; i < NumCPU; i++ {
temp := make([]string, 0)
idx := i * ChunkSize
end := i * ChunkSize + ChunkSize
// NOTE(review): x is an index into the sub-slice logs[idx:end], i.e. it runs
// from 0 to ChunkSize-1, so logs[x] always reads from the start of logs rather
// than from idx. Every chunk therefore ends up as a copy of the first one.
for x := range logs[idx:end] {
temp = append(temp, logs[x])
}
// NOTE(review): this condition is never true inside the loop, because the loop
// only runs while i < NumCPU; the remainder is never appended.
if i == NumCPU {
for x := range logs[idx:] {
temp = append(temp, logs[x])
}
}
divided = append(divided, temp)
}
`idx := i * ChunkSize` 会给出当前块在 `logs` 中的起始索引("chunk start"),`end := i * ChunkSize + ChunkSize` 会给出 "chunk end",即该块范围的结束位置。我找不到任何关于如何在 Go 中对切片进行分块/拆分(chunk/split)或只迭代有限范围的文档或示例,所以这就是我想出的办法。但是,它只是把第一个块复制了多次,所以并不能正常工作。
我如何(尽可能均匀地)在 Go 中对切片进行分块?
您不需要创建新的切片,只需将 `logs` 的子切片追加到 `divided` 切片即可。
http://play.golang.org/p/vyihJZlDVy
// Sub-slices of logs are appended directly, so no element is copied.
var divided [][]string

// Ceiling division: the chunk size is rounded up so that at most numCPU
// chunks are needed to cover all of logs.
chunkSize := (len(logs) + numCPU - 1) / numCPU

// Peel chunkSize elements off the front of the remaining view until it is empty.
for rest := logs; len(rest) > 0; {
	n := chunkSize
	if n > len(rest) {
		// The final chunk takes whatever is left.
		n = len(rest)
	}
	divided = append(divided, rest[:n])
	rest = rest[n:]
}

fmt.Printf("%#v\n", divided)
另一种变体。它的运行速度比 JimB 提出的方案快 2.5 倍。测试和基准测试见此处:
https://play.golang.org/p/WoXHqGjozMI
// chunks splits xs into consecutive sub-slices of chunkSize elements; only
// the last chunk may be shorter.
//
// The chunks share xs's backing array (no element is copied), but each chunk
// has its capacity clipped to its length, so appending to one chunk cannot
// overwrite the elements of the chunk that follows it.
//
// An empty xs yields nil. For a non-empty xs, chunks panics if chunkSize is
// less than 1.
func chunks(xs []string, chunkSize int) [][]string {
	if len(xs) == 0 {
		return nil
	}
	if chunkSize < 1 {
		panic("chunks: chunkSize must be at least 1")
	}

	// Exact chunk count (ceiling division), so the result is allocated once.
	n := len(xs) / chunkSize
	if len(xs)%chunkSize != 0 {
		n++
	}
	divided := make([][]string, n)

	prev := 0
	for i := range divided {
		// Compare against the remaining length instead of computing
		// prev+chunkSize first, which could overflow for a huge chunkSize.
		next := len(xs)
		if chunkSize < next-prev {
			next = prev + chunkSize
		}
		divided[i] = xs[prev:next:next]
		prev = next
	}
	return divided
}
使用反射,适用于任意 `[]T`:
https://github.com/kirito41dd/xslice
package main
import (
"fmt"
"github.com/kirito41dd/xslice"
)
// main splits a ten-element int slice into chunks of three using
// xslice.SplitToChunks and prints the result.
func main() {
	nums := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
	// The call returns an interface value, so assert it back to [][]int.
	chunked := xslice.SplitToChunks(nums, 3).([][]int)
	fmt.Println(chunked) // [[0 1 2] [3 4 5] [6 7 8] [9]]
}
https://github.com/kirito41dd/xslice/blob/e50d91fa75241a3a03d262ad51c8e4cb2ea4b995/split.go#L12
// SplitToChunks splits slice, which must be a value of some slice type []T,
// into consecutive chunks of chunkSize elements (only the last chunk may be
// shorter) and returns them as a [][]T wrapped in an interface value; callers
// type-assert the result, e.g. SplitToChunks(s, 3).([][]int).
//
// The chunks are produced with reflect's Value.Slice, so they alias the
// input's backing array. An empty input yields an empty, non-nil [][]T.
//
// SplitToChunks panics if slice is not a slice (including an untyped nil) or
// if chunkSize is less than 1.
func SplitToChunks(slice interface{}, chunkSize int) interface{} {
	sliceVal := reflect.ValueOf(slice)
	// Validate the kind before calling Len: Len panics inside reflect for
	// kinds such as int, and the zero Value produced by a nil argument has
	// kind Invalid, so checking first guarantees the message below.
	if sliceVal.Kind() != reflect.Slice {
		panic("parameter must be []T")
	}
	if chunkSize < 1 {
		panic("chunkSize must be at least 1")
	}

	length := sliceVal.Len()
	// Exact chunk count (ceiling division), used as the result's capacity.
	n := length / chunkSize
	if length%chunkSize > 0 {
		n++
	}
	chunks := reflect.MakeSlice(reflect.SliceOf(sliceVal.Type()), 0, n)

	for st := 0; st < length; {
		ed := length
		if chunkSize < length-st {
			ed = st + chunkSize
		}
		chunks = reflect.Append(chunks, sliceVal.Slice(st, ed))
		st = ed
	}
	return chunks.Interface()
}
// chunkSlice splits items into consecutive chunks of chunkSize elements; only
// the final chunk may be shorter.
//
// The chunks alias items' backing array (nothing is copied), but each chunk
// has its capacity clipped to its length, so appending to one chunk cannot
// overwrite the chunk that follows it.
//
// An empty items yields nil (no chunks). For a non-empty items, chunkSlice
// panics if chunkSize is less than 1; without this check a chunkSize of 0
// would never shrink items and the loop would not terminate.
func chunkSlice(items []int32, chunkSize int32) (chunks [][]int32) {
	if len(items) == 0 {
		return nil
	}
	if chunkSize < 1 {
		panic("chunkSlice: chunkSize must be at least 1")
	}

	// Work in int: converting len(items) to int32 would wrap for slices
	// longer than the int32 range.
	size := int(chunkSize)
	n := len(items) / size
	if len(items)%size != 0 {
		n++
	}
	chunks = make([][]int32, 0, n)

	// While more than one chunk's worth remains, cut a full chunk off the front.
	for size < len(items) {
		chunks = append(chunks, items[:size:size])
		items = items[size:]
	}
	// Whatever is left (between 1 and size elements) is the last chunk.
	return append(chunks, items[:len(items):len(items)])
}
视觉示例
假设我们要将一个数组按每块 3 个元素进行分块
items: [1,2,3,4,5,6,7]
chunks: []
items: [1,2,3,4,5,6,7]
chunks: [[1,2,3]]
items: [4,5,6,7]
chunks: [[1,2,3]]
items: [4,5,6,7]
chunks: [[1,2,3],[4,5,6]]
items: [7]
chunks: [[1,2,3],[4,5,6]]
items: [7]
chunks: [[1,2,3],[4,5,6],[7]]
return
以最小分配进行批处理
Useful if you want to do batch processing on large slices.
actions := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
batchSize := 3

// Reserve room for ceil(len(actions)/batchSize) batches up front so the
// appends below do not have to grow the outer slice.
batches := make([][]int, 0, (len(actions)+batchSize-1)/batchSize)

// While more than one batch's worth remains, cut a full batch off the front.
for len(actions) > batchSize {
	// The three-index slice caps the batch at batchSize elements.
	head := actions[:batchSize:batchSize]
	actions = actions[batchSize:]
	batches = append(batches, head)
}

// The remaining elements form the final batch.
batches = append(batches, actions)
产生以下结果:
[[0 1 2] [3 4 5] [6 7 8] [9]]
总结:
// ChunkStringSlice divides s into consecutive chunks of chunkSize elements;
// only the last chunk may be shorter.
//
// The chunks alias s's backing array (nothing is copied), but each chunk has
// its capacity clipped to its length, so appending to one chunk cannot
// overwrite the chunk that follows it.
//
// An empty s yields nil. For a non-empty s, ChunkStringSlice panics if
// chunkSize is less than 1.
func ChunkStringSlice(s []string, chunkSize int) [][]string {
	if len(s) == 0 {
		return nil
	}
	if chunkSize < 1 {
		panic("ChunkStringSlice: chunkSize must be at least 1")
	}

	// Integer ceiling division; no float round-trip is needed.
	chunkNum := len(s) / chunkSize
	if len(s)%chunkSize != 0 {
		chunkNum++
	}
	res := make([][]string, 0, chunkNum)

	for start := 0; start < len(s); {
		end := len(s)
		if chunkSize < end-start {
			end = start + chunkSize
		}
		res = append(res, s[start:end:end])
		start = end
	}
	return res
}
// ChunkStringSlice2 divides s into exactly chunkNum consecutive chunks.
//
// Each chunk takes ceil(len(s)/chunkNum) elements until s is exhausted; any
// chunks after that point are shorter or empty, so the result always has
// chunkNum entries even when chunkNum exceeds len(s). Slice bounds are clamped
// to len(s), which keeps inputs such as len(s)=5, chunkNum=4 from slicing out
// of range.
//
// The chunks alias s's backing array, with each chunk's capacity clipped to
// its length. ChunkStringSlice2 panics if chunkNum is less than 1.
func ChunkStringSlice2(s []string, chunkNum int) [][]string {
	if chunkNum < 1 {
		panic("ChunkStringSlice2: chunkNum must be at least 1")
	}

	// Integer ceiling division; no float round-trip is needed.
	chunkSize := len(s) / chunkNum
	if len(s)%chunkNum != 0 {
		chunkSize++
	}
	res := make([][]string, 0, chunkNum)

	start := 0
	for i := 0; i < chunkNum; i++ {
		// Clamp the end of the chunk to the end of s.
		end := len(s)
		if chunkSize < end-start {
			end = start + chunkSize
		}
		res = append(res, s[start:end:end])
		start = end
	}
	return res
}
使用泛型(Go 版本 >=1.18):
// chunkBy splits items into consecutive chunks of chunkSize elements; only
// the final chunk may be shorter.
//
// The chunks alias items' backing array (nothing is copied), but each chunk
// has its capacity clipped to its length, so appending to one chunk cannot
// overwrite the chunk that follows it.
//
// An empty items yields nil (no chunks). For a non-empty items, chunkBy
// panics if chunkSize is less than 1.
func chunkBy[T any](items []T, chunkSize int) (chunks [][]T) {
	if len(items) == 0 {
		return nil
	}
	if chunkSize < 1 {
		panic("chunkBy: chunkSize must be at least 1")
	}

	// Exact chunk count (ceiling division), so the outer slice never regrows
	// and is not over-allocated when len(items) is a multiple of chunkSize.
	n := len(items) / chunkSize
	if len(items)%chunkSize != 0 {
		n++
	}
	chunks = make([][]T, 0, n)

	// While more than one chunk's worth remains, cut a full chunk off the front.
	for chunkSize < len(items) {
		chunks = append(chunks, items[:chunkSize:chunkSize])
		items = items[chunkSize:]
	}
	// Whatever is left (between 1 and chunkSize elements) is the last chunk.
	return append(chunks, items[:len(items):len(items)])
}