解析csv文件时出现奇怪的输出
weird output when parsing csv file
我正在用 golang 解析来自 google api 的 csv 文件,该文件以 utf-16 编码,下面的代码尝试读取一条记录(跳过 header)并打印记录,但它给了我这样的输出,这很奇怪:
, v=/09/20 00:35:42 k=Smartfren Andromax AD681H
估计和utf-16编码有关,具体情况不详,代码如下:
package main
import (
"encoding/csv"
"io"
"log"
"net/http"
"strings"
)
// url is the address of the device list CSV that main downloads.
var url = "http://storage.googleapis.com/play_public/supported_devices.csv"

// main downloads the CSV at url, discards the header row, and logs the
// fourth and second columns (whitespace-trimmed) of the first record that
// has at least four fields.
//
// The response body is handed to the CSV reader as-is, with no character-set
// decoding step in between.
func main() {
	response, getErr := http.Get(url)
	if getErr != nil {
		return
	}
	defer response.Body.Close()

	rows := csv.NewReader(response.Body)
	rows.FieldsPerRecord = -1 // records may have a varying number of fields
	rows.LazyQuotes = true

	// The first row is the header; its contents and any error are ignored.
	_, _ = rows.Read()

	devices := map[string]string{}
	for {
		fields, readErr := rows.Read()
		switch {
		case readErr == io.EOF:
			return
		case readErr != nil:
			log.Println(readErr)
			continue
		case len(fields) < 4:
			continue
		}

		devices[strings.TrimSpace(fields[3])] = strings.TrimSpace(fields[1])
		for key, value := range devices {
			log.Printf("k=%s, v=%s\n", key, value)
		}
		// Only the first usable record is printed.
		return
	}
}
如您所料,输入数据必须先从 UTF-16 编码的字符流转换为 UTF-8 编码的字符流,然后才能交给 csv 解析器。您可以使用 Go 子存储库中的包 golang.org/x/text/encoding/unicode 来完成转换:
package main
import (
"encoding/csv"
"io"
"log"
"net/http"
"strings"
"golang.org/x/text/encoding/unicode"
)
// url is the address of the UTF-16 encoded device list CSV that main downloads.
var url = "http://storage.googleapis.com/play_public/supported_devices.csv"

// main downloads the CSV at url, decodes it from UTF-16 to UTF-8, skips the
// header row, and logs the fourth and second columns (whitespace-trimmed) of
// the first record that has at least four fields.
//
// Request failures, non-200 responses and read errors are logged. Malformed
// CSV records are skipped; any other read error stops processing.
func main() {
	resp, err := http.Get(url)
	if err != nil {
		log.Println(err)
		return
	}
	defer resp.Body.Close()

	// Do not try to parse an error page as CSV.
	if resp.StatusCode != http.StatusOK {
		log.Printf("unexpected HTTP status: %s", resp.Status)
		return
	}

	// UseBOM lets a leading byte-order mark select the endianness;
	// LittleEndian is only the default when no BOM is present.
	dec := unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder()
	r := csv.NewReader(dec.Reader(resp.Body))
	r.LazyQuotes = true
	r.FieldsPerRecord = -1 // records may have a varying number of fields

	// Skip the header row. A malformed header is still just skipped, but an
	// empty stream or a transport/decoding failure ends the program here.
	if _, err := r.Read(); err != nil {
		if _, malformed := err.(*csv.ParseError); !malformed {
			if err != io.EOF {
				log.Println(err)
			}
			return
		}
	}

	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Println(err)
			if _, malformed := err.(*csv.ParseError); malformed {
				// Only this record is bad; the next one may be fine.
				continue
			}
			// A transport or decoding error may repeat on every call, so
			// retrying could loop forever.
			return
		}
		if len(record) < 4 {
			continue
		}

		// Only the first usable record is printed.
		log.Printf("k=%s, v=%s\n", strings.TrimSpace(record[3]), strings.TrimSpace(record[1]))
		break
	}
}
我正在用 golang 解析来自 google api 的 csv 文件,该文件以 utf-16 编码,下面的代码尝试读取一条记录(跳过 header)并打印记录,但它给了我这样的输出,这很奇怪:
, v=/09/20 00:35:42 k=Smartfren Andromax AD681H
估计和utf-16编码有关,具体情况不详,代码如下:
package main
import (
"encoding/csv"
"io"
"log"
"net/http"
"strings"
)
// url is the address of the device list CSV that main downloads.
var url = "http://storage.googleapis.com/play_public/supported_devices.csv"

// main downloads the CSV at url, discards the header row, and logs the
// fourth and second columns (whitespace-trimmed) of the first record that
// has at least four fields.
//
// The response body is handed to the CSV reader as-is, with no character-set
// decoding step in between.
func main() {
	response, getErr := http.Get(url)
	if getErr != nil {
		return
	}
	defer response.Body.Close()

	rows := csv.NewReader(response.Body)
	rows.FieldsPerRecord = -1 // records may have a varying number of fields
	rows.LazyQuotes = true

	// The first row is the header; its contents and any error are ignored.
	_, _ = rows.Read()

	devices := map[string]string{}
	for {
		fields, readErr := rows.Read()
		switch {
		case readErr == io.EOF:
			return
		case readErr != nil:
			log.Println(readErr)
			continue
		case len(fields) < 4:
			continue
		}

		devices[strings.TrimSpace(fields[3])] = strings.TrimSpace(fields[1])
		for key, value := range devices {
			log.Printf("k=%s, v=%s\n", key, value)
		}
		// Only the first usable record is printed.
		return
	}
}
如您所料,输入数据必须先从 UTF-16 编码的字符流转换为 UTF-8 编码的字符流,然后才能交给 csv 解析器。您可以使用 Go 子存储库中的包 golang.org/x/text/encoding/unicode 来完成转换:
package main
import (
"encoding/csv"
"io"
"log"
"net/http"
"strings"
"golang.org/x/text/encoding/unicode"
)
// url is the address of the UTF-16 encoded device list CSV that main downloads.
var url = "http://storage.googleapis.com/play_public/supported_devices.csv"

// main downloads the CSV at url, decodes it from UTF-16 to UTF-8, skips the
// header row, and logs the fourth and second columns (whitespace-trimmed) of
// the first record that has at least four fields.
//
// Request failures, non-200 responses and read errors are logged. Malformed
// CSV records are skipped; any other read error stops processing.
func main() {
	resp, err := http.Get(url)
	if err != nil {
		log.Println(err)
		return
	}
	defer resp.Body.Close()

	// Do not try to parse an error page as CSV.
	if resp.StatusCode != http.StatusOK {
		log.Printf("unexpected HTTP status: %s", resp.Status)
		return
	}

	// UseBOM lets a leading byte-order mark select the endianness;
	// LittleEndian is only the default when no BOM is present.
	dec := unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder()
	r := csv.NewReader(dec.Reader(resp.Body))
	r.LazyQuotes = true
	r.FieldsPerRecord = -1 // records may have a varying number of fields

	// Skip the header row. A malformed header is still just skipped, but an
	// empty stream or a transport/decoding failure ends the program here.
	if _, err := r.Read(); err != nil {
		if _, malformed := err.(*csv.ParseError); !malformed {
			if err != io.EOF {
				log.Println(err)
			}
			return
		}
	}

	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Println(err)
			if _, malformed := err.(*csv.ParseError); malformed {
				// Only this record is bad; the next one may be fine.
				continue
			}
			// A transport or decoding error may repeat on every call, so
			// retrying could loop forever.
			return
		}
		if len(record) < 4 {
			continue
		}

		// Only the first usable record is printed.
		log.Printf("k=%s, v=%s\n", strings.TrimSpace(record[3]), strings.TrimSpace(record[1]))
		break
	}
}