使用 Go 的 archive/zip 创建带有 Unicode 文件名的 zip 存档

Creating a zip archive with Unicode filenames using Go's archive/zip

package main

import (
    "archive/zip"
    "fmt"
    "io"
    "os"
    "path/filepath"
    "strings"
)

// main archives a directory tree into a new zip file.
//
// Usage: <program> <source-dir> <output.zip>
//
// It exits with status 2 when an argument is missing and with status 1 when
// the archive cannot be written; the reason is printed to standard error.
func main() {
	if len(os.Args) < 3 {
		fmt.Fprintln(os.Stderr, "usage: <program> <source-dir> <output.zip>")
		os.Exit(2)
	}
	if err := zipDir(os.Args[1], os.Args[2]); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// zipDir writes every non-directory file found under srcDir into a new zip
// archive at zipPath. Entry names are relative to the parent of srcDir, so the
// archive contains srcDir itself as its top-level folder. Each entry name is
// printed to standard output as it is added.
//
// It returns the first error encountered, including errors from finalizing
// the archive or closing the output file.
func zipDir(srcDir, zipPath string) (err error) {
	// Resolve to an absolute path so that inputs such as "." or ".." still
	// have a real directory name and a real parent.
	root, err := filepath.Abs(srcDir)
	if err != nil {
		return fmt.Errorf("resolving %q: %w", srcDir, err)
	}
	base := filepath.Dir(root)

	out, err := os.Create(zipPath)
	if err != nil {
		return fmt.Errorf("creating archive %q: %w", zipPath, err)
	}
	defer func() {
		if cerr := out.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("closing %q: %w", zipPath, cerr)
		}
	}()

	zw := zip.NewWriter(out)
	// Deferred calls run last-in first-out, so the zip writer is finalized
	// (central directory written) before the underlying file is closed.
	defer func() {
		if cerr := zw.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("finalizing %q: %w", zipPath, cerr)
		}
	}()

	return filepath.Walk(root, func(path string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if info.IsDir() {
			return nil
		}
		// Walk builds every path by joining onto root, so base is always a
		// literal prefix of path; strip it and the separator that follows.
		rel := strings.TrimPrefix(strings.TrimPrefix(path, base), string(filepath.Separator))
		// Zip entry names must use forward slashes on every platform.
		name := filepath.ToSlash(rel)
		fmt.Println(name)
		return addFile(zw, path, name)
	})
}

// addFile copies the file at path into zw as an entry called name, created
// with zip.Writer.Create.
func addFile(zw *zip.Writer, path, name string) error {
	src, err := os.Open(path)
	if err != nil {
		return err
	}
	defer src.Close()

	dst, err := zw.Create(name)
	if err != nil {
		return fmt.Errorf("adding %q to archive: %w", name, err)
	}
	if _, err := io.Copy(dst, src); err != nil {
		return fmt.Errorf("writing %q to archive: %w", name, err)
	}
	return nil
}

这是 mydir 的目录结构:

-----root
    |---2015-05(dir)
         |---中文.go
    |---package(dir)
    |---你好.go

我用这段代码压缩这个目录时,中文文件名会乱码。谁能帮我解决这个问题?

问题在于:根据 Zip 规范,zip 条目名称默认只允许使用原始的 IBM PC 字符集(Code Page 437),其中只有 ASCII 字符可以放心使用。规范原文如下(来源:APPENDIX D):

APPENDIX D.1 The ZIP format has historically supported only the original IBM PC character encoding set, commonly referred to as IBM Code Page 437. This limits storing file name characters to only those within the original MS-DOS range of values and does not properly support file names in other character encodings, or languages. To address this limitation, this specification will support the following change.

后来添加了对 Unicode 名称的支持。这可以用称为 general purpose bit 11 的特殊位标记,也称为 Language encoding flag (EFS):

Section 4.4.4 - General purpose bit flag - Bit 11 - Language encoding flag (EFS). If this bit is set, the filename and comment fields for this file MUST be encoded using UTF-8.

APPENDIX D.2 If general purpose bit 11 is unset, the file name and comment should conform to the original ZIP character encoding. If general purpose bit 11 is set, the filename and comment must support The Unicode Standard, Version 4.1.0 or greater using the character encoding form defined by the UTF-8 storage specification. The Unicode Standard is published by the The Unicode Consortium (www.unicode.org). UTF-8 encoded data stored within ZIP files is expected to not include a byte order mark (BOM).

general purpose bit flag 存在并受 Go 支持:它就是 FileHeader 结构的 Flags 字段。不幸的是,Go 没有提供专门设置这个位的方法,默认情况下它是 0。(注:自 Go 1.10 起,当名称或注释是包含非 ASCII 字符的合法 UTF-8 时,archive/zip 会自动设置该位。)

因此,添加对 Unicode 名称支持的最简单方法就是将 bit 11 设置为 1。不要使用

FileName, _ := Zip.Create(Path)

请按以下方式创建您的 zip 条目:

h := &zip.FileHeader{Name:Path, Method: zip.Deflate, Flags: 0x800}
FileName, _ := Zip.CreateHeader(h)

第一行创建一个 FileHeader,其中为 Flags 字段设置了 0x800 (bit 11) 值,表明文件名将使用UTF-8(这是 Go 在将 string 写入 io.Writer 时所做的)。

注:

这样做可以保留 UTF-8 文件名,但并非所有 zip 读取/解压程序都支持它。例如,在 Windows 上,系统自带的文件处理程序(Windows 资源管理器)不会将其按 UTF-8 解码;而更专业的 Zip 处理程序(例如 SecureZip)能够识别 UTF-8 文件名,并使用 UTF-8 解码正确提取文件名。

package main

import (
    "archive/zip"
    "fmt"
    "io"
    "os"
    "path/filepath"
    "strings"
)

// main archives a directory tree into a new zip file whose entry names are
// flagged as UTF-8.
//
// Usage: <program> <source-dir> <output.zip>
//
// It exits with status 2 when an argument is missing and with status 1 when
// the archive cannot be written; the reason is printed to standard error.
func main() {
	if len(os.Args) < 3 {
		fmt.Fprintln(os.Stderr, "usage: <program> <source-dir> <output.zip>")
		os.Exit(2)
	}
	if err := zipDirUTF8(os.Args[1], os.Args[2]); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// zipDirUTF8 writes every non-directory file found under srcDir into a new zip
// archive at zipPath. Entry names are relative to the parent of srcDir, so the
// archive contains srcDir itself as its top-level folder.
//
// It returns the first error encountered, including errors from finalizing
// the archive or closing the output file.
func zipDirUTF8(srcDir, zipPath string) (err error) {
	// Resolve to an absolute path so that inputs such as "." or ".." still
	// have a real directory name and a real parent.
	root, err := filepath.Abs(srcDir)
	if err != nil {
		return fmt.Errorf("resolving %q: %w", srcDir, err)
	}
	base := filepath.Dir(root)

	out, err := os.Create(zipPath)
	if err != nil {
		return fmt.Errorf("creating archive %q: %w", zipPath, err)
	}
	defer func() {
		if cerr := out.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("closing %q: %w", zipPath, cerr)
		}
	}()

	zw := zip.NewWriter(out)
	// Deferred calls run last-in first-out, so the zip writer is finalized
	// (central directory written) before the underlying file is closed.
	defer func() {
		if cerr := zw.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("finalizing %q: %w", zipPath, cerr)
		}
	}()

	return filepath.Walk(root, func(path string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if info.IsDir() {
			return nil
		}
		// Walk builds every path by joining onto root, so base is always a
		// literal prefix of path; strip it and the separator that follows.
		rel := strings.TrimPrefix(strings.TrimPrefix(path, base), string(filepath.Separator))
		// Zip entry names must use forward slashes on every platform.
		return addFileUTF8(zw, path, filepath.ToSlash(rel))
	})
}

// addFileUTF8 copies the file at path into zw as a deflate-compressed entry
// called name, with the header marking the name as UTF-8.
func addFileUTF8(zw *zip.Writer, path, name string) error {
	// General purpose bit 11 (the language encoding flag, EFS) declares that
	// the entry's file name and comment are encoded as UTF-8.
	const utf8Flag = 0x800

	src, err := os.Open(path)
	if err != nil {
		return err
	}
	defer src.Close()

	hdr := &zip.FileHeader{Name: name, Method: zip.Deflate, Flags: utf8Flag}
	dst, err := zw.CreateHeader(hdr)
	if err != nil {
		return fmt.Errorf("adding %q to archive: %w", name, err)
	}
	if _, err := io.Copy(dst, src); err != nil {
		return fmt.Errorf("writing %q to archive: %w", name, err)
	}
	return nil
}