在 CSV 文件中传输二进制数据(图像等)
Transfer binary data (image etc) in a CSV file
除了一些以字符串、数字等形式存储的元数据信息外,我还有一些二进制数据需要传输。这些二进制数据是图像文件,以 blob 列的形式存储在数据库中。我想把 blob 列包含在 csv 文件中,并将该 csv 文件存储在文件系统或 sftp 服务器上——我认为它存储在哪里并不重要。
如何将二进制数据存储为 csv 文件中的另一列?这是以这种方式传输二进制数据的好习惯吗?
Base64
通常(也是正确的)做法是用 Base64 对二进制数据进行编码。这会使数据量增大到原来的约 4/3 倍(即 4:3)。
虽然 CSV 文件通常被认为是文本文件,但您 可以 将原始二进制数据写入文件。
然后该数据应该用双引号引起来,数据中所有现有的双引号都必须用另一个双引号转义。引用字段也将处理二进制数据中的任何换行符,但 reader 必须支持它。如果 reader 知道它正在读取二进制数据(即如果您自己提供 reader),那么里面也可能有空字节。
但是,如果您的数据必须采用某种 Unicode 格式,则可能会出现问题。
因此,一般来说,将原始二进制数据写入 csv 文件并不是一个好的做法,最好使用 Base64 编码。
如上面的回答所述,您确实可以在 CSV 中包含二进制数据,但必须以对文本和 CSV 友好的方式对该数据进行编码。如前所述,这种编码方式的明显选择就是 Base64。
示例应用程序
这是示例应用程序的完整源代码。
此应用程序从 Internet 下载两张图片,即姊妹网站 StackOverflow.com 和 StackExchange.com 的徽标。下载后,这些图像被编码为 Base64 字符串。
然后写入一个2列的CSV文件。列是名称和图像数据。对于此示例,我们有 2 行,上面列出的每个站点各一行。请注意,Base64 编码不涉及逗号或引号,因此无需在标准 CSV 格式中用引号括起 Base64。
为了证明这有效,随后会读回该 CSV 文件,将图像数据从 Base64 解码回二进制数据,并把这些二进制数据写入存储。您可以自己打开生成的 PNG 图像文件,它们看起来应该与这两个站点的原始徽标相同(原文此处为两张徽标图片的链接)。
CSV 的写入和读取是使用 Apache Commons CSV 库完成的。
package work.basil.example;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Base64;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
/**
 * Demonstrates carrying binary image data inside a CSV file by Base64-encoding it.
 *
 * <p>The demo downloads two logo images, writes them to a two-column CSV file
 * ({@code Name}, {@code Image}) whose image column holds the Base64 text of the PNG bytes,
 * reads that CSV file back, decodes each image, and writes each one as a PNG file into the
 * folder that contains the CSV file.
 *
 * <p>CSV writing and reading is done with Apache Commons CSV. I/O failures are reported on
 * {@code System.err} and are not propagated to the caller.
 */
public class CsvImager
{
    /** Header of the CSV column holding the image name. */
    private static final String COLUMN_NAME = "Name";

    /** Header of the CSV column holding the Base64-encoded PNG bytes. */
    private static final String COLUMN_IMAGE = "Image";

    /** Informal format name passed to {@code ImageIO.write}. */
    private static final String IMAGE_FORMAT = "PNG";

    /**
     * Writes a CSV file of two columns: name of the image, image as Base64 text of its PNG bytes.
     *
     * <p>An {@link IOException} while writing is printed to {@code System.err}; the file may
     * then be missing or incomplete.
     *
     * @param map  pairs of image name and image; neither the map nor any image may be null
     * @param path location of the CSV file to write
     * @throws NullPointerException if {@code map}, {@code path}, or any image in the map is null
     */
    private void writeCsv ( final Map < String, BufferedImage > map , final Path path )
    {
        Objects.requireNonNull( map , "map" );
        Objects.requireNonNull( path , "path" );
        CSVFormat format = CSVFormat.RFC4180.withHeader( COLUMN_NAME , COLUMN_IMAGE );
        try (
                BufferedWriter writer = Files.newBufferedWriter( path , StandardCharsets.UTF_8 ) ;
                CSVPrinter printer = new CSVPrinter( writer , format )
        )
        {
            // One CSV row per image.
            for ( Map.Entry < String, BufferedImage > entry : map.entrySet() )
            {
                String name = entry.getKey();
                BufferedImage image = Objects.requireNonNull( entry.getValue() , "No image for key: " + name );
                String imageData = Base64.getEncoder().encodeToString( toPngBytes( image ) );
                printer.printRecord( name , imageData );
            }
        } catch ( IOException e )
        {
            e.printStackTrace();
        }
    }

    /**
     * Reads images from a CSV file in storage.
     *
     * <p>The first record of the file is treated as the header row. A row whose image column
     * is not valid Base64, or whose bytes cannot be decoded as an image, is reported on
     * {@code System.err} and skipped. An {@link IOException} is printed to {@code System.err}
     * and the entries read so far are returned.
     *
     * @param path location of the CSV file to read
     * @return pairs of image name and decoded image; empty if nothing could be read
     * @throws NullPointerException if {@code path} is null
     */
    public Map < String, BufferedImage > readCsv ( final Path path )
    {
        Objects.requireNonNull( path , "path" );
        Map < String, BufferedImage > map = new HashMap <>();
        CSVFormat format = CSVFormat.RFC4180.withHeader( COLUMN_NAME , COLUMN_IMAGE ).withFirstRecordAsHeader();
        try ( BufferedReader reader = Files.newBufferedReader( path , StandardCharsets.UTF_8 ) )
        {
            for ( CSVRecord row : format.parse( reader ) )
            {
                String name = row.get( COLUMN_NAME );
                byte[] bytes;
                try
                {
                    bytes = Base64.getDecoder().decode( row.get( COLUMN_IMAGE ) );
                } catch ( IllegalArgumentException e )
                {
                    // The decoder rejects text that is not valid Base64.
                    System.err.println( "Skipping row with invalid Base64 image data: " + name );
                    continue;
                }
                // ImageIO.read returns null when no registered reader can decode the bytes.
                BufferedImage image = ImageIO.read( new ByteArrayInputStream( bytes ) );
                if ( image == null )
                {
                    System.err.println( "Skipping row whose data is not a readable image: " + name );
                    continue;
                }
                map.put( name , image );
            }
        } catch ( IOException e )
        {
            e.printStackTrace();
        }
        return map;
    }

    /**
     * Downloads the two logo images from the Internet.
     *
     * <p>If either download fails, the failure is printed to {@code System.err} and an empty
     * map is returned.
     *
     * @return pairs of file name and image, or an empty map on failure
     */
    private Map < String, BufferedImage > fetchImages ()
    {
        try
        {
            // NOTE(review): the site segment of both paths was garbled as "Whosebug" and has been
            // restored to "stackoverflow" — confirm that these URLs still resolve.
            BufferedImage logoStackOverflow = download( new URL( "https://cdn.sstatic.net/Sites/stackoverflow/company/img/logos/so/so-logo.png" ) );
            BufferedImage logoStackExchange = download( new URL( "https://cdn.sstatic.net/Sites/stackoverflow/company/img/logos/se/se-logo.png" ) );
            System.out.println( "imageWhosebug: " + logoStackOverflow );
            System.out.println( "imageStackExchange: " + logoStackExchange );
            return Map.of( "logoWhosebug.png" , logoStackOverflow , "logoStackExchange.png" , logoStackExchange );
        } catch ( IOException e ) // Covers a malformed URL as well as a failed download or decode.
        {
            e.printStackTrace();
            return Map.of();
        }
    }

    /**
     * Produces individual image files on disk, to manually verify that the downloaded images
     * were successfully Base64 encoded, written to CSV, read from CSV, and decoded back to images.
     *
     * <p>Each map key is used as the file name inside {@code pathToFolder}. A key that would
     * resolve to a location outside that folder is reported on {@code System.err} and skipped.
     * An {@link IOException} for one file is printed to {@code System.err} and the remaining
     * files are still attempted.
     *
     * @param map          pairs of file name and image; must not be empty
     * @param pathToFolder existing folder that receives the PNG files
     * @throws NullPointerException     if an argument or any image in the map is null
     * @throws IllegalArgumentException if the map is empty or the path is not an existing folder
     */
    public void writeImages ( final Map < String, BufferedImage > map , final Path pathToFolder )
    {
        Objects.requireNonNull( map );
        Objects.requireNonNull( pathToFolder );
        if ( map.isEmpty() )
        {
            throw new IllegalArgumentException( "The Map should have elements but is empty. Message # 77063b5a-4398-49f0-b1a4-442255a13b77." );
        }
        if ( ! Files.isDirectory( pathToFolder ) )
        {
            throw new IllegalArgumentException( "The specified path must lead to an existing folder. Message # 6a19313d-b8a9-4a53-9b82-7672172923f9." );
        }
        final Path folder = pathToFolder.toAbsolutePath().normalize();
        for ( Map.Entry < String, BufferedImage > entry : map.entrySet() )
        {
            String name = entry.getKey();
            // Check the image before opening the file, so a missing image leaves no empty file behind.
            BufferedImage image = Objects.requireNonNull( entry.getValue() , "No image for key: " + name );
            // Names may come from a CSV file, so refuse any that would escape the target folder.
            Path pathToFile = folder.resolve( name ).normalize();
            if ( ! pathToFile.startsWith( folder ) )
            {
                System.err.println( "Skipping image whose name leads outside the target folder: " + name );
                continue;
            }
            try ( OutputStream stream = Files.newOutputStream( pathToFile ) )
            {
                // ImageIO.write returns false when no appropriate writer is found.
                if ( ! ImageIO.write( image , IMAGE_FORMAT , stream ) )
                {
                    throw new IOException( "No image writer found for format " + IMAGE_FORMAT + " while writing " + pathToFile );
                }
            } catch ( IOException e )
            {
                e.printStackTrace();
            }
        }
    }

    /** Encodes an image as PNG bytes; fails if no PNG writer accepts the image. */
    private static byte[] toPngBytes ( final BufferedImage image ) throws IOException
    {
        ByteArrayOutputStream stream = new ByteArrayOutputStream();
        if ( ! ImageIO.write( image , IMAGE_FORMAT , stream ) )
        {
            throw new IOException( "No image writer found for format " + IMAGE_FORMAT + "." );
        }
        return stream.toByteArray();
    }

    /** Reads an image from a URL; fails if the content cannot be decoded as an image. */
    private static BufferedImage download ( final URL url ) throws IOException
    {
        BufferedImage image = ImageIO.read( url );
        if ( image == null )
        {
            throw new IOException( "Content could not be decoded as an image: " + url );
        }
        return image;
    }

    // --------| Demo |-----------------------

    /** Runs the demo with the CSV file {@code images.csv} in the current user's home folder. */
    public void demo ()
    {
        this.demo( Paths.get( System.getProperty( "user.home" ) , "images.csv" ) );
    }

    /**
     * Runs the demo: download the images, write them to the CSV file, read the CSV file back,
     * and write each decoded image as a PNG file into the folder containing the CSV file.
     *
     * @param path location of the CSV file to write and then read
     * @throws NullPointerException if {@code path} is null
     */
    public void demo ( final Path path )
    {
        Objects.requireNonNull( path , "path" );
        Map < String, BufferedImage > map = this.fetchImages(); // Pairs of name & image.
        if ( map.isEmpty() )
        {
            System.err.println( "No images were downloaded, so no CSV file was written." );
            return;
        }
        this.writeCsv( map , path );
        Map < String, BufferedImage > mapOut = this.readCsv( path );
        if ( mapOut.isEmpty() )
        {
            System.err.println( "No images could be read back from " + path );
            return;
        }
        // A relative path of a single element has no parent until made absolute.
        Path pathOut = path.toAbsolutePath().getParent();
        this.writeImages( mapOut , pathOut );
    }

    /**
     * Entry point.
     *
     * @param args optional; the first argument, if present, is the path of the CSV file to use
     */
    public static void main ( String[] args )
    {
        CsvImager app = new CsvImager();
        if ( args.length > 0 )
        {
            app.demo( Paths.get( args[ 0 ] ) );
        } else
        {
            app.demo();
        }
        System.out.println( "Done." );
    }
}
提示:代码中分散使用的 CSV 列名魔术字符串 Name 和 Image 应替换为 Apache Commons CSV 所支持的枚举。我把它留给读者作为练习。
除了一些以字符串、数字等形式存储的元数据信息外,我还有一些二进制数据需要传输。这些二进制数据是图像文件,以 blob 列的形式存储在数据库中。我想把 blob 列包含在 csv 文件中,并将该 csv 文件存储在文件系统或 sftp 服务器上——我认为它存储在哪里并不重要。
如何将二进制数据存储为 csv 文件中的另一列?这是以这种方式传输二进制数据的好习惯吗?
Base64
通常(也是正确的)做法是用 Base64 对二进制数据进行编码。这会使数据量增大到原来的约 4/3 倍(即 4:3)。
虽然 CSV 文件通常被认为是文本文件,但您 可以 将原始二进制数据写入文件。
然后该数据应该用双引号引起来,数据中所有现有的双引号都必须用另一个双引号转义。引用字段也将处理二进制数据中的任何换行符,但 reader 必须支持它。如果 reader 知道它正在读取二进制数据(即如果您自己提供 reader),那么里面也可能有空字节。
但是,如果您的数据必须采用某种 Unicode 格式,则可能会出现问题。
因此,一般来说,将原始二进制数据写入 csv 文件并不是一个好的做法,最好使用 Base64 编码。
如上
示例应用程序
这是示例应用程序的完整源代码。
此应用程序从 Internet 下载两张图片,即姊妹网站 StackOverflow.com 和 StackExchange.com 的徽标。下载后,这些图像被编码为 Base64 字符串。
然后写入一个2列的CSV文件。列是名称和图像数据。对于此示例,我们有 2 行,上面列出的每个站点各一行。请注意,Base64 编码不涉及逗号或引号,因此无需在标准 CSV 格式中用引号括起 Base64。
为了证明这有效,随后会读回该 CSV 文件,将图像数据从 Base64 解码回二进制数据,并把这些二进制数据写入存储。您可以自己打开生成的 PNG 图像文件,它们看起来应该与这两个站点的原始徽标相同(原文此处为两张徽标图片的链接)。
CSV 的写入和读取是使用 Apache Commons CSV 库完成的。
package work.basil.example;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Base64;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
/**
 * Demonstrates carrying binary image data inside a CSV file by Base64-encoding it.
 *
 * <p>The demo downloads two logo images, writes them to a two-column CSV file
 * ({@code Name}, {@code Image}) whose image column holds the Base64 text of the PNG bytes,
 * reads that CSV file back, decodes each image, and writes each one as a PNG file into the
 * folder that contains the CSV file.
 *
 * <p>CSV writing and reading is done with Apache Commons CSV. I/O failures are reported on
 * {@code System.err} and are not propagated to the caller.
 */
public class CsvImager
{
    /** Header of the CSV column holding the image name. */
    private static final String COLUMN_NAME = "Name";

    /** Header of the CSV column holding the Base64-encoded PNG bytes. */
    private static final String COLUMN_IMAGE = "Image";

    /** Informal format name passed to {@code ImageIO.write}. */
    private static final String IMAGE_FORMAT = "PNG";

    /**
     * Writes a CSV file of two columns: name of the image, image as Base64 text of its PNG bytes.
     *
     * <p>An {@link IOException} while writing is printed to {@code System.err}; the file may
     * then be missing or incomplete.
     *
     * @param map  pairs of image name and image; neither the map nor any image may be null
     * @param path location of the CSV file to write
     * @throws NullPointerException if {@code map}, {@code path}, or any image in the map is null
     */
    private void writeCsv ( final Map < String, BufferedImage > map , final Path path )
    {
        Objects.requireNonNull( map , "map" );
        Objects.requireNonNull( path , "path" );
        CSVFormat format = CSVFormat.RFC4180.withHeader( COLUMN_NAME , COLUMN_IMAGE );
        try (
                BufferedWriter writer = Files.newBufferedWriter( path , StandardCharsets.UTF_8 ) ;
                CSVPrinter printer = new CSVPrinter( writer , format )
        )
        {
            // One CSV row per image.
            for ( Map.Entry < String, BufferedImage > entry : map.entrySet() )
            {
                String name = entry.getKey();
                BufferedImage image = Objects.requireNonNull( entry.getValue() , "No image for key: " + name );
                String imageData = Base64.getEncoder().encodeToString( toPngBytes( image ) );
                printer.printRecord( name , imageData );
            }
        } catch ( IOException e )
        {
            e.printStackTrace();
        }
    }

    /**
     * Reads images from a CSV file in storage.
     *
     * <p>The first record of the file is treated as the header row. A row whose image column
     * is not valid Base64, or whose bytes cannot be decoded as an image, is reported on
     * {@code System.err} and skipped. An {@link IOException} is printed to {@code System.err}
     * and the entries read so far are returned.
     *
     * @param path location of the CSV file to read
     * @return pairs of image name and decoded image; empty if nothing could be read
     * @throws NullPointerException if {@code path} is null
     */
    public Map < String, BufferedImage > readCsv ( final Path path )
    {
        Objects.requireNonNull( path , "path" );
        Map < String, BufferedImage > map = new HashMap <>();
        CSVFormat format = CSVFormat.RFC4180.withHeader( COLUMN_NAME , COLUMN_IMAGE ).withFirstRecordAsHeader();
        try ( BufferedReader reader = Files.newBufferedReader( path , StandardCharsets.UTF_8 ) )
        {
            for ( CSVRecord row : format.parse( reader ) )
            {
                String name = row.get( COLUMN_NAME );
                byte[] bytes;
                try
                {
                    bytes = Base64.getDecoder().decode( row.get( COLUMN_IMAGE ) );
                } catch ( IllegalArgumentException e )
                {
                    // The decoder rejects text that is not valid Base64.
                    System.err.println( "Skipping row with invalid Base64 image data: " + name );
                    continue;
                }
                // ImageIO.read returns null when no registered reader can decode the bytes.
                BufferedImage image = ImageIO.read( new ByteArrayInputStream( bytes ) );
                if ( image == null )
                {
                    System.err.println( "Skipping row whose data is not a readable image: " + name );
                    continue;
                }
                map.put( name , image );
            }
        } catch ( IOException e )
        {
            e.printStackTrace();
        }
        return map;
    }

    /**
     * Downloads the two logo images from the Internet.
     *
     * <p>If either download fails, the failure is printed to {@code System.err} and an empty
     * map is returned.
     *
     * @return pairs of file name and image, or an empty map on failure
     */
    private Map < String, BufferedImage > fetchImages ()
    {
        try
        {
            // NOTE(review): the site segment of both paths was garbled as "Whosebug" and has been
            // restored to "stackoverflow" — confirm that these URLs still resolve.
            BufferedImage logoStackOverflow = download( new URL( "https://cdn.sstatic.net/Sites/stackoverflow/company/img/logos/so/so-logo.png" ) );
            BufferedImage logoStackExchange = download( new URL( "https://cdn.sstatic.net/Sites/stackoverflow/company/img/logos/se/se-logo.png" ) );
            System.out.println( "imageWhosebug: " + logoStackOverflow );
            System.out.println( "imageStackExchange: " + logoStackExchange );
            return Map.of( "logoWhosebug.png" , logoStackOverflow , "logoStackExchange.png" , logoStackExchange );
        } catch ( IOException e ) // Covers a malformed URL as well as a failed download or decode.
        {
            e.printStackTrace();
            return Map.of();
        }
    }

    /**
     * Produces individual image files on disk, to manually verify that the downloaded images
     * were successfully Base64 encoded, written to CSV, read from CSV, and decoded back to images.
     *
     * <p>Each map key is used as the file name inside {@code pathToFolder}. A key that would
     * resolve to a location outside that folder is reported on {@code System.err} and skipped.
     * An {@link IOException} for one file is printed to {@code System.err} and the remaining
     * files are still attempted.
     *
     * @param map          pairs of file name and image; must not be empty
     * @param pathToFolder existing folder that receives the PNG files
     * @throws NullPointerException     if an argument or any image in the map is null
     * @throws IllegalArgumentException if the map is empty or the path is not an existing folder
     */
    public void writeImages ( final Map < String, BufferedImage > map , final Path pathToFolder )
    {
        Objects.requireNonNull( map );
        Objects.requireNonNull( pathToFolder );
        if ( map.isEmpty() )
        {
            throw new IllegalArgumentException( "The Map should have elements but is empty. Message # 77063b5a-4398-49f0-b1a4-442255a13b77." );
        }
        if ( ! Files.isDirectory( pathToFolder ) )
        {
            throw new IllegalArgumentException( "The specified path must lead to an existing folder. Message # 6a19313d-b8a9-4a53-9b82-7672172923f9." );
        }
        final Path folder = pathToFolder.toAbsolutePath().normalize();
        for ( Map.Entry < String, BufferedImage > entry : map.entrySet() )
        {
            String name = entry.getKey();
            // Check the image before opening the file, so a missing image leaves no empty file behind.
            BufferedImage image = Objects.requireNonNull( entry.getValue() , "No image for key: " + name );
            // Names may come from a CSV file, so refuse any that would escape the target folder.
            Path pathToFile = folder.resolve( name ).normalize();
            if ( ! pathToFile.startsWith( folder ) )
            {
                System.err.println( "Skipping image whose name leads outside the target folder: " + name );
                continue;
            }
            try ( OutputStream stream = Files.newOutputStream( pathToFile ) )
            {
                // ImageIO.write returns false when no appropriate writer is found.
                if ( ! ImageIO.write( image , IMAGE_FORMAT , stream ) )
                {
                    throw new IOException( "No image writer found for format " + IMAGE_FORMAT + " while writing " + pathToFile );
                }
            } catch ( IOException e )
            {
                e.printStackTrace();
            }
        }
    }

    /** Encodes an image as PNG bytes; fails if no PNG writer accepts the image. */
    private static byte[] toPngBytes ( final BufferedImage image ) throws IOException
    {
        ByteArrayOutputStream stream = new ByteArrayOutputStream();
        if ( ! ImageIO.write( image , IMAGE_FORMAT , stream ) )
        {
            throw new IOException( "No image writer found for format " + IMAGE_FORMAT + "." );
        }
        return stream.toByteArray();
    }

    /** Reads an image from a URL; fails if the content cannot be decoded as an image. */
    private static BufferedImage download ( final URL url ) throws IOException
    {
        BufferedImage image = ImageIO.read( url );
        if ( image == null )
        {
            throw new IOException( "Content could not be decoded as an image: " + url );
        }
        return image;
    }

    // --------| Demo |-----------------------

    /** Runs the demo with the CSV file {@code images.csv} in the current user's home folder. */
    public void demo ()
    {
        this.demo( Paths.get( System.getProperty( "user.home" ) , "images.csv" ) );
    }

    /**
     * Runs the demo: download the images, write them to the CSV file, read the CSV file back,
     * and write each decoded image as a PNG file into the folder containing the CSV file.
     *
     * @param path location of the CSV file to write and then read
     * @throws NullPointerException if {@code path} is null
     */
    public void demo ( final Path path )
    {
        Objects.requireNonNull( path , "path" );
        Map < String, BufferedImage > map = this.fetchImages(); // Pairs of name & image.
        if ( map.isEmpty() )
        {
            System.err.println( "No images were downloaded, so no CSV file was written." );
            return;
        }
        this.writeCsv( map , path );
        Map < String, BufferedImage > mapOut = this.readCsv( path );
        if ( mapOut.isEmpty() )
        {
            System.err.println( "No images could be read back from " + path );
            return;
        }
        // A relative path of a single element has no parent until made absolute.
        Path pathOut = path.toAbsolutePath().getParent();
        this.writeImages( mapOut , pathOut );
    }

    /**
     * Entry point.
     *
     * @param args optional; the first argument, if present, is the path of the CSV file to use
     */
    public static void main ( String[] args )
    {
        CsvImager app = new CsvImager();
        if ( args.length > 0 )
        {
            app.demo( Paths.get( args[ 0 ] ) );
        } else
        {
            app.demo();
        }
        System.out.println( "Done." );
    }
}
提示:代码中分散使用的 CSV 列名魔术字符串 Name 和 Image 应替换为 Apache Commons CSV 所支持的枚举。我把它留作读者