Splitting files into chunks with size bigger than 127
I'm trying to build a simplified HDFS (Hadoop Distributed File System) as the final project for my Distributed Systems course.
So the first thing I tried was to write a program that splits an arbitrary file into chunks (blocks) of an arbitrary size.
I found this useful example; the code is:
package javabeat.net.io;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;

/**
 * Split File Example
 *
 * @author Krishna
 */
public class SplitFileExample {

    private static String FILE_NAME = "TextFile.txt";
    private static byte PART_SIZE = 5;

    public static void main(String[] args) {
        File inputFile = new File(FILE_NAME);
        FileInputStream inputStream;
        String newFileName;
        FileOutputStream filePart;
        int fileSize = (int) inputFile.length();
        int nChunks = 0, read = 0, readLength = PART_SIZE;
        byte[] byteChunkPart;
        try {
            inputStream = new FileInputStream(inputFile);
            while (fileSize > 0) {
                // Last chunk: shrink the read length to the remaining bytes
                if (fileSize <= PART_SIZE) {
                    readLength = fileSize;
                }
                byteChunkPart = new byte[readLength];
                read = inputStream.read(byteChunkPart, 0, readLength);
                fileSize -= read;
                assert (read == byteChunkPart.length);
                nChunks++;
                newFileName = FILE_NAME + ".part"
                        + Integer.toString(nChunks - 1);
                filePart = new FileOutputStream(new File(newFileName));
                filePart.write(byteChunkPart);
                filePart.flush();
                filePart.close();
                byteChunkPart = null;
                filePart = null;
            }
            inputStream.close();
        } catch (IOException exception) {
            exception.printStackTrace();
        }
    }
}
But I think there is a big problem: the value of PART_SIZE cannot be bigger than 127, otherwise I get "error: possible loss of precision".
How can I solve this without completely changing the code?
The problem is that PART_SIZE is a byte; the maximum value of a byte is indeed 127.
But your current code is riddled with problems anyway: incorrect resource handling, for one.
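If all you want is to get rid of the compile error, note that the part size counts bytes but does not itself need to be a byte; declaring it as an int is enough. A minimal sketch (1024 is an arbitrary example value, not from your code):

    // An int part size allows chunks far larger than 127 bytes;
    // the rest of the code, including readLength = PART_SIZE, compiles unchanged
    private static int PART_SIZE = 1024;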
That said, here is a version rewritten using java.nio.file:
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;

private static final String FILENAME = "TextFile.txt";
private static final int PART_SIZE = xxx; // HERE

public static void main(final String... args)
    throws IOException
{
    final Path file = Paths.get(FILENAME).toRealPath();
    final String filenameBase = file.getFileName().toString();
    final byte[] buf = new byte[PART_SIZE];

    int partNumber = 0;
    Path part;
    int bytesRead;
    byte[] toWrite;

    try (
        final InputStream in = Files.newInputStream(file);
    ) {
        while ((bytesRead = in.read(buf)) != -1) {
            // Parts are named file.part0, file.part1, ... next to the original
            part = file.resolveSibling(filenameBase + ".part" + partNumber);
            // Only copy the buffer when the last read came up short
            toWrite = bytesRead == PART_SIZE ? buf : Arrays.copyOf(buf, bytesRead);
            // CREATE_NEW fails fast if a part file already exists
            Files.write(part, toWrite, StandardOpenOption.CREATE_NEW);
            partNumber++;
        }
    }
}
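For completeness, here is a sketch of the reverse operation, stitching the parts back into one file, assuming the .part0, .part1, ... naming produced above. The helper name joinParts is my own, not part of the original answer, and it additionally needs java.io.OutputStream imported:

private static void joinParts(final Path target, final int nParts)
    throws IOException
{
    final String filenameBase = target.getFileName().toString();
    try (
        final OutputStream out = Files.newOutputStream(target, StandardOpenOption.CREATE_NEW);
    ) {
        for (int i = 0; i < nParts; i++) {
            // Append each part in order; Files.copy streams the part file into out
            Files.copy(target.resolveSibling(filenameBase + ".part" + i), out);
        }
    }
}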
If what you want to split is a PDF by pages, Apache PDFBox provides a Splitter (assuming PDFBox 2.x):

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import org.apache.pdfbox.multipdf.Splitter;
import org.apache.pdfbox.pdmodel.PDDocument;

List<PDDocument> pages = new ArrayList<PDDocument>();
PDDocument document = PDDocument.load(new File(filePath));
try {
    Splitter splitter = new Splitter();
    // Each resulting document will contain this many pages
    splitter.setSplitAtPage(noOfPagesDocumentWillContain);
    pages = splitter.split(document);
} catch (Exception e) {
    e.printStackTrace();
}
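Each element of pages is itself a PDDocument, so a usage sketch for writing them out could look like this (the part-N.pdf names are my own illustration):

int i = 0;
for (PDDocument part : pages) {
    // Write each split document to its own PDF and free its resources
    part.save("part-" + i++ + ".pdf");
    part.close();
}
document.close();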