在 Spring Batch 中处理某个字段时读取另一个文件
Read New File While Doing Processing For A Field In Spring Batch
我使用 Spring Batch 读取固定长度(fixed-length)格式的输入文件。
我已经实现了 Job、Step、Processor 等。
这是示例代码。
/**
 * Spring Batch configuration for the "JOB-Load" job: a single chunk-oriented step that
 * reads {@code Employee} records from a fixed-length flat file, runs them through
 * {@code CustomFileProcesser} and hands the resulting {@code EmpOutput} items to
 * {@code CustomFileWriter}.
 */
@Configuration
public class BatchConfig {

    private JobBuilderFactory jobBuilderFactory;
    private StepBuilderFactory stepBuilderFactory;

    /** Input file, resolved from the {@code inputFile} property. */
    @Value("${inputFile}")
    private Resource resource;

    @Autowired
    public BatchConfig(JobBuilderFactory jobBuilderFactory, StepBuilderFactory stepBuilderFactory) {
        this.jobBuilderFactory = jobBuilderFactory;
        this.stepBuilderFactory = stepBuilderFactory;
    }

    /** Defines the job, which consists solely of {@link #fileReadingStep()}. */
    @Bean
    public Job job() {
        return this.jobBuilderFactory
                .get("JOB-Load")
                .start(fileReadingStep())
                .build();
    }

    /**
     * Defines the read/process/write step with a chunk size of 1000 and the custom skip policy.
     *
     * <p>The processor and writer are created with {@code new} here, so these are plain
     * instances rather than container-managed beans.
     */
    @Bean
    public Step fileReadingStep() {
        return stepBuilderFactory
                .get("File-Read-Step1")
                .<Employee, EmpOutput>chunk(1000)
                .reader(itemReader())
                .processor(new CustomFileProcesser())
                .writer(new CustomFileWriter())
                .faultTolerant()
                .skipPolicy(skipPolicy())
                .build();
    }

    /** Flat-file reader over the configured input resource, using {@link #LineMapper()}. */
    @Bean
    public FlatFileItemReader<Employee> itemReader() {
        FlatFileItemReader<Employee> reader = new FlatFileItemReader<>();
        reader.setName("File-Reader");
        reader.setResource(resource);
        reader.setLineMapper(LineMapper());
        return reader;
    }

    /**
     * Line mapper that splits each line into three fixed-width fields and maps them with
     * {@code CustomFieldSetMapper}.
     */
    @Bean
    public LineMapper<Employee> LineMapper() {
        String[] fieldNames = { "employeeId", "employeeName", "employeeSalary" };
        // NOTE(review): the second and third ranges both include column 20 - confirm the
        // intended record layout.
        Range[] fieldColumns = { new Range(1, 9), new Range(10, 20), new Range(20, 30) };

        FixedLengthTokenizer tokenizer = new FixedLengthTokenizer();
        tokenizer.setNames(fieldNames);
        tokenizer.setColumns(fieldColumns);
        tokenizer.setStrict(false);

        DefaultLineMapper<Employee> mapper = new DefaultLineMapper<>();
        mapper.setLineTokenizer(tokenizer);
        mapper.setFieldSetMapper(new CustomFieldSetMapper());
        return mapper;
    }

    /** Skip policy applied to the fault-tolerant step. */
    @Bean
    public JobSkipPolicy skipPolicy() {
        return new JobSkipPolicy();
    }
}
在处理器中,我添加了一段示例代码来说明我的需求;但是如果在这里用 BufferedReader 读取文件,作业完成所需的时间会明显变长。
@Component
public class CustomFileProcesser implements ItemProcessor<Employee, EmpOutput> {
@Override
public EmpOutput process(Employee item) throws Exception {
EmpOutput emp = new EmpOutput();
emp.setEmployeeSalary(checkSal(item.getEmployeeSalary()));
return emp;
}
public String checkSal(String sal) {
// need to read the another file
// required to do some kind of validation
// after that final result need to return
File f1 = new File("C:\Users\John\New\salary.txt");
FileReader fr;
try {
fr = new FileReader(f1);
BufferedReader br = new BufferedReader(fr);
String s = br.readLine();
while (s != null) {
String value = s.substring(5, 7);
if(value.equals(sal))
sal = value;
else
sal = "5000";
s = br.readLine();
}
} catch (Exception e) {
e.printStackTrace();
}
return sal;
}
// other fields need to check by reading different different file.
// These new files contains more than 30k records.
// all are fixedlength file.
// I need to get the field by giving the index
}
在处理一个或多个字段时,我需要读取另一个文件,并在该文件中进行校验(该文件将从文件系统或云端读取)。
在处理 5 个字段的数据时,我需要再次读取 5 个不同的文件,我将检查这些文件中的字段详细信息,然后生成结果,该结果将进一步处理。
您可以将文件内容缓存在内存中,并对照缓存进行检查,而不是为每个数据项(item)都从磁盘重新读取整个文件。
您可以在此处找到示例(原文中的链接已缺失)。
我使用 Spring Batch 读取固定长度(fixed-length)格式的输入文件。我已经实现了 Job、Step、Processor 等。这是示例代码。
/**
 * Spring Batch configuration for the "JOB-Load" job: a single chunk-oriented step that
 * reads {@code Employee} records from a fixed-length flat file, runs them through
 * {@code CustomFileProcesser} and hands the resulting {@code EmpOutput} items to
 * {@code CustomFileWriter}.
 */
@Configuration
public class BatchConfig {

    private JobBuilderFactory jobBuilderFactory;
    private StepBuilderFactory stepBuilderFactory;

    /** Input file, resolved from the {@code inputFile} property. */
    @Value("${inputFile}")
    private Resource resource;

    @Autowired
    public BatchConfig(JobBuilderFactory jobBuilderFactory, StepBuilderFactory stepBuilderFactory) {
        this.jobBuilderFactory = jobBuilderFactory;
        this.stepBuilderFactory = stepBuilderFactory;
    }

    /** Defines the job, which consists solely of {@link #fileReadingStep()}. */
    @Bean
    public Job job() {
        return this.jobBuilderFactory
                .get("JOB-Load")
                .start(fileReadingStep())
                .build();
    }

    /**
     * Defines the read/process/write step with a chunk size of 1000 and the custom skip policy.
     *
     * <p>The processor and writer are created with {@code new} here, so these are plain
     * instances rather than container-managed beans.
     */
    @Bean
    public Step fileReadingStep() {
        return stepBuilderFactory
                .get("File-Read-Step1")
                .<Employee, EmpOutput>chunk(1000)
                .reader(itemReader())
                .processor(new CustomFileProcesser())
                .writer(new CustomFileWriter())
                .faultTolerant()
                .skipPolicy(skipPolicy())
                .build();
    }

    /** Flat-file reader over the configured input resource, using {@link #LineMapper()}. */
    @Bean
    public FlatFileItemReader<Employee> itemReader() {
        FlatFileItemReader<Employee> reader = new FlatFileItemReader<>();
        reader.setName("File-Reader");
        reader.setResource(resource);
        reader.setLineMapper(LineMapper());
        return reader;
    }

    /**
     * Line mapper that splits each line into three fixed-width fields and maps them with
     * {@code CustomFieldSetMapper}.
     */
    @Bean
    public LineMapper<Employee> LineMapper() {
        String[] fieldNames = { "employeeId", "employeeName", "employeeSalary" };
        // NOTE(review): the second and third ranges both include column 20 - confirm the
        // intended record layout.
        Range[] fieldColumns = { new Range(1, 9), new Range(10, 20), new Range(20, 30) };

        FixedLengthTokenizer tokenizer = new FixedLengthTokenizer();
        tokenizer.setNames(fieldNames);
        tokenizer.setColumns(fieldColumns);
        tokenizer.setStrict(false);

        DefaultLineMapper<Employee> mapper = new DefaultLineMapper<>();
        mapper.setLineTokenizer(tokenizer);
        mapper.setFieldSetMapper(new CustomFieldSetMapper());
        return mapper;
    }

    /** Skip policy applied to the fault-tolerant step. */
    @Bean
    public JobSkipPolicy skipPolicy() {
        return new JobSkipPolicy();
    }
}
在处理器中,我添加了一段示例代码来说明我的需求;但是如果在这里用 BufferedReader 读取文件,作业完成所需的时间会明显变长。
@Component
public class CustomFileProcesser implements ItemProcessor<Employee, EmpOutput> {
@Override
public EmpOutput process(Employee item) throws Exception {
EmpOutput emp = new EmpOutput();
emp.setEmployeeSalary(checkSal(item.getEmployeeSalary()));
return emp;
}
public String checkSal(String sal) {
// need to read the another file
// required to do some kind of validation
// after that final result need to return
File f1 = new File("C:\Users\John\New\salary.txt");
FileReader fr;
try {
fr = new FileReader(f1);
BufferedReader br = new BufferedReader(fr);
String s = br.readLine();
while (s != null) {
String value = s.substring(5, 7);
if(value.equals(sal))
sal = value;
else
sal = "5000";
s = br.readLine();
}
} catch (Exception e) {
e.printStackTrace();
}
return sal;
}
// other fields need to check by reading different different file.
// These new files contains more than 30k records.
// all are fixedlength file.
// I need to get the field by giving the index
}
在处理一个或多个字段时,我需要读取另一个文件,并在该文件中进行校验(该文件将从文件系统或云端读取)。
在处理 5 个字段的数据时,我需要再次读取 5 个不同的文件,我将检查这些文件中的字段详细信息,然后生成结果,该结果将进一步处理。
您可以将文件内容缓存在内存中,并对照缓存进行检查,而不是为每个数据项(item)都从磁盘重新读取整个文件。
您可以在此处找到示例(原文中的链接已缺失)。