Spring 批处理 - FlatFileItemReader - csv 元素的序列和映射
Spring batch - FlatFileItemReader - Sequence and Mapping of csv elements
我的 POJO 是这样的:
// FileInfo: one record per pipe-delimited line of the input file.
// Lombok @Data generates getters/setters/equals/hashCode/toString; the
// no-arg constructor is required by BeanWrapperFieldSetMapper, which
// instantiates the bean and then sets each property by name via reflection.
@Data
@NoArgsConstructor
@AllArgsConstructor
public class FileInfo {
// All columns are kept as raw strings; any parsing/typing happens downstream.
private String filepath;
private String ignorestr1;
private String firstname;
private String lastname;
private String employeeid;
private String applicantid;
private String createdate;
private String startretdate;
private String retlength;
private String emporapplicant;
}
而我的 ItemReader 是这样的:
// Step-scoped reader: a fresh instance is created for each partition, with
// that partition's file name injected from the step execution context (the
// 'fileName' key is populated by MultiResourcePartitioner).
@Bean
@StepScope
@Qualifier("FileInfoItemReader")
@DependsOn("partitioner")
public FlatFileItemReader<FileInfo> FileInfoItemReader(@Value("#{stepExecutionContext['fileName']}") String filename)
throws MalformedURLException {
// The order of entries passed to names() must match the COLUMN order of the
// input file; it is unrelated to the declaration order of fields in FileInfo.
return new FlatFileItemReaderBuilder<FileInfo>().name("FileInfoItemReader").delimited().delimiter("|")
.names(new String[] { "filepath", "ignorestr1", "firstname", "lastname", "employeeid", "applicantid", "createdate", "startretdate", "retlength", "emporapplicant" })
// BeanWrapperFieldSetMapper copies each named column onto the FileInfo
// property of the same name via reflection.
.fieldSetMapper(new BeanWrapperFieldSetMapper<FileInfo>() {
{
setTargetType(FileInfo.class);
}
}).resource(new UrlResource(filename)).build();
}
更新:
我完整的 BatchConfig:
@Configuration
@EnableBatchProcessing
public class BatchConfiguration {

    private static final Logger log = LoggerFactory.getLogger(BatchConfiguration.class);

    @Autowired
    public JobBuilderFactory jobBuilderFactory;

    @Autowired
    public StepBuilderFactory stepBuilderFactory;

    // Step-scoped proxy; the real reader is created per partition with the
    // partition's file name injected from the step execution context.
    @Autowired
    private FlatFileItemReader<FileInfo> fileInfoItemReader;

    /**
     * Partitions the work: one partition (and therefore one reader instance)
     * per {@code *.csv} resource found by the pattern resolver.
     */
    @Bean("partitioner")
    @StepScope
    public Partitioner partitioner() {
        log.info("In Partitioner");
        MultiResourcePartitioner partitioner = new MultiResourcePartitioner();
        ResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
        try {
            partitioner.setResources(resolver.getResources("*.csv"));
        } catch (IOException e) {
            // Fail fast with context instead of printStackTrace(): continuing
            // with a null resource array would only surface later as an NPE.
            throw new IllegalStateException("Unable to resolve input resources matching *.csv", e);
        }
        // Note: partition(gridSize) is invoked by the framework through the
        // master step; the original manual call here discarded its result.
        return partitioner;
    }

    @Bean
    public FileInfoItemProcessor processor() {
        return new FileInfoItemProcessor();
    }

    @Bean
    public FileInfoWriter<FileInfo> writer() {
        return new FileInfoWriter<FileInfo>();
    }

    /** Job wrapping the partitioned master step. */
    @Bean
    public Job importUserJob(JobCompletionNotificationListener listener, Step step1) {
        return jobBuilderFactory.get("importUserJob").incrementer(new RunIdIncrementer()).listener(listener)
                .flow(masterStep()).end().build();
    }

    /** Worker step executed once per partition. */
    @Bean
    public Step step1() {
        return stepBuilderFactory.get("step1")
                .<FileInfo, FileInfo>chunk(10)
                .reader(fileInfoItemReader)
                .processor(processor())
                .writer(writer())
                .build();
    }

    @Bean
    public ThreadPoolTaskExecutor taskExecutor() {
        ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
        taskExecutor.setMaxPoolSize(25);
        taskExecutor.setCorePoolSize(25);
        taskExecutor.setQueueCapacity(25);
        taskExecutor.afterPropertiesSet();
        return taskExecutor;
    }

    @Bean
    @Qualifier("masterStep")
    public Step masterStep() {
        return stepBuilderFactory.get("masterStep").partitioner("step1", partitioner()).step(step1())
                .taskExecutor(taskExecutor()).build();
    }

    /**
     * Step-scoped reader for one partition's file.
     *
     * BUG FIX: names() must list the actual FileInfo property names, in the
     * order the columns appear in the input file — the previous placeholder
     * tokens ("I", "can", ...) made the mapping depend on accident rather than
     * on explicit column-to-property pairing. targetType() replaces the
     * double-brace-initialized BeanWrapperFieldSetMapper with the builder's
     * equivalent, avoiding the anonymous-subclass anti-pattern.
     */
    @Bean
    @StepScope
    @Qualifier("FileInfoItemReader")
    @DependsOn("partitioner")
    public FlatFileItemReader<FileInfo> FileInfoItemReader(@Value("#{stepExecutionContext['fileName']}") String filename)
            throws MalformedURLException {
        return new FlatFileItemReaderBuilder<FileInfo>()
                .name("FileInfoItemReader")
                .delimited()
                .delimiter("|")
                .names("filepath", "ignorestr1", "firstname", "lastname", "employeeid",
                        "applicantid", "createdate", "startretdate", "retlength", "emporapplicant")
                .targetType(FileInfo.class)
                .resource(new UrlResource(filename))
                .build();
    }
}
Doubt/Question: 我的映射遵循我的 FileInfo 中的严格顺序。如果我在我的 POJO 中切换任何 private String....
的位置,csv 的行元素的映射就会混乱。这是预期的行为吗?如果没有,那么我在这里缺少什么?或者让它独立于POJO序列的正确方法是什么?
BeanWrapperFieldSetMapper
使用反射来映射字段,因此它们在 class 中的声明顺序应该无关紧要。
你在 .names() 的数组参数中声明的字段顺序
对应的是输入文件中的列顺序，而不是 POJO 中字段的声明顺序。
编辑:添加样本
persons.csv
1,foo
2,bar
SO69224405.java
import javax.sql.DataSource;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.file.FlatFileItemReader;
import org.springframework.batch.item.file.builder.FlatFileItemReaderBuilder;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.FileSystemResource;
import org.springframework.jdbc.datasource.embedded.EmbeddedDatabaseBuilder;
import org.springframework.jdbc.datasource.embedded.EmbeddedDatabaseType;
// Self-contained sample showing that the order passed to names() corresponds
// to the column order of persons.csv, while the declaration order of the
// fields inside Person is irrelevant (mapping is done by name via reflection).
@Configuration
@EnableBatchProcessing
public class SO69224405 {
@Bean
public FlatFileItemReader<Person> itemReader() {
return new FlatFileItemReaderBuilder<Person>()
.name("personItemReader")
.resource(new FileSystemResource("persons.csv"))
.delimited()
// Column 1 of the file is the id, column 2 the name.
.names("id", "name") // with names("name", "id") the example fails
.targetType(Person.class)
.build();
}
@Bean
public ItemWriter<Person> itemWriter() {
// Print each chunk's items so the mapping result is visible on stdout.
return items -> items.forEach(System.out::println);
}
@Bean
public Job job(JobBuilderFactory jobs, StepBuilderFactory steps) {
return jobs.get("job")
.start(steps.get("step")
.<Person, Person>chunk(5)
.reader(itemReader())
.writer(itemWriter())
.build())
.build();
}
public static void main(String[] args) throws Exception {
ApplicationContext context = new AnnotationConfigApplicationContext(SO69224405.class);
JobLauncher jobLauncher = context.getBean(JobLauncher.class);
Job job = context.getBean(Job.class);
jobLauncher.run(job, new JobParameters());
}
@Bean
public DataSource dataSource() {
// In-memory job repository backing store for the sample.
return new EmbeddedDatabaseBuilder()
.setType(EmbeddedDatabaseType.H2)
.addScript("/org/springframework/batch/core/schema-h2.sql")
.build();
}
public static class Person {
// the declaration order of fields should not matter
private String name;
private int id;
public Person() {
}
public int getId() {
return id;
}
public void setId(int id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String toString() {
return "Person{id=" + id + ", name='" + name + '\'' + '}';
}
}
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.example</groupId>
<artifactId>so69224405</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>so69224405</name>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework.batch</groupId>
<artifactId>spring-batch-core</artifactId>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-jdbc</artifactId>
</dependency>
<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${java.version}</source>
<target>${java.version}</target>
</configuration>
</plugin>
</plugins>
</build>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-dependencies</artifactId>
<version>2.5.4</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
</project>
我的 POJO 是这样的:
// FileInfo: one record per pipe-delimited line of the input file.
// Lombok @Data generates getters/setters/equals/hashCode/toString; the
// no-arg constructor is required by BeanWrapperFieldSetMapper, which
// instantiates the bean and then sets each property by name via reflection.
@Data
@NoArgsConstructor
@AllArgsConstructor
public class FileInfo {
// All columns are kept as raw strings; any parsing/typing happens downstream.
private String filepath;
private String ignorestr1;
private String firstname;
private String lastname;
private String employeeid;
private String applicantid;
private String createdate;
private String startretdate;
private String retlength;
private String emporapplicant;
}
而我的 ItemReader 是这样的:
// Step-scoped reader: a fresh instance is created for each partition, with
// that partition's file name injected from the step execution context (the
// 'fileName' key is populated by MultiResourcePartitioner).
@Bean
@StepScope
@Qualifier("FileInfoItemReader")
@DependsOn("partitioner")
public FlatFileItemReader<FileInfo> FileInfoItemReader(@Value("#{stepExecutionContext['fileName']}") String filename)
throws MalformedURLException {
// The order of entries passed to names() must match the COLUMN order of the
// input file; it is unrelated to the declaration order of fields in FileInfo.
return new FlatFileItemReaderBuilder<FileInfo>().name("FileInfoItemReader").delimited().delimiter("|")
.names(new String[] { "filepath", "ignorestr1", "firstname", "lastname", "employeeid", "applicantid", "createdate", "startretdate", "retlength", "emporapplicant" })
// BeanWrapperFieldSetMapper copies each named column onto the FileInfo
// property of the same name via reflection.
.fieldSetMapper(new BeanWrapperFieldSetMapper<FileInfo>() {
{
setTargetType(FileInfo.class);
}
}).resource(new UrlResource(filename)).build();
}
更新:
我完整的 BatchConfig:
@Configuration
@EnableBatchProcessing
public class BatchConfiguration {

    private static final Logger log = LoggerFactory.getLogger(BatchConfiguration.class);

    @Autowired
    public JobBuilderFactory jobBuilderFactory;

    @Autowired
    public StepBuilderFactory stepBuilderFactory;

    // Step-scoped proxy; the real reader is created per partition with the
    // partition's file name injected from the step execution context.
    @Autowired
    private FlatFileItemReader<FileInfo> fileInfoItemReader;

    /**
     * Partitions the work: one partition (and therefore one reader instance)
     * per {@code *.csv} resource found by the pattern resolver.
     */
    @Bean("partitioner")
    @StepScope
    public Partitioner partitioner() {
        log.info("In Partitioner");
        MultiResourcePartitioner partitioner = new MultiResourcePartitioner();
        ResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
        try {
            partitioner.setResources(resolver.getResources("*.csv"));
        } catch (IOException e) {
            // Fail fast with context instead of printStackTrace(): continuing
            // with a null resource array would only surface later as an NPE.
            throw new IllegalStateException("Unable to resolve input resources matching *.csv", e);
        }
        // Note: partition(gridSize) is invoked by the framework through the
        // master step; the original manual call here discarded its result.
        return partitioner;
    }

    @Bean
    public FileInfoItemProcessor processor() {
        return new FileInfoItemProcessor();
    }

    @Bean
    public FileInfoWriter<FileInfo> writer() {
        return new FileInfoWriter<FileInfo>();
    }

    /** Job wrapping the partitioned master step. */
    @Bean
    public Job importUserJob(JobCompletionNotificationListener listener, Step step1) {
        return jobBuilderFactory.get("importUserJob").incrementer(new RunIdIncrementer()).listener(listener)
                .flow(masterStep()).end().build();
    }

    /** Worker step executed once per partition. */
    @Bean
    public Step step1() {
        return stepBuilderFactory.get("step1")
                .<FileInfo, FileInfo>chunk(10)
                .reader(fileInfoItemReader)
                .processor(processor())
                .writer(writer())
                .build();
    }

    @Bean
    public ThreadPoolTaskExecutor taskExecutor() {
        ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
        taskExecutor.setMaxPoolSize(25);
        taskExecutor.setCorePoolSize(25);
        taskExecutor.setQueueCapacity(25);
        taskExecutor.afterPropertiesSet();
        return taskExecutor;
    }

    @Bean
    @Qualifier("masterStep")
    public Step masterStep() {
        return stepBuilderFactory.get("masterStep").partitioner("step1", partitioner()).step(step1())
                .taskExecutor(taskExecutor()).build();
    }

    /**
     * Step-scoped reader for one partition's file.
     *
     * BUG FIX: names() must list the actual FileInfo property names, in the
     * order the columns appear in the input file — the previous placeholder
     * tokens ("I", "can", ...) made the mapping depend on accident rather than
     * on explicit column-to-property pairing. targetType() replaces the
     * double-brace-initialized BeanWrapperFieldSetMapper with the builder's
     * equivalent, avoiding the anonymous-subclass anti-pattern.
     */
    @Bean
    @StepScope
    @Qualifier("FileInfoItemReader")
    @DependsOn("partitioner")
    public FlatFileItemReader<FileInfo> FileInfoItemReader(@Value("#{stepExecutionContext['fileName']}") String filename)
            throws MalformedURLException {
        return new FlatFileItemReaderBuilder<FileInfo>()
                .name("FileInfoItemReader")
                .delimited()
                .delimiter("|")
                .names("filepath", "ignorestr1", "firstname", "lastname", "employeeid",
                        "applicantid", "createdate", "startretdate", "retlength", "emporapplicant")
                .targetType(FileInfo.class)
                .resource(new UrlResource(filename))
                .build();
    }
}
Doubt/Question: 我的映射遵循我的 FileInfo 中的严格顺序。如果我在我的 POJO 中切换任何 private String....
的位置,csv 的行元素的映射就会混乱。这是预期的行为吗?如果没有,那么我在这里缺少什么?或者让它独立于POJO序列的正确方法是什么?
BeanWrapperFieldSetMapper
使用反射来映射字段,因此它们在 class 中的声明顺序应该无关紧要。
你在 .names() 的数组参数中声明的字段顺序
对应的是输入文件中的列顺序，而不是 POJO 中字段的声明顺序。
编辑:添加样本
persons.csv
1,foo
2,bar
SO69224405.java
import javax.sql.DataSource;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.file.FlatFileItemReader;
import org.springframework.batch.item.file.builder.FlatFileItemReaderBuilder;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.FileSystemResource;
import org.springframework.jdbc.datasource.embedded.EmbeddedDatabaseBuilder;
import org.springframework.jdbc.datasource.embedded.EmbeddedDatabaseType;
// Self-contained sample showing that the order passed to names() corresponds
// to the column order of persons.csv, while the declaration order of the
// fields inside Person is irrelevant (mapping is done by name via reflection).
@Configuration
@EnableBatchProcessing
public class SO69224405 {
@Bean
public FlatFileItemReader<Person> itemReader() {
return new FlatFileItemReaderBuilder<Person>()
.name("personItemReader")
.resource(new FileSystemResource("persons.csv"))
.delimited()
// Column 1 of the file is the id, column 2 the name.
.names("id", "name") // with names("name", "id") the example fails
.targetType(Person.class)
.build();
}
@Bean
public ItemWriter<Person> itemWriter() {
// Print each chunk's items so the mapping result is visible on stdout.
return items -> items.forEach(System.out::println);
}
@Bean
public Job job(JobBuilderFactory jobs, StepBuilderFactory steps) {
return jobs.get("job")
.start(steps.get("step")
.<Person, Person>chunk(5)
.reader(itemReader())
.writer(itemWriter())
.build())
.build();
}
public static void main(String[] args) throws Exception {
ApplicationContext context = new AnnotationConfigApplicationContext(SO69224405.class);
JobLauncher jobLauncher = context.getBean(JobLauncher.class);
Job job = context.getBean(Job.class);
jobLauncher.run(job, new JobParameters());
}
@Bean
public DataSource dataSource() {
// In-memory job repository backing store for the sample.
return new EmbeddedDatabaseBuilder()
.setType(EmbeddedDatabaseType.H2)
.addScript("/org/springframework/batch/core/schema-h2.sql")
.build();
}
public static class Person {
// the declaration order of fields should not matter
private String name;
private int id;
public Person() {
}
public int getId() {
return id;
}
public void setId(int id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String toString() {
return "Person{id=" + id + ", name='" + name + '\'' + '}';
}
}
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.example</groupId>
<artifactId>so69224405</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>so69224405</name>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework.batch</groupId>
<artifactId>spring-batch-core</artifactId>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-jdbc</artifactId>
</dependency>
<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${java.version}</source>
<target>${java.version}</target>
</configuration>
</plugin>
</plugins>
</build>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-dependencies</artifactId>
<version>2.5.4</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
</project>