Spring 批处理 - FlatFileItemReader - csv 元素的序列和映射

Spring batch - FlatFileItemReader - Sequence and Mapping of csv elements

我的 POJO 是这样的:

/**
 * Plain data holder for one parsed line of the input file.
 *
 * <p>Every column is kept as a raw {@link String}; no type conversion is declared here.
 * Accessors and constructors are generated by the Lombok annotations below rather than
 * written by hand.
 *
 * <p>NOTE(review): the all-args constructor generated by Lombok takes its parameters in
 * field declaration order, so reordering the fields changes that constructor's signature.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor

public class FileInfo {
    
    // Property names as used by the first reader snippet's names(...) list.
    private String filepath;
    // Named as a throw-away column; presumably not consumed downstream — confirm with callers.
    private String ignorestr1;
    private String firstname;
    private String lastname;
    private String employeeid;
    private String applicantid;
    // Date-like values are stored as unparsed text.
    private String createdate;
    private String startretdate;
    private String retlength;
    private String emporapplicant;
    
}

而我的 ItemReader 是这样的:

/**
 * Builds the step-scoped reader that parses one pipe-delimited file into {@link FileInfo} items.
 *
 * <p>The array passed to {@code names(...)} labels the columns in the order they appear in the
 * file; the bean-wrapper mapper then binds each labelled column to the {@code FileInfo} property
 * of the same name, so the field declaration order inside {@code FileInfo} plays no part.
 *
 * <p>The mapper is configured through the builder's {@code targetType(...)} instead of an
 * anonymous {@code BeanWrapperFieldSetMapper} subclass with an instance initializer
 * (double-brace initialization), which created an extra class for no benefit.
 *
 * @param filename URL of the file to read, taken from the {@code fileName} entry of the step
 *                 execution context
 * @return a reader producing one {@code FileInfo} per line
 * @throws MalformedURLException if {@code filename} is not a valid URL
 */
@Bean
    @StepScope
    @Qualifier("FileInfoItemReader")
    @DependsOn("partitioner")
    public FlatFileItemReader<FileInfo> FileInfoItemReader(@Value("#{stepExecutionContext['fileName']}") String filename)
            throws MalformedURLException {
        return new FlatFileItemReaderBuilder<FileInfo>()
                .name("FileInfoItemReader")
                .resource(new UrlResource(filename))
                .delimited()
                .delimiter("|")
                // Column labels, in file order; each must equal a FileInfo property name.
                .names(new String[] { "filepath", "ignorestr1", "firstname", "lastname", "employeeid", "applicantid", "createdate", "startretdate", "retlength", "emporapplicant" })
                .targetType(FileInfo.class)
                .build();
    }

更新:

我完整的 BatchConfig:

/**
 * Spring Batch configuration for a partitioned import job.
 *
 * <p>The job {@code importUserJob} runs {@code masterStep}, which partitions the work over the
 * resources matching {@code *.csv} and executes {@code step1} (read, process, write in chunks of
 * 10) for each partition on a thread pool.
 */
@Configuration
@EnableBatchProcessing
public class BatchConfiguration {

    private static final Logger log = LoggerFactory.getLogger(BatchConfiguration.class);
    @Autowired
    public JobBuilderFactory jobBuilderFactory;

    @Autowired
    public StepBuilderFactory stepBuilderFactory;

    // Injected instance of the step-scoped reader bean declared at the bottom of this class.
    @Autowired
    private FlatFileItemReader<FileInfo> FileInfoItemReader;

    /**
     * Creates the partitioner that hands out one resource per partition.
     *
     * <p>Resolution failures are rethrown with their cause instead of being printed and ignored;
     * previously an {@link IOException} left {@code resources} as {@code null}, which was then
     * passed straight into the partitioner. The former {@code partitioner.partition(10)} call was
     * dropped because its return value was discarded.
     *
     * @return a partitioner over all resources matching {@code *.csv}
     * @throws IllegalStateException if the resource pattern cannot be resolved
     */
    @Bean("partitioner")
    @StepScope
    public Partitioner partitioner() {
        log.info("In Partitioner");

        ResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
        Resource[] resources;
        try {
            resources = resolver.getResources("*.csv");
        } catch (IOException e) {
            throw new IllegalStateException("Failed to resolve input resources matching *.csv", e);
        }

        MultiResourcePartitioner partitioner = new MultiResourcePartitioner();
        partitioner.setResources(resources);
        return partitioner;
    }

    /** Item processor applied to every {@link FileInfo} read by {@code step1}. */
    @Bean
    public FileInfoItemProcessor processor() {
        return new FileInfoItemProcessor();
    }


    /** Item writer receiving the processed {@link FileInfo} chunks. */
    @Bean
    public FileInfoWriter<FileInfo> writer() {
        return new FileInfoWriter<FileInfo>();
    }

    /**
     * Defines the job, whose only flow element is the partitioned master step.
     *
     * @param listener job execution listener registered on the job
     * @param step1    injected but not referenced here; the job flows through
     *                 {@link #masterStep()}, which in turn delegates to {@code step1}
     * @return the configured job
     */
    @Bean
    public Job importUserJob(JobCompletionNotificationListener listener, Step step1) {
        return jobBuilderFactory.get("importUserJob").incrementer(new RunIdIncrementer()).listener(listener)
                .flow(masterStep()).end().build();
    }

    /** Worker step: reads, processes and writes {@link FileInfo} items in chunks of 10. */
    @Bean
    public Step step1() {
        return stepBuilderFactory.get("step1").<FileInfo, FileInfo>chunk(10).reader(FileInfoItemReader).processor(processor()).writer(writer())
                .build();
    }

    /**
     * Thread pool used to run the partitions.
     *
     * <p>{@code afterPropertiesSet()} is intentionally not called here: the container invokes the
     * initialization callback on the returned bean, so calling it manually initialized the
     * executor twice.
     */
    @Bean
    public ThreadPoolTaskExecutor taskExecutor() {
        ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
        taskExecutor.setMaxPoolSize(25);
        taskExecutor.setCorePoolSize(25);
        taskExecutor.setQueueCapacity(25);
        return taskExecutor;
    }

    /** Master step: partitions the input and runs {@code step1} per partition on the task executor. */
    @Bean
    @Qualifier("masterStep")
    public Step masterStep() {
        return stepBuilderFactory.get("masterStep").partitioner("step1", partitioner()).step(step1())
                .taskExecutor(taskExecutor()).build();
    }

    /**
     * Builds the step-scoped reader for the file assigned to the current partition.
     *
     * <p>The mapper is configured through the builder's {@code targetType(...)} rather than an
     * anonymous {@code BeanWrapperFieldSetMapper} subclass with an instance initializer.
     *
     * @param filename URL of the file to read, taken from the {@code fileName} entry of the step
     *                 execution context (presumably populated by the partitioner — confirm)
     * @return a reader producing one {@link FileInfo} per pipe-delimited line
     * @throws MalformedURLException if {@code filename} is not a valid URL
     */
    @Bean
    @StepScope
    @Qualifier("FileInfoItemReader")
    @DependsOn("partitioner")
    public FlatFileItemReader<FileInfo> FileInfoItemReader(@Value("#{stepExecutionContext['fileName']}") String filename)
            throws MalformedURLException {
        return new FlatFileItemReaderBuilder<FileInfo>()
                .name("FileInfoItemReader")
                .resource(new UrlResource(filename))
                .delimited()
                .delimiter("|")
                // NOTE(review): these are placeholder labels kept verbatim from the question. names(...)
                // labels the columns in file order and the mapper binds by property name, so they are
                // expected to be replaced with FileInfo property names (filepath, ignorestr1, ...).
                .names(new String[] { "I", "can", "put", "literally", "anything", "here", "and", "it", "works", "just_fine" })
                .targetType(FileInfo.class)
                .build();
    }
}

疑问:我的映射严格遵循 FileInfo 中字段的声明顺序。如果我在 POJO 中调换任意 private String ... 字段的位置,csv 行元素的映射就会错乱。这是预期的行为吗?如果不是,我在这里遗漏了什么?或者说,怎样才能让映射不依赖于 POJO 中字段的声明顺序?

BeanWrapperFieldSetMapper 使用反射来映射字段,因此它们在 class 中的声明顺序应该无关紧要。

你在 .names() 的数组参数中声明的字段顺序,对应的是输入文件中的列顺序,而不是 POJO 中的声明顺序。

编辑:添加样本

persons.csv

1,foo
2,bar

SO69224405.java

import javax.sql.DataSource;

import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.file.FlatFileItemReader;
import org.springframework.batch.item.file.builder.FlatFileItemReaderBuilder;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.FileSystemResource;
import org.springframework.jdbc.datasource.embedded.EmbeddedDatabaseBuilder;
import org.springframework.jdbc.datasource.embedded.EmbeddedDatabaseType;

/**
 * Minimal, self-contained sample showing that column-to-property mapping depends on the order
 * given to {@code names(...)} and not on the order in which the target class declares its fields.
 *
 * <p>Reads {@code persons.csv} (lines of the form {@code id,name}), maps each line to a
 * {@link Person} and prints it.
 */
@Configuration
@EnableBatchProcessing
public class SO69224405 {

    /**
     * Reader for {@code persons.csv}.
     *
     * @return a reader that labels the first column {@code id} and the second {@code name}
     */
    @Bean
    public FlatFileItemReader<Person> itemReader() {
        return new FlatFileItemReaderBuilder<Person>()
                .name("personItemReader")
                .resource(new FileSystemResource("persons.csv"))
                .delimited()
                .names("id", "name") // with names("name", "id") the example fails
                .targetType(Person.class)
                .build();
    }

    /** Writer that prints every item of a chunk to standard output. */
    @Bean
    public ItemWriter<Person> itemWriter() {
        return items -> items.forEach(System.out::println);
    }

    /**
     * Single-step job: read persons in chunks of 5 and print them.
     *
     * @param jobs  factory for job builders
     * @param steps factory for step builders
     * @return the configured job
     */
    @Bean
    public Job job(JobBuilderFactory jobs, StepBuilderFactory steps) {
        return jobs.get("job")
                .start(steps.get("step")
                        .<Person, Person>chunk(5)
                        .reader(itemReader())
                        .writer(itemWriter())
                        .build())
                .build();
    }

    /**
     * Boots the application context, launches the job once and closes the context.
     *
     * <p>The context is opened in try-with-resources so that it is closed (and its beans
     * destroyed) even if the job launch throws; the original never closed it.
     *
     * @param args unused
     * @throws Exception if the job cannot be launched
     */
    public static void main(String[] args) throws Exception {
        try (AnnotationConfigApplicationContext context = new AnnotationConfigApplicationContext(SO69224405.class)) {
            JobLauncher jobLauncher = context.getBean(JobLauncher.class);
            Job job = context.getBean(Job.class);
            jobLauncher.run(job, new JobParameters());
        }
    }

    /** Embedded H2 database initialised with the Spring Batch schema script. */
    @Bean
    public DataSource dataSource() {
        return new EmbeddedDatabaseBuilder()
                .setType(EmbeddedDatabaseType.H2)
                .addScript("/org/springframework/batch/core/schema-h2.sql")
                .build();
    }

    /** Target type of the mapping: a person with a numeric id and a name. */
    public static class Person {
        // the declaration order of fields should not matter
        private String name;
        private int id;

        public Person() {
        }

        public int getId() {
            return id;
        }

        public void setId(int id) {
            this.id = id;
        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        @Override
        public String toString() {
            return "Person{id=" + id + ", name='" + name + '\'' + '}';
        }
    }

}

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the so69224405 sample. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.example</groupId>
    <artifactId>so69224405</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>so69224405</name>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Referenced by the compiler plugin configuration below. -->
        <java.version>1.8</java.version>
    </properties>

    <!-- No versions are declared here; they are expected to come from the BOM imported in
         dependencyManagement at the bottom of this file. -->
    <dependencies>
        <dependency>
            <groupId>org.springframework.batch</groupId>
            <artifactId>spring-batch-core</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-jdbc</artifactId>
        </dependency>
        <!-- Only needed at run time. -->
        <dependency>
            <groupId>com.h2database</groupId>
            <artifactId>h2</artifactId>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Compiles for the Java level set in the java.version property. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>${java.version}</source>
                    <target>${java.version}</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <!-- Imports the Spring Boot dependencies BOM (import scope) for dependency version management. -->
    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-dependencies</artifactId>
                <version>2.5.4</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>

</project>