Spring Batch : How to read footer of CSV file and validation using FlatFileItemReader

6.1k Views Asked by At

I am using Spring Batch and FlatFileItemReader to read a .CSV file. The file has a header (first line), detail lines, and a footer (last line). I want to validate the total number of detail lines against the count given in the footer line.

This is my example .csv file.

movie.csv

Name|Type|Year
Notting Hill|romantic comedy|1999
Toy Story 3|Animation|2010
Captain America: The First Avenger|Action|2011
3

In the example file:
The first line is a header (which I skip).
Lines 2-4 are detail lines, and the last line is the footer.

I want to read the footer and get its value (last line = 3),
then get the total number of detail records (in this case there are 3 lines),
and finally validate that the total from the footer (3) equals the total number of detail records (3).


and this is my code.

/**
 * Builds the reader for the pipe-delimited movie file.
 *
 * <p>The first line (header) is skipped; every following line is tokenized on {@code |}
 * and passed to the mapper returned by {@code movieFieldSetMapper()}.
 *
 * @param filePath location of the file to read
 * @return a configured {@code FlatFileItemReader} producing {@code Movie} items
 */
@Bean
@StepScope
public FlatFileItemReader<Movie> movieItemReader(String filePath) {
        FlatFileItemReader<Movie> reader = new FlatFileItemReader<>();
        reader.setLinesToSkip(1);   //skip header line
        reader.setResource(new PathResource(filePath));

        // NOTE(review): BeanWrapperFieldSetMapper maps columns by name, so this tokenizer
        // presumably also needs setNames(...) matching Movie's properties -- confirm.
        DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer("|");
        DefaultLineMapper<Movie> movieLineMapper = new DefaultLineMapper<>();
        FieldSetMapper<Movie> movieMapper = movieFieldSetMapper();

        movieLineMapper.setLineTokenizer(tokenizer);
        // Pass the mapper instance created above (the local variable, not the factory method name).
        movieLineMapper.setFieldSetMapper(movieMapper);
        movieLineMapper.afterPropertiesSet();
        reader.setLineMapper(movieLineMapper);
        return reader;
}

/**
 * Creates the mapper that converts a tokenized line into a {@code Movie}.
 *
 * @return a {@code BeanWrapperFieldSetMapper} whose target type is {@code Movie}
 */
public FieldSetMapper<Movie> movieFieldSetMapper() {
        final BeanWrapperFieldSetMapper<Movie> beanMapper = new BeanWrapperFieldSetMapper<>();
        beanMapper.setTargetType(Movie.class);
        return beanMapper;
}
2

There are 2 best solutions below

2
On BEST ANSWER

You can use a chunk-oriented step as a validation step before your job's business logic. This step would use an ItemReadListener to save the last item and a StepExecutionListener for the validation. Here is a quick example:

import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.ItemReadListener;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.StepExecutionListener;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepScope;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.core.listener.StepExecutionListenerSupport;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.file.FlatFileItemReader;
import org.springframework.batch.item.file.mapping.PassThroughLineMapper;
import org.springframework.batch.repeat.RepeatStatus;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.ByteArrayResource;

@Configuration
@EnableBatchProcessing
public class MyJob {

    @Autowired
    private JobBuilderFactory jobs;

    @Autowired
    private StepBuilderFactory steps;

    @Bean
    @StepScope
    public FlatFileItemReader<String> itemReader() {
        FlatFileItemReader<String> reader = new FlatFileItemReader<>();
        reader.setLinesToSkip(1);   //skip header line
        reader.setResource(new ByteArrayResource("header\nitem1\nitem2\n2".getBytes()));
        reader.setLineMapper(new PassThroughLineMapper());
        return reader;
    }

    @Bean
    public ItemWriter<String> itemWriter() {
        return items -> {
            for (String item : items) {
                System.out.println("item = " + item);
            }
        };
    }

    @Bean
    public Step step1() {
        MyListener myListener = new MyListener();
        return steps.get("step1")
                .<String, String>chunk(5)
                .reader(itemReader())
                .writer(itemWriter())
                .listener((ItemReadListener<String>) myListener)
                .listener((StepExecutionListener) myListener)
                .build();
    }

    @Bean
    public Step step2() {
        return steps.get("step2")
                .tasklet((contribution, chunkContext) -> {
                    System.out.println("Total count is ok as validated by step1");
                    return RepeatStatus.FINISHED;
                })
                .build();
    }

    @Bean
    public Job job() {
        return jobs.get("job")
                .start(step1())
                .next(step2())
                .build();
    }

    static class MyListener extends StepExecutionListenerSupport implements ItemReadListener<String> {

        private String lastItem;

        @Override
        public void beforeRead() {
        }

        @Override
        public void afterRead(String item) {
            this.lastItem = item;
        }

        @Override
        public void onReadError(Exception ex) {

        }

        @Override
        public ExitStatus afterStep(StepExecution stepExecution) {
            int readCount = stepExecution.getReadCount();
            int totalCountInFooter = Integer.valueOf(this.lastItem); // TODO sanity checks (number format, etc)
            System.out.println("readCount = " + (readCount - 1)); // substract footer from the read count
            System.out.println("totalCountInFooter = " + totalCountInFooter);
            // TODO do validation on readCount vs totalCountInFooter
            return ExitStatus.COMPLETED; // return appropriate exit status according to validation result
        }
    }

    public static void main(String[] args) throws Exception {
        ApplicationContext context = new AnnotationConfigApplicationContext(MyJob.class);
        JobLauncher jobLauncher = context.getBean(JobLauncher.class);
        Job job = context.getBean(Job.class);
        jobLauncher.run(job, new JobParameters());
    }

}

This example prints:

item = item1
item = item2
item = 2
readCount = 2
totalCountInFooter = 2
Total count is ok as validated by step1

Hope this helps.

0
On

The Spring Batch documentation describes a proper way to read a flat file whose record types have different formats.

  /**
   * Line mapper that is configured with the tokenizers and field set mappers
   * supplied by {@code tokenizers()} and {@code fieldSetMappers()}.
   *
   * <p>NOTE(review): in Spring Batch, PatternMatchingCompositeLineMapper's setters appear to
   * take maps keyed by line pattern -- confirm the argument types against the version in use.
   *
   * @return the configured composite line mapper
   */
  @Bean
public PatternMatchingCompositeLineMapper<MyDomainObject> lineMapper() {
    final PatternMatchingCompositeLineMapper<MyDomainObject> compositeMapper =
            new PatternMatchingCompositeLineMapper<>();
    compositeMapper.setTokenizers(tokenizers());
    compositeMapper.setFieldSetMappers(fieldSetMappers());
    return compositeMapper;
}

 /**
  * Composite tokenizer holding one regex tokenizer per line type (details and footer).
  *
  * <p>Both tokenizers are registered in a single map: calling {@code setTokenizers} once per
  * line type would replace the previously registered map and keep only the last entry.
  *
  * <p>NOTE(review): PatternMatchingCompositeLineTokenizer matches its map keys against the
  * line itself (with {@code ?} and {@code *} wildcards), so the keys below presumably need
  * to be line patterns such as {@code "FOOTER*"} rather than labels -- confirm.
  *
  * @return the composite tokenizer with the details and footer tokenizers registered
  */
 @Bean
public PatternMatchingCompositeLineTokenizer tokenizers() {
    PatternMatchingCompositeLineTokenizer tokenizers = new PatternMatchingCompositeLineTokenizer();

    // Tokenizer for details lines
    RegexLineTokenizer detailsTokenizer = new RegexLineTokenizer();
    detailsTokenizer.setPattern("^[0-9]{3}.*"); // Example: Details lines start with 3 digits

    // Tokenizer for footer lines
    RegexLineTokenizer footerTokenizer = new RegexLineTokenizer();
    footerTokenizer.setPattern("^FOOTER.*"); // Example: Footer lines start with "FOOTER"

    // Register both tokenizers with one call so neither overwrites the other.
    HashMap<String, LineTokenizer> tokenizersByLineType = new HashMap<>();
    tokenizersByLineType.put("DETAILS", detailsTokenizer);
    tokenizersByLineType.put("FOOTER", footerTokenizer);
    tokenizers.setTokenizers(tokenizersByLineType);

    return tokenizers;
}


/**
 * Composite field set mapper holding one bean-wrapper mapper per line type:
 * details lines target {@code MyDetailsObject}, footer lines target {@code MyFooterObject}.
 *
 * <p>Both mappers are registered in a single map so that one {@code setFieldSetMappers}
 * call carries every entry instead of a later call replacing an earlier one.
 *
 * <p>NOTE(review): the keys are kept identical to the ones used for the tokenizers;
 * confirm how the composite mapper matches them against a line.
 *
 * @return the composite mapper with the details and footer mappers registered
 */
@Bean
public PatternMatchingCompositeFieldSetMapper<MyDomainObject> fieldSetMappers() {
    PatternMatchingCompositeFieldSetMapper<MyDomainObject> fieldSetMappers = new PatternMatchingCompositeFieldSetMapper<>();

    // FieldSetMapper for details lines
    BeanWrapperFieldSetMapper<MyDomainObject> detailsMapper = new BeanWrapperFieldSetMapper<>();
    detailsMapper.setTargetType(MyDetailsObject.class);

    // FieldSetMapper for footer lines
    BeanWrapperFieldSetMapper<MyDomainObject> footerMapper = new BeanWrapperFieldSetMapper<>();
    footerMapper.setTargetType(MyFooterObject.class);

    // Register both mappers with one call so neither overwrites the other.
    HashMap<String, FieldSetMapper<MyDomainObject>> mappersByLineType = new HashMap<>();
    mappersByLineType.put("DETAILS", detailsMapper);
    mappersByLineType.put("FOOTER", footerMapper);
    fieldSetMappers.setFieldSetMappers(mappersByLineType);

    return fieldSetMappers;
}
  • PatternMatchingCompositeLineMapper is used to map lines from a file to domain objects based on patterns.
  • PatternMatchingCompositeLineTokenizer is used to define different tokenizers based on patterns. Each tokenizer can extract fields from lines matching specific patterns.
  • RegexLineTokenizer is used to define regular expression patterns for details and footer lines.
  • BeanWrapperFieldSetMapper is used to map field sets to domain objects for details and footer lines.
  • For each type of line (details and footer), a tokenizer and a field set mapper are configured in the PatternMatchingCompositeLineTokenizer and PatternMatchingCompositeFieldSetMapper, respectively.