RestTemplate does not retrieve the full content of a file from the server

115 Views Asked by At

My project is using Spring Boot 2.3.3.RELEASE.

<!-- The project inherits from the Spring Boot starter parent, pinned to version 2.3.3.RELEASE. -->
<parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>2.3.3.RELEASE</version>
    <relativePath/>
</parent>

I am trying to retrieve files from the server using RestTemplate.

My program works fine with small files, but it sometimes fails to download larger files completely. One of the sample files is approximately 12.9 MB (`ls -l` reports 13572529 bytes), yet the size of the file I download differs from one attempt to the next.

I can retrieve the file correctly using curl.

The code I used:

    /**
     * Downloads the resource at {@code urlStr} into {@code localPath + fName}, streaming the
     * response body to disk instead of buffering it in memory.
     *
     * <p>If the target file already exists it is returned as-is without contacting the server.
     * Because of that shortcut a truncated download must never be left on disk: the number of
     * bytes written is compared with the {@code Content-Length} header (when the server sends
     * one) and the partial file is deleted on any failure.
     *
     * @param logId     correlation id included in every log line
     * @param urlStr    URL to fetch with HTTP GET
     * @param fName     file name; appended to {@code localPath} as-is
     * @param localPath target directory; it is concatenated with {@code fName} without adding a
     *                  separator, so it must already end with one
     * @return the path of the local file, or {@code null} if the download failed
     */
    @Override
    public String downloadFile(String logId, String urlStr, String fName, String localPath) {
        long begin = System.currentTimeMillis();
        String filePath = localPath + fName;
        File file = new File(filePath);
        // Set just before the request is issued so that the catch block only ever deletes a file
        // this call may have created, never one that existed beforehand.
        boolean downloadStarted = false;
        try {
            log.info("logId={}-Start to download, fName={}, data={}, filePath={}", logId, fName, urlStr, localPath);

            File dir = new File(localPath);
            // mkdirs() also creates missing parent directories; the second isDirectory() check
            // covers the case where something else created the directory in the meantime.
            if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
                log.error("logId={}-cannot create download directory, fName={}, filePath={}", logId, fName, localPath);
                return null;
            }

            // Do not download the target file if it exists
            if (file.exists()) {
                return filePath;
            }

            RequestCallback requestCallback = request -> request.getHeaders()
                    .setAccept(Arrays.asList(MediaType.APPLICATION_OCTET_STREAM, MediaType.ALL));

            // Fetch data as stream instead loading all data into memory
            RestTemplate restTemplate = new RestTemplate();
            downloadStarted = true;
            restTemplate.execute(urlStr, HttpMethod.GET, requestCallback, clientHttpResponse -> {
                // getContentLength() returns -1 when the header is absent (e.g. chunked encoding).
                long contentLength = clientHttpResponse.getHeaders().getContentLength();
                log.info("logId={}-Content-Length in Response Header: {}", logId, contentLength);

                // try-with-resources closes the stream before the file size is inspected.
                try (FileOutputStream out = new FileOutputStream(file)) {
                    StreamUtils.copy(clientHttpResponse.getBody(), out);
                }

                long written = file.length();
                log.info("logId={}-Content-Length in Response Header: {}- file length: {}", logId, contentLength, written);
                if (contentLength >= 0 && written != contentLength) {
                    // A connection that ends early may surface only as a normal end-of-stream,
                    // so the size has to be verified explicitly.
                    throw new java.io.IOException("Incomplete download: expected " + contentLength
                            + " bytes but received " + written);
                }
                return null;
            });
        } catch (Exception e) {
            log.error("logId={}-download exception, fName={}, urlStr={}, e=", logId, fName, urlStr, e);
            // Remove the partial file; otherwise the "already exists" shortcut above would hand
            // out the truncated file on the next call.
            if (downloadStarted && file.exists() && !file.delete()) {
                log.warn("logId={}-could not delete partial file, filePath={}", logId, filePath);
            }
            return null;
        }

        log.info("logId={}-download success, fName={}, urlStr={}, filePath={}, time cost={}ms", logId, fName, urlStr, filePath, System.currentTimeMillis() - begin);
        return filePath;
    }

In the log, the content length from the response header is always correct (13572529), but the length of the downloaded file varies from run to run.

I copied the code of StreamUtils.copy() into my class so that I could log the stream size.

    /**
     * Downloads the resource at {@code urlStr} into {@code localPath + fName}, streaming the
     * response body to disk through the local {@code copy} helper.
     *
     * <p>If the target file already exists it is returned as-is without contacting the server.
     * Because of that shortcut a truncated download must never be left on disk: the number of
     * bytes written is compared with the {@code Content-Length} header (when the server sends
     * one) and the partial file is deleted on any failure.
     *
     * @param logId     correlation id included in every log line
     * @param urlStr    URL to fetch with HTTP GET
     * @param fName     file name; appended to {@code localPath} as-is
     * @param localPath target directory; it is concatenated with {@code fName} without adding a
     *                  separator, so it must already end with one
     * @return the path of the local file, or {@code null} if the download failed
     */
    @Override
    public String downloadFile(String logId, String urlStr, String fName, String localPath) {
        long begin = System.currentTimeMillis();
        String filePath = localPath + fName;
        File file = new File(filePath);
        // Set just before the request is issued so that the catch block only ever deletes a file
        // this call may have created, never one that existed beforehand.
        boolean downloadStarted = false;
        try {
            log.info("logId={}-Start to download, fName={}, data={}, filePath={}", logId, fName, urlStr, localPath);

            File dir = new File(localPath);
            // mkdirs() also creates missing parent directories; the second isDirectory() check
            // covers the case where something else created the directory in the meantime.
            if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
                log.error("logId={}-cannot create download directory, fName={}, filePath={}", logId, fName, localPath);
                return null;
            }

            // Do not download the target file if it exists
            if (file.exists()) {
                return filePath;
            }

            RequestCallback requestCallback = request -> request.getHeaders()
                    .setAccept(Arrays.asList(MediaType.APPLICATION_OCTET_STREAM, MediaType.ALL));

            // Fetch data as stream instead loading all data into memory
            RestTemplate restTemplate = new RestTemplate();
            downloadStarted = true;
            restTemplate.execute(urlStr, HttpMethod.GET, requestCallback, clientHttpResponse -> {
                // getContentLength() returns -1 when the header is absent (e.g. chunked encoding).
                long contentLength = clientHttpResponse.getHeaders().getContentLength();
                log.info("logId={}-Content-Length in Response Header: {}", logId, contentLength);

                // try-with-resources closes the stream before the file size is inspected.
                try (FileOutputStream out = new FileOutputStream(file)) {
                    copy(clientHttpResponse.getBody(), out);
                }

                // Use the on-disk size rather than copy()'s int result, which cannot represent
                // more than Integer.MAX_VALUE bytes.
                long written = file.length();
                log.info("logId={}-Content-Length in Response Header: {}- file length: {}", logId, contentLength, written);
                if (contentLength >= 0 && written != contentLength) {
                    // A connection that ends early may surface only as a normal end-of-stream,
                    // so the size has to be verified explicitly.
                    throw new java.io.IOException("Incomplete download: expected " + contentLength
                            + " bytes but received " + written);
                }
                return null;
            });
        } catch (Exception e) {
            log.error("logId={}-download exception, fName={}, urlStr={}, e=", logId, fName, urlStr, e);
            // Remove the partial file; otherwise the "already exists" shortcut above would hand
            // out the truncated file on the next call.
            if (downloadStarted && file.exists() && !file.delete()) {
                log.warn("logId={}-could not delete partial file, filePath={}", logId, filePath);
            }
            return null;
        }

        log.info("logId={}-download success, fName={}, urlStr={}, filePath={}, time cost={}ms", logId, fName, urlStr, filePath, System.currentTimeMillis() - begin);
        return filePath;
    }


    /**
     * Copies everything readable from {@code in} to {@code out} in 4096-byte chunks, logging
     * {@code in.available()} before and after the transfer for diagnostics.
     *
     * <p>Per the {@link InputStream#available()} contract, that value is only an estimate of the
     * bytes readable without blocking. It is not the total length of the stream, so it is not
     * expected to match {@code Content-Length} or the returned byte count.
     *
     * <p>Neither stream is closed; {@code out} is flushed once the copy is finished.
     *
     * @param in  the stream to read from, must not be {@code null}
     * @param out the stream to write to, must not be {@code null}
     * @return the number of bytes copied
     * @throws IOException if reading or writing fails
     */
    public static int copy(InputStream in, OutputStream out) throws IOException {
        Assert.notNull(in, "No InputStream specified");
        Assert.notNull(out, "No OutputStream specified");
        log.info("inputStream.................|{}", in.available());

        // Same chunk size as BUFFER_SIZE in StreamUtils.copy()
        final byte[] chunk = new byte[4096];
        int total = 0;
        for (int n = in.read(chunk); n != -1; n = in.read(chunk)) {
            out.write(chunk, 0, n);
            total += n;
        }
        out.flush();

        log.info("inputStream.................|{}-{}", in.available(), total);
        return total;
    }

The logs of this part after I tried several times:

inputStream.................|157672
inputStream.................|0-314056
inputStream.................|14320
inputStream.................|0-2206592
inputStream.................|32615
inputStream.................|0-13572529
inputStream.................|14320
inputStream.................|0-546655

in.available() appears to show the size of the input stream (clientHttpResponse.getBody()), but why is that value always less than both byteCount and the Content-Length?

I have read the articles spring-resttemplate-download-large-file, download-large-file-through-spring-rest-template, spring-resttemplate-large-files-contentlength-auto-changed, and download-large-file-from-server-using-rest-template-java-spring-mvc, but I still have not found a solution and am confused by the logs.

How do I get the right size for a bigger file? Thank you for your consideration.

1

There is 1 best solution below

0
VonC On BEST ANSWER

You can start with a couple of best practices (assuming you cannot upgrade Spring Boot from the old August 2020 v2.3.3 release to a v3.0.0+ release, currently v3.2.1):

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpMethod;
import org.springframework.http.MediaType;
import org.springframework.http.client.SimpleClientHttpRequestFactory;
import org.springframework.http.client.ClientHttpResponse;
import org.springframework.web.client.RequestCallback;
import org.springframework.web.client.ResponseExtractor;
import org.springframework.web.client.RestTemplate;
import org.springframework.util.StreamUtils;

import java.io.BufferedOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;

/**
 * Streams a remote file to local disk through a {@link RestTemplate}.
 *
 * <p>The body is first written to a temporary {@code <target>.part} file. Only after the transfer
 * has finished and the size matches the {@code Content-Length} header (when the server sends one)
 * is the temporary file moved over the target, so the target path never holds a truncated file.
 */
public class FileDownloader {

    private static final int CONNECT_TIMEOUT_MILLIS = 10000; // 10 seconds
    private static final int READ_TIMEOUT_MILLIS = 60000; // 60 seconds

    private final RestTemplate restTemplate;

    /**
     * Stores the given template and installs a request factory with explicit timeouts on it.
     *
     * <p>The factory is configured once here rather than on every download, so a template shared
     * between threads is not reconfigured while requests are in flight.
     *
     * @param restTemplate the template used for all downloads
     */
    @Autowired
    public FileDownloader(RestTemplate restTemplate) {
        SimpleClientHttpRequestFactory requestFactory = new SimpleClientHttpRequestFactory();
        requestFactory.setBufferRequestBody(false);
        requestFactory.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
        requestFactory.setReadTimeout(READ_TIMEOUT_MILLIS);
        restTemplate.setRequestFactory(requestFactory);
        this.restTemplate = restTemplate;
    }

    /**
     * Downloads {@code urlStr} with HTTP GET into {@code localPath + fileName}.
     *
     * @param urlStr    URL to fetch
     * @param localPath target directory; it is concatenated with {@code fileName} without adding
     *                  a separator, so it must already end with one
     * @param fileName  name of the file to create (an existing file is replaced)
     * @return the path of the downloaded file, or {@code null} if the download failed
     */
    public String downloadFile(String urlStr, String localPath, String fileName) {
        long begin = System.currentTimeMillis();
        String filePath = localPath + fileName;
        Path path = Paths.get(filePath);
        Path tempPath = Paths.get(filePath + ".part");

        RequestCallback requestCallback = request -> request.getHeaders()
                .setAccept(Arrays.asList(MediaType.APPLICATION_OCTET_STREAM, MediaType.ALL));

        ResponseExtractor<Void> responseExtractor = response -> {
            // getContentLength() returns -1 when the header is absent (e.g. chunked encoding).
            long expected = response.getHeaders().getContentLength();

            // TRUNCATE_EXISTING discards a stale .part file left by an earlier attempt; without
            // it, bytes beyond the new content would survive at the end of the file.
            try (BufferedOutputStream outputStream = new BufferedOutputStream(Files.newOutputStream(tempPath,
                    StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE))) {
                StreamUtils.copy(response.getBody(), outputStream);
            }

            // Checked after the stream is closed so buffered bytes are included in the size.
            long actual = Files.size(tempPath);
            if (expected >= 0 && actual != expected) {
                throw new IOException("Incomplete download: expected " + expected
                        + " bytes but received " + actual);
            }
            return null;
        };

        try {
            Path parent = path.getParent();
            if (parent != null && !Files.isDirectory(parent)) {
                Files.createDirectories(parent);
            }

            restTemplate.execute(urlStr, HttpMethod.GET, requestCallback, responseExtractor);
            // Publish the verified file; an older copy at the target path is replaced.
            Files.move(tempPath, path, java.nio.file.StandardCopyOption.REPLACE_EXISTING);

            long end = System.currentTimeMillis();
            System.out.println("Download completed. Time taken: " + (end - begin) + " ms");
        } catch (Exception e) {
            // Print the exception itself (type and message), not just getMessage().
            System.err.println("Error occurred during file download: " + e);
            deleteQuietly(tempPath);
            return null;
        }

        return filePath;
    }

    /** Best-effort removal of a leftover temporary file; a failure here is only reported. */
    private static void deleteQuietly(Path file) {
        try {
            Files.deleteIfExists(file);
        } catch (IOException cleanupFailure) {
            System.err.println("Could not delete partial download " + file + ": " + cleanupFailure);
        }
    }
}

Also consider verifying that the server can handle large file requests, and make sure there is no intermediary (such as a proxy or load balancer) that might be interfering with or truncating the downloads.
You could further improve the downloadFile() function by validating the file after downloading (e.g., with a checksum) to make sure it is complete and uncorrupted.