Edit the final part of a large (1.5 GB) text file in Node.js

68 Views Asked by At

My tool appends small JSON blocks, each followed by a comma, to a text file whose first character is initially [, with the goal of eventually producing one valid JSON array. Each appended block looks like this:

{data: text1},

and at the end of the day the text file looks like this:

[{data: text1},{data: text2},{data: text3},

So I have to remove the last comma at the end of the text and append a ] to make it valid JSON. However, the file is around 1.5 GB, and I couldn't figure out how to edit it in Node.js to turn it into a valid JSON file.

2

There are 2 best solutions below

7
Milos Stojanovic On BEST ANSWER
const fs = require('fs');

// Path of the file that is patched in place (there is no separate output file)
const inputFile = 'input.json';

// Number of trailing bytes inspected when looking for the final comma.
// Only this tail is ever read, so the size of the file does not matter.
const TAIL_SCAN_BYTES = 64;

const COMMA_BYTE = 0x2c; // ','

// Bytes that JSON treats as insignificant whitespace: space, tab, LF, CR.
const JSON_WHITESPACE_BYTES = [0x20, 0x09, 0x0a, 0x0d];

/**
 * Turns a file shaped like `[{...},{...},` into a valid JSON array by
 * overwriting its trailing comma with `]`, in place.
 *
 * Only the last TAIL_SCAN_BYTES bytes are read and a single byte is written.
 * Whitespace after the comma (for example a final newline) is skipped when
 * searching and left where it is, which JSON permits after the closing
 * bracket.
 *
 * @param {string} filePath - File to patch.
 * @returns {Promise<boolean>} true when a comma was replaced; false when the
 *   file is empty or its last non-whitespace byte (within the inspected tail)
 *   is not a comma, in which case the file is left untouched.
 * @throws Rejects with the underlying fs error if the file cannot be opened,
 *   inspected, read, written or closed.
 */
async function closeJsonArray(filePath) {
  const handle = await fs.promises.open(filePath, 'r+');
  try {
    const { size } = await handle.stat();
    const tailLength = Math.min(size, TAIL_SCAN_BYTES);
    if (tailLength === 0) {
      // Empty file: nothing to patch. Returning here also avoids passing a
      // negative position to read(), which would mean "current position".
      return false;
    }

    const tailStart = size - tailLength;
    const tail = Buffer.alloc(tailLength);
    const { bytesRead } = await handle.read(tail, 0, tailLength, tailStart);

    // Walk backwards over trailing whitespace to the last significant byte.
    let index = bytesRead - 1;
    while (index >= 0 && JSON_WHITESPACE_BYTES.includes(tail[index])) {
      index -= 1;
    }
    if (index < 0 || tail[index] !== COMMA_BYTE) {
      return false;
    }

    // ',' and ']' are both one byte in UTF-8, so a one-byte overwrite keeps
    // the file length and everything before it unchanged.
    const closingBracket = Buffer.from(']', 'utf8');
    await handle.write(closingBracket, 0, closingBracket.length, tailStart + index);
    return true;
  } finally {
    // Runs on every path, so the descriptor is never leaked.
    await handle.close();
  }
}

closeJsonArray(inputFile)
  .then((wasReplaced) => {
    if (wasReplaced) {
      console.log('Replaced the comma with a closing bracket.');
    } else {
      console.log('The last character is not a comma.');
    }
  })
  .catch((err) => {
    console.error('Error updating the input file:', err);
  });
0
blueway On

An additional solution that updates a large file using a Transform stream, as you wanted; in my specific situation it ran slower than Milos's code:

const fs = require('graceful-fs');
const { Transform } = require('stream');

const sourceFilePath = 'jsfiles.txt';
const destinationFilePath = 'new.txt';

const TRAILING_COMMA_BYTE = 0x2c; // ','

/**
 * Builds a Transform stream that forwards its input unchanged, except that a
 * comma at the very end of the whole stream is dropped and `]` is appended.
 *
 * transform() is called once per chunk, not once per file, so the end of the
 * data is only known in flush(). Each call therefore holds back its final
 * byte; the byte is forwarded when more data arrives and examined in flush()
 * when it turns out to have been the last one.
 *
 * An empty input produces an empty output.
 *
 * @returns {Transform} A new, unpiped transform stream.
 */
function createArrayClosingTransform() {
  // Last byte received so far that has not been forwarded yet (null = none).
  let heldByte = null;

  return new Transform({
    transform(chunk, encoding, callback) {
      const bytes = typeof chunk === 'string' ? Buffer.from(chunk, encoding) : chunk;
      if (bytes.length === 0) {
        callback();
        return;
      }
      if (heldByte !== null) {
        // More data followed, so the held byte was not the end of the stream.
        this.push(Buffer.from([heldByte]));
      }
      if (bytes.length > 1) {
        this.push(bytes.subarray(0, bytes.length - 1));
      }
      heldByte = bytes[bytes.length - 1];
      callback();
    },
    flush(callback) {
      if (heldByte !== null) {
        if (heldByte !== TRAILING_COMMA_BYTE) {
          // Not a trailing comma: keep the data intact.
          this.push(Buffer.from([heldByte]));
        }
        this.push(']'); // add last closing bracket
      }
      callback();
    },
  });
}

// Create a transform stream to modify the data
const transformStream = createArrayClosingTransform();

// Create a readable stream from the source file
const readStream = fs.createReadStream(sourceFilePath, { encoding: 'utf8' });

// Create a writable stream to the destination file
const writeStream = fs.createWriteStream(destinationFilePath, { encoding: 'utf8' });

// pipe() does not propagate errors or close the other streams, so every
// failure is reported and then the whole chain is torn down explicitly.
const abortCopy = (label, err) => {
  console.error(label, err.message);
  readStream.destroy();
  transformStream.destroy();
  writeStream.destroy();
};

// Handle errors during the process
readStream.on('error', (err) => {
  abortCopy('Error reading from the source file:', err);
});

transformStream.on('error', (err) => {
  abortCopy('Error transforming data:', err);
});

writeStream.on('error', (err) => {
  abortCopy('Error writing to the destination file:', err);
});

// Handle events when the copying is complete
readStream.on('end', () => {
  console.log('File duplication completed.');
});

writeStream.on('finish', () => {
  console.log('Data has been written to the destination file.');
});

// Pipe the data through the transform stream before writing to the destination file
readStream.pipe(transformStream).pipe(writeStream);