Problem modifying a parquet file using parquet-dotnet

89 Views Asked by At

I have the following code:

        using var filestream = new FileStream(filename, FileMode.Open, FileAccess.ReadWrite, FileShare.Write);
        using var memorystream = new MemoryStream();
        using var reader = await ParquetReader.CreateAsync(filestream);
        using var writer = await ParquetWriter.CreateAsync(reader.Schema, memorystream);

        Console.WriteLine($"Reading file {filename}");
        Console.WriteLine(string.Empty);
        var tasks = new List<Task>();
        for (int i = 0; i < reader.RowGroupCount; i++)
        {
            Console.SetCursorPosition(0, Console.CursorTop - 1);
            Console.Write(Enumerable.Repeat(' ', Console.BufferWidth).ToArray());
            Console.WriteLine($"\rReading row group {i + 1} of {reader.RowGroupCount}");
            Console.WriteLine(string.Empty);
            var table = await reader.ReadAsTableAsync(rowGroupIndex: i);
            foreach (var row in table)
            {
                Console.SetCursorPosition(0, Console.CursorTop - 1);
                Console.Write(Enumerable.Repeat(' ', Console.BufferWidth).ToArray());
                Console.WriteLine($"\rProcessing row {table.IndexOf(row) + 1} of {table.Count}");
                var row1 = row.GetString(0);
                var row2 = row.GetString(1);
                row[0] = NetCodeParser.RemoveComments(row1);
                row[1] = NetCodeParser.RemoveComments(row2);
            }
            using (var groupWriter = writer.CreateRowGroup())
            {
                await groupWriter.WriteAsync(table);
            }
            Console.SetCursorPosition(0, Console.CursorTop - 1);
        }
        var tempFile = Path.GetRandomFileName();
        using var tempStream = new FileStream(tempFile, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite);
        memorystream.WriteTo(tempStream);
        try
        {
            _ = await ParquetReader.CreateAsync(tempFile);
        }
        catch (IOException)
        {
            Console.WriteLine("File modifications failed");
        }
        memorystream.WriteTo(filestream);
        Console.WriteLine("Successfully updated parquet file");
        writer.Dispose();

I don't know why, when I save a file, the file gets corrupted. This is the message I get back: Error Message from Visual Studio

I would think that I'm making the modifications correctly, and that it should result in a proper file, but I don't see what I'm doing wrong.

0

There are 0 best solutions below