I have the following code:
using var filestream = new FileStream(filename, FileMode.Open, FileAccess.ReadWrite, FileShare.Write);
using var memorystream = new MemoryStream();
using var reader = await ParquetReader.CreateAsync(filestream);
using var writer = await ParquetWriter.CreateAsync(reader.Schema, memorystream);
Console.WriteLine($"Reading file {filename}");
Console.WriteLine(string.Empty);
var tasks = new List<Task>();
for (int i = 0; i < reader.RowGroupCount; i++)
{
Console.SetCursorPosition(0, Console.CursorTop - 1);
Console.Write(Enumerable.Repeat(' ', Console.BufferWidth).ToArray());
Console.WriteLine($"\rReading row group {i + 1} of {reader.RowGroupCount}");
Console.WriteLine(string.Empty);
var table = await reader.ReadAsTableAsync(rowGroupIndex: i);
foreach (var row in table)
{
Console.SetCursorPosition(0, Console.CursorTop - 1);
Console.Write(Enumerable.Repeat(' ', Console.BufferWidth).ToArray());
Console.WriteLine($"\rProcessing row {table.IndexOf(row) + 1} of {table.Count}");
var row1 = row.GetString(0);
var row2 = row.GetString(1);
row[0] = NetCodeParser.RemoveComments(row1);
row[1] = NetCodeParser.RemoveComments(row2);
}
using (var groupWriter = writer.CreateRowGroup())
{
await groupWriter.WriteAsync(table);
}
Console.SetCursorPosition(0, Console.CursorTop - 1);
}
var tempFile = Path.GetRandomFileName();
using var tempStream = new FileStream(tempFile, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite);
memorystream.WriteTo(tempStream);
try
{
_ = await ParquetReader.CreateAsync(tempFile);
}
catch (IOException)
{
Console.WriteLine("File modifications failed");
}
memorystream.WriteTo(filestream);
Console.WriteLine("Successfully updated parquet file");
writer.Dispose();
I don't know why, when I save a file, the file gets corrupted. This is the message I get back:
I would think that I'm making the modifications correctly, and that it should result in a proper file, but I don't see what I'm doing wrong.