I am trying to create a pyarrow table and then write that into parquet files.
def test_pyarow():
import pyarrow as pa
import pyarrow.parquet
import pandas as pd
fields = [pa.field('id', pa.string(), nullable=False),
pa.field('name', pa.string(), nullable=False)]
array = [pa.array(['10', '11', '12', '13']),
pa.array(['AAA', None, 'BBB', 'CCC'])]
table = pa.Table.from_arrays(array, schema=pa.schema(fields))
pyarrow.parquet.write_table(table, 'test_arrow.parquet', compression='SNAPPY', use_compliant_nested_type=True)
df = pd.read_parquet("/Users/fki/Documents/git/Demo/bq_api/test_arrow.parquet", engine='pyarrow')
print("\n\n\n")
print(df)
when nullable is True:
id name
0 10 AAA
1 11 None
2 12 BBB
3 13 CCC
when nullable is False:
id name
0 10 AAA
1 11 BBB
2 12 CCC
3 13 AAA