How to use the "tzdata" files with pyarrow.compute.assume_timezone

738 Views Asked by At

I'm trying to use the function pyarrow.compute.assume_timezone, but I get the error:

pyarrow.lib.ArrowInvalid: Cannot locate timezone 'UTC': Unable to get Timezone database version from C:\Users\Nick\Downloads\tzdata\

I tried downloading the database from https://www.iana.org/time-zones, without success.

Has anyone got it to work?

import pyarrow
import pyarrow.compute as pc

import numpy
# Build a one-element nanosecond timestamp array that carries no timezone.
naive_value = numpy.datetime64("2022-10-10T12:00:12.123456789")
dt = pyarrow.array([naive_value], type=pyarrow.timestamp("ns"))

# Ask Arrow to treat the timezone-less values as UTC, then show the result.
localized = pc.assume_timezone(dt, "UTC")
print(localized)
1

There is 1 best solution below

0
Devyl On

Indeed, the Arrow documentation explains how to install the time zone database; thanks @Matt Johnson-Pint.

I made a script to install it, in case anyone wants it:

def download_tzdata_windows(
    base_dir=None,
    year=2022,
    name="tzdata",
    *,
    release="f",
):
    """Download the IANA time zone database and ``windowsZones.xml``.

    The archive ``tzdata<year><release>.tar.gz`` is saved as
    ``<base_dir>/tzdata.tar.gz`` and unpacked into ``<base_dir>/<name>``;
    ``windowsZones.xml`` from the Unicode CLDR repository is then written
    into that same folder. With the default arguments the files end up in
    ``~/Downloads/tzdata``.

    Args:
        base_dir: Folder that receives the archive and the extracted
            sub-folder. Falsy values select the current user's
            ``Downloads`` folder. Created if it does not exist.
        year: Year component of the IANA release name.
        name: Name of the sub-folder the archive is extracted into.
        release: Letter suffix of the IANA release (``"f"`` gives
            ``tzdata2022f`` for the default year).

    Raises:
        RuntimeError: If either download does not answer with HTTP 200.
    """
    import os
    import tarfile
    import urllib3

    http = urllib3.PoolManager()

    def _fetch(url):
        # Refuse non-200 replies so an HTML error page is never written to
        # disk and later mistaken for the archive or the XML mapping.
        response = http.request("GET", url)
        if response.status != 200:
            raise RuntimeError(
                f"GET {url} failed with HTTP status {response.status}"
            )
        return response.data

    folder = base_dir if base_dir else os.path.join(os.path.expanduser("~"), "Downloads")
    # A custom base_dir may not exist yet; the archive is written into it.
    os.makedirs(folder, exist_ok=True)
    tz_path = os.path.join(folder, "tzdata.tar.gz")

    # Download fully before opening the target so that a failed request
    # does not leave an empty or truncated archive behind.
    archive_bytes = _fetch(
        f"https://data.iana.org/time-zones/releases/tzdata{year}{release}.tar.gz"
    )
    with open(tz_path, "wb") as f:
        f.write(archive_bytes)

    folder = os.path.join(folder, name)
    os.makedirs(folder, exist_ok=True)

    with tarfile.open(tz_path) as archive:
        if hasattr(tarfile, "data_filter"):
            # Extraction filters are available: reject unsafe members
            # (absolute paths, links pointing outside the destination).
            archive.extractall(folder, filter="data")
        else:
            archive.extractall(folder)

    zones_bytes = _fetch(
        "https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml"
    )
    with open(os.path.join(folder, "windowsZones.xml"), "wb") as f:
        f.write(zones_bytes)
# Run the download for the 2022 release; base_dir and name keep their defaults.
download_tzdata_windows(year=2022)