How to write a function that works on both Numpy arrays and Pandas series, returning the same type

66 Views Asked by At

A feature of some NumPy functions is that they work on both NumPy arrays and pandas Series, returning the same type that was passed in:

# The same three values, once as a labelled pandas Series and once as a
# plain NumPy array.
my_series = pd.Series([10, 20, 30], index=[2000, 2001, 2002], name='My series')
my_array = np.array([10, 20, 30])

# np.cumsum is applied to each container in turn; the Series is printed first.
for data in (my_series, my_array):
    print(np.cumsum(data))

Output:

2000    10
2001    30
2002    60
Name: My series, dtype: int64
[10 30 60]

How is this achieved and can I write my own functions in a way that does this?

As an example, let's say I have a Python function that takes an array and returns an array of the same length:

def my_func(x):
    """Return the running (cumulative) sum of ``x`` as a NumPy array.

    Args:
        x: A sequence of numbers (NumPy array, list, or pandas Series).

    Returns:
        An array created with ``np.empty_like(x)`` whose element ``i`` holds
        the sum of the first ``i + 1`` elements of ``x``.
    """
    a = np.empty_like(x)
    b = 0
    # Iterate over the values themselves rather than indexing ``x[i]``:
    # positional indexing fails on a Series whose labels are not 0..n-1.
    for i, value in enumerate(x):
        b += value
        a[i] = b
    return a

How can I generalize it so that it returns an array when an array is passed, and a pandas Series or DataFrame when one of those is passed?

I realize I could do the following, but I suspect this is not how it is done in the Numpy example above:

def my_func_for_array_or_series(x):
    """Apply ``my_func`` to ``x`` and return a result of the same kind as ``x``.

    Args:
        x: Either an object exposing ``.values`` and ``.copy()`` (such as a
            pandas Series or DataFrame) or anything ``my_func`` accepts
            directly (such as a NumPy array).

    Returns:
        For objects with ``.values``: a copy of ``x`` whose contents are
        replaced by ``my_func(x.values)``. Otherwise: ``my_func(x)``.
    """
    # Only the attribute lookup is guarded, so an AttributeError raised
    # inside ``my_func`` itself is no longer swallowed and silently retried.
    try:
        values = x.values
    except AttributeError:
        return my_func(x)
    a = x.copy()
    a[:] = my_func(values)
    return a
1

There is 1 best solution below

0
Bill On BEST ANSWER

Here's a workaround utilizing a decorator.

import numpy as np
import pandas as pd
import functools


def apply_to_pandas(func):
    """Wrap ``func`` so its result is returned in a copy of the input object.

    The wrapper copies its first argument ``x``, runs ``func`` over ``x``
    along axis 0 with ``np.apply_along_axis`` (forwarding any extra
    positional and keyword arguments), writes the outcome into the copy via
    slice assignment, and returns that copy.
    """
    def _same_container(x, *args, **kwargs):
        result = x.copy()
        computed = np.apply_along_axis(func, 0, x, *args, **kwargs)
        result[:] = computed
        return result

    # Equivalent to decorating the inner function with @functools.wraps(func).
    return functools.wraps(func)(_same_container)


@apply_to_pandas
def my_func(x):
    """Return the running (cumulative) sum of the sequence ``x``.

    Args:
        x: A sequence of numbers.

    Returns:
        An array created with ``np.empty_like(x)`` whose element ``i`` holds
        the sum of the first ``i + 1`` elements of ``x``.
    """
    a = np.empty_like(x)
    b = 0
    # Walk the values directly instead of using range(len(x)) with x[i].
    for i, value in enumerate(x):
        b += value
        a[i] = b
    return a


# Test
# Sample inputs: a plain array, a labelled Series, and a three-column
# DataFrame made by placing the Series side by side with itself.
my_series = pd.Series([10, 20, 30], index=[2000, 2001, 2002], name='My series')
my_array = np.array([10, 20, 30])
my_df = pd.concat([my_series, my_series, my_series], axis=1)

# Print each result followed by one blank line.
for item in (my_array, my_series, my_df):
    print(my_func(item))
    print()

Output:

[10 30 60]

2000    10
2001    30
2002    60
Name: My series, dtype: int64

      My series  My series  My series
2000         10         10         10
2001         30         30         30
2002         60         60         60

Here is a more verbose but more efficient version of the decorator, which avoids making an unnecessary copy of the data when the input is a NumPy array or a list:

def apply_to_pandas(func):
    """Wrap ``func`` so pandas-style inputs come back in the same container.

    Args:
        func: A function of a 1-D sequence; extra positional and keyword
            arguments given to the wrapper are forwarded to it.

    Returns:
        A wrapper around ``func``. For a NumPy array or a list, the wrapper
        returns ``func(x, ...)`` directly. For any other input, it returns a
        copy of ``x`` filled with the result of applying ``func`` along
        axis 0 of ``x``; the input object itself is left unmodified.
    """
    @functools.wraps(func)
    def wrapper_func(x, *args, **kwargs):
        if isinstance(x, (np.ndarray, list)):
            # Nothing to re-wrap, so no copy of the input is needed.
            return func(x, *args, **kwargs)
        # A real copy is required here: a shallow ``copy(deep=False)`` shares
        # its data with ``x``, so the in-place ``out[:] = ...`` below could
        # overwrite the caller's object.
        out = x.copy()
        out[:] = np.apply_along_axis(func, 0, x, *args, **kwargs)
        return out
    return wrapper_func