copy a dataframe to new variable with method chaining

109 Views Asked by At

Is it possible to copy a dataframe in the middle of a method chain to a new variable? Something like:

import pandas as pd

df = (pd.DataFrame([[2, 4, 6],
                    [8, 10, 12],
                    [14, 16, 18],
                    ])
      .assign(something_else=100)
      .div(2)
      .copy_to_new_variable(df_imag)  # Imaginary method that would copy the intermediate df to df_imag.
      .div(10)
      )

print(df_imag) would then print:

    0   1   2   something_else
0   1.0 2.0 3.0 50.0
1   4.0 5.0 6.0 50.0
2   7.0 8.0 9.0 50.0

.copy_to_new_variable(df_imag) could be replaced by df_imag = df.copy(), but that would mean breaking up the method chain.

2

There are 2 answers below

2
mozway On BEST ANSWER

Creating variables dynamically is not a good idea, but you can easily take advantage of mutable objects like dictionaries.

You can add a new DataFrame method to do this seamlessly:

from pandas.core.base import PandasObject

### this only needs to be done once per session
def to_name(df, dic, name, copy=False):
    """Store ``df`` in ``dic`` under ``name`` and return ``df`` unchanged.

    Because the input object is returned as-is, the call can be placed in
    the middle of a method chain without interrupting it.

    Parameters
    ----------
    df : the object currently flowing through the chain
    dic : mutable mapping that receives the stored entry
    name : key under which the entry is stored
    copy : if true, ``df.copy()`` is stored instead of ``df`` itself

    Returns
    -------
    The very same ``df`` object that was passed in.
    """
    if copy:
        stored = df.copy()
    else:
        stored = df
    dic[name] = stored
    return df
    
# Attach the helper as an attribute of PandasObject so it can be invoked
# with method syntax (`.to_name(...)`) inside the chain below.
PandasObject.to_name = to_name
###

# Plain dict that receives the intermediate frames captured during the chain.
tmp = {}

df = (pd.DataFrame([[2, 4, 6],
                    [8, 10, 12],
                    [14, 16, 18],
                    ])
      .assign(something_else=100)
      .div(2)
      .to_name(tmp, 'after_div2', copy=True)  # stash a copy of the halved frame under 'after_div2'
      .div(10)
      )

# The frame captured in the middle of the chain.
print(tmp['after_div2'])

# The final result of the whole chain.
print(df)

Output:

# tmp['after_div2']
     0    1    2  something_else
0  1.0  2.0  3.0            50.0
1  4.0  5.0  6.0            50.0
2  7.0  8.0  9.0            50.0

# df
     0    1    2  something_else
0  0.1  0.2  0.3             5.0
1  0.4  0.5  0.6             5.0
2  0.7  0.8  0.9             5.0

If you don't want to monkey-patch pandas objects, use pipe instead:

def to_name(df, dic, name, copy=False):
    """Save ``df`` into ``dic[name]`` and pass ``df`` through untouched.

    Intended to be used with ``.pipe(to_name, dic, name)`` so that an
    intermediate result can be captured without leaving the chain.

    Parameters
    ----------
    df : the object received from the chain
    dic : mutable mapping used as storage
    name : key for the stored entry
    copy : when true, store ``df.copy()`` rather than ``df`` itself

    Returns
    -------
    The same ``df`` object that was passed in.
    """
    snapshot = df
    if copy:
        snapshot = df.copy()
    dic[name] = snapshot
    return df

# Plain dict that receives the intermediate frame captured during the chain.
tmp = {}

df = (pd.DataFrame([[2, 4, 6],
                    [8, 10, 12],
                    [14, 16, 18],
                    ])
      .assign(something_else=100)
      .div(2)
      .pipe(to_name, tmp, 'after_div2')  # pipe passes the frame as first argument; copy keeps its default (False)
      .div(10)
      # print() returns None, so `or df` hands the frame back to the chain.
      .pipe(lambda df: print('\nQuick alternative:', df, sep='\n') or df)
      )

# The frame captured in the middle of the chain.
print(tmp['after_div2'])

Printing

Along the same lines, you can also add a chainable print method, or again use a lambda in pipe:

from pandas.core.base import PandasObject

### this only needs to be done once per session
def df_print(df, *args):
    """Print ``df`` (optionally preceded by a header) and return ``df``.

    Parameters
    ----------
    df : the object to print and pass through
    *args : optional values printed on their own line before ``df``;
        nothing extra is printed when none are given

    Returns
    -------
    The same ``df`` object, so the call can sit inside a method chain.
    """
    header = args
    if len(header) > 0:
        print(*header)
    print(df)
    return df
    
# Attach the helper as an attribute of PandasObject so it can be invoked
# with method syntax (`.print(...)`) inside the chain below.
PandasObject.print = df_print
###

df = (pd.DataFrame([[2, 4, 6],
                    [8, 10, 12],
                    [14, 16, 18],
                    ])
      .print()  # show the initial frame, no header
      .assign(something_else=100)
      .div(2)
      .print('\nAfter 2:')  # header line is printed first, then the frame
      .div(10)
      # print() returns None, so `or df` hands the frame back to the chain.
      .pipe(lambda df: print('\nQuick alternative:', df, sep='\n') or df)
      )

Output:

    0   1   2
0   2   4   6
1   8  10  12
2  14  16  18

After 2:
     0    1    2  something_else
0  1.0  2.0  3.0            50.0
1  4.0  5.0  6.0            50.0
2  7.0  8.0  9.0            50.0

Quick alternative:
     0    1    2  something_else
0  0.1  0.2  0.3             5.0
1  0.4  0.5  0.6             5.0
2  0.7  0.8  0.9             5.0

As a module

You could also create a module:

pandas_debug.py

from pandas.core.base import PandasObject

def df_print(df, *args):
    """Print an optional header followed by ``df``, then return ``df``.

    Parameters
    ----------
    df : the object to print and pass through
    *args : optional values forwarded to a separate ``print`` call that
        runs before ``df`` is printed; skipped when empty

    Returns
    -------
    The same ``df`` object that was passed in.
    """
    if not args:
        print(df)
        return df
    print(*args)
    print(df)
    return df
    
# Runs at import time: exposes df_print as a `.print(...)` attribute on PandasObject.
PandasObject.print = df_print

def to_name(df, dic, name, copy=False):
    """Keep a reference to (or a copy of) ``df`` in ``dic`` and return ``df``.

    Parameters
    ----------
    df : the object flowing through the chain
    dic : mutable mapping that receives the entry
    name : key under which the entry is stored
    copy : if true, the stored value is ``df.copy()``; otherwise it is
        ``df`` itself

    Returns
    -------
    The same ``df`` object, so the chain can continue.
    """
    dic[name] = df if not copy else df.copy()
    return df

# Runs at import time: exposes to_name as a `.to_name(...)` attribute on PandasObject.
PandasObject.to_name = to_name

Then in your code:

import pandas as pd
import pandas_debug

# Plain dict that receives the intermediate frame captured during the chain.
tmp = {}
df = (pd.DataFrame([[2, 4, 6],
                    [8, 10, 12],
                    [14, 16, 18],
                    ])
      .assign(something_else=100)
      .div(2)
      .to_name(tmp, 'after_div2')  # available because importing pandas_debug attached it
      .div(10)
      .print()  # likewise attached by pandas_debug
      )
1
Andrej Kesely On

Use the := (assignment expression, "walrus") operator, available since Python 3.8:

# The parenthesised assignment expression binds the intermediate result to
# df_imag and also yields it, so .div(10) continues from that same value.
df = (df_imag := df.assign(new_var=100).div(2)).div(10)
print(df)
print(df_imag)

Prints:

     0    1    2  new_var
0  0.1  0.2  0.3      5.0
1  0.4  0.5  0.6      5.0
2  0.7  0.8  0.9      5.0

     0    1    2  new_var
0  1.0  2.0  3.0     50.0
1  4.0  5.0  6.0     50.0
2  7.0  8.0  9.0     50.0