mock_open with variable read_data

150 Views Asked by At

I looked through similar questions, but couldn't find one that answers mine.

I have a function like so:

from smart_open import open
import pandas as pd
def foo():
    """Load two CSV files and return a value derived from each.

    ``file1.txt`` is parsed with ``pd.read_csv`` and passed to
    ``do_something``; ``file2.txt`` is parsed the same way and passed to
    ``do_something_else``.

    Returns:
        A ``(value1, value2)`` tuple holding the two helper results.
    """
    def _read_frame(path):
        # The handle is closed again before the frame is handed back.
        with open(path, 'r') as f:
            frame = pd.read_csv(f)
        return frame

    df1 = _read_frame('file1.txt')
    value1 = do_something(df1)

    df2 = _read_frame('file2.txt')
    value2 = do_something_else(df2)

    return value1, value2
     

Now I want to test this without having to read the actual files. So I am looking at using mock_open inside a patch to mock the return value of open, so that I can inject different data for each file.

The question: How do I ensure that I get two different files depending on the filename provided to the open function?

This is my current code:

def read_sample_data(*args, **kwargs):
    """Return canned sample data chosen by the first positional argument.

    Args:
        *args: ``args[0]`` is compared against the known file names.
        **kwargs: Accepted and ignored.

    Returns:
        A one-column (``'test'``) DataFrame for ``'file1.txt'`` or
        ``'file2.txt'``; ``None`` for any other value.
    """
    filepath = args[0]
    if filepath == 'file1.txt':
        return pd.DataFrame({'test': [1, 2]})
    if filepath == 'file2.txt':
        return pd.DataFrame({'test': [3, 4]})
    # Unknown names fall through without raising.
    return None

def forward_filename(*args, **kwargs):
    """Create a ``mock_open`` mock whose read data is ``args[0]``.

    Args:
        *args: ``args[0]`` is used as ``read_data``; calling this with no
            positional argument raises ``IndexError``.
        **kwargs: Accepted and ignored.

    Returns:
        The mock produced by ``mock_open``.
    """
    filename = args[0]
    opener = mock_open(read_data=filename)
    return opener

# Replace pd.read_csv with a mock that delegates to read_sample_data.
@patch('__main__.pd.read_csv', side_effect=read_sample_data)
# Replace open with whatever forward_filename builds.
# NOTE(review): forward_filename indexes args[0]; verify that patch passes a
# positional argument to new_callable, otherwise this raises IndexError.
@patch("__main__.open", new_callable=forward_filename)
def test_foo(self, open, read_csv):
        """Call foo() with open and pd.read_csv patched.

        The bottom-most patch supplies the first mock argument (``open``),
        the top-most the last (``read_csv``).
        Takes ``self``, so presumably meant to live inside a TestCase -- confirm.
        """
        value1, value2 = foo()
        # ... assertions on value1 / value2 go here

Just to clarify: the code above fails with an error. I also tried returning the filename directly as a str from the mocked open(), but that fails because a str cannot be used as a context manager in a with statement.

I am guessing the solution is simple, so I am grateful for any tips :)

1

There is 1 best solution below

2
On BEST ANSWER

I guess the easiest way is to supply an iterable of the desired dataframes as side_effect to the read_csv mock (and verify that open was called with the intended filenames):

import unittest
from unittest.mock import patch, call, MagicMock
import pandas as pd
from smart_open import open

def foo():
    """Return the grand totals of ``file1.txt`` and ``file2.txt``.

    Each file is opened, parsed with ``pd.read_csv`` and reduced with
    ``.sum().sum()``; the files are processed in that order.

    Returns:
        A ``(value1, value2)`` tuple, one total per file.
    """
    def _grand_total(path):
        # Parse inside the ``with`` block, reduce after the handle is closed.
        with open(path, 'r') as f:
            frame = pd.read_csv(f)
        return frame.sum().sum()

    value1, value2 = [_grand_total(name) for name in ('file1.txt', 'file2.txt')]
    return value1, value2

class Test(unittest.TestCase):
    """Checks foo() with open and pd.read_csv replaced by mocks."""

    @patch(
        '__main__.pd.read_csv',
        # One canned frame per read_csv call, handed out in order.
        side_effect=[
            pd.DataFrame({'test': [1, 2]}),
            pd.DataFrame({'test': [3, 4]}),
        ],
    )
    @patch("__main__.open")
    def test_foo(self, open, read_csv):
        """foo() returns each frame's total and opens both files in order."""
        expected_calls = [call('file1.txt', 'r'), call('file2.txt', 'r')]

        value1, value2 = foo()

        self.assertEqual(value1, 3)
        self.assertEqual(value2, 7)
        self.assertEqual(open.call_args_list, expected_calls)

# Run only Test.test_foo and report the result through a text runner.
unittest.TextTestRunner().run(Test('test_foo'))

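If you want to stick with your original approach, you'll need to mock the __enter__ method of the mock returned by open, so that the with statement binds f to the filename and the read_csv side_effect receives it: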

class Test(unittest.TestCase):
    """Checks foo() by routing the filename from open through to read_csv.

    The two helpers below are plain functions that the ``patch`` decorators
    pick up while the class body is being executed; they are used as
    ``side_effect`` callables.
    """

    def read_sample_data(*args, **kwargs):
        """Return the canned frame for the name in ``args[0]``, else ``None``."""
        filepath = args[0]
        if filepath == 'file1.txt':
            return pd.DataFrame({'test': [1, 2]})
        if filepath == 'file2.txt':
            return pd.DataFrame({'test': [3, 4]})
        return None

    def side_effect(*args, **kwargs):
        """Build a mock whose ``with ... as`` target is ``args[0]``."""
        fake_file = MagicMock()
        # Entering the context yields the first argument passed to open.
        fake_file.__enter__.return_value = args[0]
        return fake_file

    @patch('__main__.pd.read_csv', side_effect=read_sample_data)
    @patch("__main__.open", side_effect=side_effect)
    def test_foo(self, open, read_csv):
        """foo() returns the totals of the two canned frames."""
        first_total, second_total = foo()
        self.assertEqual(first_total, 3)
        self.assertEqual(second_total, 7)