lundi 19 septembre 2016

AssertionError when testing two Pandas DataFrames

I am building a small test class to test a pandas-heavy script. The script takes an XML file as input; however, for my test class I made .data files out of the element attributes so that I can easily load them into a dict object.

class MetricsTest(TestCase):
    """Compare the DataFrame built from the XML fixture with one built from
    the ``.data`` fixture files.

    ``setUpClass`` parses ``test_files/games/TEST_p_stats.xml`` once and keeps
    the resulting DataFrame on ``cls.df``; the individual tests rebuild the
    expected DataFrame from ``test_files/games_data/*.data``.
    """

    @classmethod
    def setUpClass(cls):
        """Load the shared fixtures used by every test in this class."""
        def get_files(dir_path):
            """Return the paths of the regular files directly in dir_path."""
            return [join(dir_path, f)
                    for f in listdir(dir_path) if isfile(join(dir_path, f))]

        super(MetricsTest, cls).setUpClass()

        # Base names (without the ".data" suffix) of the expected-data files,
        # in the order their rows are expected to appear.
        cls.data_files = ['p1-left-left', 'p2-left-right', 'p3-left-left',
                          'p4-left-right', 'p5-left-left', 'p6-left-right']

        file_name = 'TEST_p_stats.xml'
        file_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'test_files/games/{}'.format(
                file_name))

        # Every file that sits next to the XML fixture is handed to Metrics.
        dir_path = os.path.dirname(file_path)
        cls.files = get_files(dir_path)

        cls.metrics = Metrics(cls.files)
        #cls.metrics.run()
        cls.data = dict()
        xml = xml_parse(file_path)
        cls.xml = xml
        cls.df = dataframe_from_clusters(xml['p'])

    def setup_default_df(self):
        """Build the expected DataFrame from the ``.data`` fixture files.

        Each non-blank line of a file is split at its first run of
        whitespace into a key and a value; one file becomes one
        single-record cluster.

        NOTE(review): every value parsed here is a ``str``. If the
        XML-derived frame holds numeric dtypes the two frames will not
        compare equal -- confirm against ``xml_parse``.
        """
        data = []
        for f in self.data_files:
            _data = []
            with open(os.path.join(
                    os.path.dirname(os.path.realpath(__file__)),
                    'test_files/games_data/{}.data'.format(f))) as _f:
                # Skip blank lines: an empty split result cannot form a
                # (key, value) pair and would make dict() raise ValueError.
                _data.append(dict(
                    x.replace('\n', '').split(None, 1)
                    for x in _f if x.strip()))
            data.append(_data)
        return dataframe_from_clusters(data)

    def assertFrameEqual(self, df1, df2):
        """
        Assert that two dataframes are equal,
        ignoring ordering of columns"""
        # DataFrame.sort() was deprecated and later removed from pandas;
        # sort_index(axis=1) orders the columns by label.
        return assert_frame_equal(df1.sort_index(axis=1),
                                  df2.sort_index(axis=1),
                                  check_names=True)

    def test_filter_df_no_direction(self):
        """The XML-derived frame must match the frame built from .data files."""
        actual_df = self.df

        expected_df = self.setup_default_df()
        self.assertFrameEqual(expected_df, actual_df)

However, this gives me the following error:

  File "pandas/src/testing.pyx", line 58, in pandas._testing.assert_almost_equal (pandas/src/testing.c:2758)
  File "pandas/src/testing.pyx", line 93, in pandas._testing.assert_almost_equal (pandas/src/testing.c:1843)
  File "pandas/src/testing.pyx", line 135, in pandas._testing.assert_almost_equal (pandas/src/testing.c:2527)
AssertionError: (very low values) expected 1.00000 but got 0.00000, with decimal 5

The code of the dataframe_from_clusters function is:

def dataframe_from_clusters(clusters):
    """Combine per-cluster records into a single DataFrame.

    Each element of ``clusters`` is passed to ``pd.DataFrame``, and the
    resulting frame gets a ``"cluster"`` column holding that element's
    position in ``clusters``. The frames are then stacked row-wise and the
    index is renumbered from 0.

    Args:
        clusters: Iterable whose elements are each accepted by the
            ``pd.DataFrame`` constructor (e.g. a list of dicts).

    Returns:
        The concatenated DataFrame, or an empty DataFrame when ``clusters``
        yields no elements.
    """
    frames = []
    for idx, cluster in enumerate(clusters):
        cluster_df = pd.DataFrame(cluster)
        cluster_df["cluster"] = idx
        frames.append(cluster_df)

    # pd.concat raises ValueError on an empty list, so keep returning an
    # empty frame when there is nothing to combine.
    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing the result inside the loop, which
    # re-copies all accumulated rows on every iteration and seeds the result
    # with an empty frame.
    return pd.concat(frames, ignore_index=True)

Aucun commentaire:

Enregistrer un commentaire