I am building a small test class to test a pandas-heavy
script. The script takes an XML file as input; for my test class, however, I created .data
files from the element attributes so that I can easily load them into a dict object.
class MetricsTest(TestCase):
    """Check the frame built from the XML fixture against ``.data`` fixtures.

    ``setUpClass`` parses ``test_files/games/TEST_p_stats.xml`` once and
    stores the resulting frame on the class; ``setup_default_df`` rebuilds
    the expected frame from the ``test_files/games_data/*.data`` files.
    """

    @classmethod
    def setUpClass(cls):
        """Load the shared fixtures once for the whole test class."""

        def get_files(dir_path):
            """Return the paths of the regular files directly in *dir_path*."""
            return [join(dir_path, f)
                    for f in listdir(dir_path) if isfile(join(dir_path, f))]

        super(MetricsTest, cls).setUpClass()
        # Base names (without the ``.data`` suffix) of the expected-value
        # fixtures, in the order they are loaded by ``setup_default_df``.
        cls.data_files = ['p1-left-left', 'p2-left-right', 'p3-left-left',
                          'p4-left-right', 'p5-left-left', 'p6-left-right']
        file_name = 'TEST_p_stats.xml'
        file_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'test_files/games/{}'.format(file_name))
        # Every file that sits next to the XML fixture is handed to Metrics.
        dir_path = os.path.dirname(file_path)
        cls.files = get_files(dir_path)
        cls.metrics = Metrics(cls.files)
        # cls.metrics.run()
        cls.data = dict()
        xml = xml_parse(file_path)
        cls.xml = xml
        cls.df = dataframe_from_clusters(xml['p'])

    def setup_default_df(self):
        """Build the expected frame from the ``.data`` fixture files.

        Each non-blank line of a fixture is split on the first run of
        whitespace into a ``key value`` pair, so every value is loaded as a
        string. Each file contributes a one-element list holding that dict,
        i.e. one entry per file in the list passed to
        ``dataframe_from_clusters``.

        Returns:
            The frame produced by ``dataframe_from_clusters`` for those
            entries.
        """
        # NOTE(review): every file becomes its own single-row cluster here.
        # If xml['p'] groups several rows per cluster, the ``cluster``
        # column will not match -- confirm against the XML fixture.
        data = []
        base_dir = os.path.dirname(os.path.realpath(__file__))
        for f in self.data_files:
            path = os.path.join(
                base_dir, 'test_files/games_data/{}.data'.format(f))
            with open(path) as _f:
                # Skip blank lines: they cannot be split into a key/value
                # pair and would make dict() raise ValueError.
                record = dict(
                    x.replace('\n', '').split(None, 1)
                    for x in _f if x.strip())
            data.append([record])
        return dataframe_from_clusters(data)

    def assertFrameEqual(self, df1, df2):
        """Assert that two dataframes are equal, ignoring column order.

        Both frames are sorted by column label before comparison.
        ``sort_index(axis=1)`` is used because ``DataFrame.sort`` no longer
        exists in current pandas releases.
        """
        return assert_frame_equal(df1.sort_index(axis=1),
                                  df2.sort_index(axis=1),
                                  check_names=True)

    def test_filter_df_no_direction(self):
        """The XML-derived frame must equal the ``.data``-derived frame."""
        expected_df = self.setup_default_df()
        self.assertFrameEqual(expected_df, self.df)
However, this gives me the following error:
File "das/src/testing.pyx", line 58, in pandas._testing.assert_almost_equal (pandas/src/testing.c:2758)
File "das/src/testing.pyx", line 93, in pandas._testing.assert_almost_equal (pandas/src/testing.c:1843)
File "das/src/testing.pyx", line 135, in pandas._testing.assert_almost_equal (pandas/src/testing.c:2527)
AssertionError: (very low values) expected 1.00000 but got 0.00000, with decimal 5
The code of the dataframe_from_clusters function is:
def dataframe_from_clusters(clusters):
    """Stack per-cluster frames into one frame tagged with a cluster index.

    Each item of *clusters* is passed to ``pd.DataFrame`` and receives a
    ``cluster`` column holding the item's position in *clusters*. The
    resulting frames are concatenated with a fresh 0..n-1 row index.

    Args:
        clusters: Iterable of objects accepted by the ``pd.DataFrame``
            constructor (for example a list of dicts per cluster).

    Returns:
        A single ``pd.DataFrame``; an empty frame when *clusters* is empty.
    """
    frames = []
    for idx, cluster in enumerate(clusters):
        cluster_df = pd.DataFrame(cluster)
        cluster_df["cluster"] = idx
        frames.append(cluster_df)
    if not frames:
        # pd.concat raises on an empty list; keep the empty-frame result.
        return pd.DataFrame()
    # Concatenate once: calling pd.concat inside the loop recopies the
    # accumulated frame on every iteration.
    return pd.concat(frames, ignore_index=True)
Aucun commentaire:
Enregistrer un commentaire