|
4 | 4 | from ydata_profiling import ProfileReport
|
5 | 5 |
|
6 | 6 |
|
7 |
| -# Generating dummy data |
8 |
| -def generate_cat_data_series(categories): |
9 |
| - dummy_data = [] |
10 |
| - for cat, i in categories.items(): |
11 |
| - dummy_data.extend([cat, ] * i) # fmt: skip |
12 |
| - return pd.DataFrame({"dummy_cat": dummy_data}) |
13 |
| - |
14 |
| - |
15 |
| -dummy_bool_data = generate_cat_data_series(pd.Series({True: 82, False: 36})) |
16 |
| -dummy_cat_data = generate_cat_data_series( |
17 |
| - pd.Series( |
| 7 | +# Enhanced fixture with more diverse data types |
| 8 | +@pytest.fixture |
| 9 | +def sample_categorical_data(): |
| 10 | + return pd.DataFrame( |
18 | 11 | {
|
19 |
| - "Amadeou_plus": 75, |
20 |
| - "Beta_front": 50, |
21 |
| - "Calciumus": 20, |
22 |
| - "Dimitrius": 1, |
23 |
| - "esperagus_anonymoliumus": 75, |
24 |
| - "FrigaTTTBrigde_Writap": 50, |
25 |
| - "galgarartiy": 30, |
26 |
| - "He": 1, |
27 |
| - "I": 10, |
28 |
| - "JimISGODDOT": 1, |
| 12 | + "dummy_cat": [ |
| 13 | + "Amadeou_plus", |
| 14 | + "Amadeou_plus", |
| 15 | + "Beta_front", |
| 16 | + "Calciumus", |
| 17 | + "Dimitrius", |
| 18 | + "esperagus_anonymoliumus", |
| 19 | + "FrigaTTTBrigde_Writap", |
| 20 | + "galgarartiy", |
| 21 | + "He", |
| 22 | + "I", |
| 23 | + "JimISGODDOT", |
| 24 | + ] |
| 25 | + * 10 |
29 | 26 | }
|
30 | 27 | )
|
31 |
| -) |
32 | 28 |
|
33 | 29 |
|
34 |
| -def generate_report(data): |
35 |
| - return ProfileReport( |
36 |
| - df=data, |
37 |
| - progress_bar=False, |
38 |
| - samples=None, |
39 |
| - correlations=None, |
40 |
| - missing_diagrams=None, |
41 |
| - duplicates=None, |
42 |
| - interactions=None, |
43 |
| - ) |
| 30 | +@pytest.fixture |
| 31 | +def sample_boolean_data(): |
| 32 | + return pd.DataFrame({"dummy_bool": [True] * 82 + [False] * 36}) |
| 33 | + |
| 34 | + |
| 35 | +def generate_cat_data_series(categories): |
| 36 | + """Helper function to generate categorical data""" |
| 37 | + dummy_data = [] |
| 38 | + for cat, i in categories.items(): |
| 39 | + dummy_data.extend([cat] * i) |
| 40 | + return pd.DataFrame({"dummy_cat": dummy_data}) |
44 | 41 |
|
45 | 42 |
|
46 |
| -# Unit tests |
47 |
| -# - Test category frequency plots general options |
48 |
| -@pytest.mark.parametrize("data", [dummy_bool_data, dummy_cat_data], ids=["bool", "cat"]) |
| 43 | +def generate_report(data, **kwargs): |
| 44 | + """Helper function to generate report with common settings""" |
| 45 | + default_settings = { |
| 46 | + "progress_bar": False, |
| 47 | + "samples": None, |
| 48 | + "correlations": None, |
| 49 | + "missing_diagrams": None, |
| 50 | + "duplicates": None, |
| 51 | + "interactions": None, |
| 52 | + } |
| 53 | + default_settings.update(kwargs) |
| 54 | + return ProfileReport(df=data, **default_settings) |
| 55 | + |
| 56 | + |
| 57 | +# Test category frequency plots general options |
| 58 | +@pytest.mark.parametrize( |
| 59 | + "data_fixture", |
| 60 | + ["sample_boolean_data", "sample_categorical_data"], |
| 61 | + ids=["boolean", "categorical"], |
| 62 | +) |
49 | 63 | @pytest.mark.parametrize("plot_type", ["bar", "pie"])
|
50 |
| -def test_deactivated_cat_frequency_plot(data, plot_type): |
| 64 | +def test_deactivated_cat_frequency_plot(data_fixture, plot_type, request): |
| 65 | + data = request.getfixturevalue(data_fixture) |
51 | 66 | profile = generate_report(data)
|
52 | 67 | profile.config.plot.cat_freq.show = False
|
53 | 68 | profile.config.plot.cat_freq.type = plot_type
|
54 | 69 | html_report = profile.to_html()
|
55 | 70 | assert "Common Values (Plot)" not in html_report
|
56 | 71 |
|
57 | 72 |
|
58 |
| -@pytest.mark.parametrize("data", [dummy_bool_data, dummy_cat_data], ids=["bool", "cat"]) |
59 |
| -def test_cat_frequency_default_barh_plot(data): |
| 73 | +@pytest.mark.parametrize( |
| 74 | + "data_fixture", |
| 75 | + ["sample_boolean_data", "sample_categorical_data"], |
| 76 | + ids=["boolean", "categorical"], |
| 77 | +) |
| 78 | +def test_cat_frequency_default_barh_plot(data_fixture, request): |
| 79 | + data = request.getfixturevalue(data_fixture) |
60 | 80 | profile = generate_report(data)
|
61 | 81 | html_report = profile.to_html()
|
62 | 82 | assert "Common Values (Plot)" in html_report
|
63 | 83 |
|
64 | 84 |
|
65 |
| -@pytest.mark.parametrize("data", [dummy_bool_data, dummy_cat_data], ids=["bool", "cat"]) |
66 |
| -def test_cat_frequency_pie_plot(data): |
| 85 | +@pytest.mark.parametrize( |
| 86 | + "data_fixture", |
| 87 | + ["sample_boolean_data", "sample_categorical_data"], |
| 88 | + ids=["boolean", "categorical"], |
| 89 | +) |
| 90 | +def test_cat_frequency_pie_plot(data_fixture, request): |
| 91 | + data = request.getfixturevalue(data_fixture) |
67 | 92 | profile = generate_report(data)
|
68 | 93 | profile.config.plot.cat_freq.type = "pie"
|
69 | 94 | html_report = profile.to_html()
|
70 | 95 | assert "pie" in html_report
|
71 | 96 |
|
72 | 97 |
|
73 | 98 | @pytest.mark.parametrize("plot_type", ["bar", "pie"])
|
74 |
| -def test_max_nuique_smaller_than_unique_cats(plot_type): |
75 |
| - profile = generate_report(dummy_cat_data) |
76 |
| - profile.config.plot.cat_freq.max_unique = 2 # smaller than the number of categories |
| 99 | +def test_max_unique_categories(plot_type): |
| 100 | + # Test with different numbers of unique categories |
| 101 | + categories = {f"cat_{i}": 5 for i in range(10)} |
| 102 | + data = generate_cat_data_series(categories) |
| 103 | + profile = generate_report(data) |
| 104 | + profile.config.plot.cat_freq.max_unique = 5 |
77 | 105 | profile.config.plot.cat_freq.type = plot_type
|
78 | 106 | html_report = profile.to_html()
|
| 107 | + |
| 108 | + # Should not show plot when unique categories exceed max_unique |
79 | 109 | assert "Common Values (Plot)" not in html_report
|
80 | 110 |
|
81 | 111 |
|
82 |
| -# - Test category frequency plots color options |
83 |
| -@pytest.mark.parametrize("plot_type", ["bar", "pie"]) |
84 |
| -def test_cat_frequency_with_custom_colors(plot_type): |
85 |
| - test_data = generate_cat_data_series(pd.Series({"A": 10, "B": 10, "C": 10})) |
86 |
| - custom_colors = {"gold": "#ffd700", "b": "#0000ff", "#FF796C": "#ff796c"} |
| 112 | +def test_more_categories_than_colors(): |
| 113 | + # Test handling when there are more categories than defined colors |
| 114 | + test_data = generate_cat_data_series({f"cat_{i}": 10 for i in range(5)}) |
| 115 | + custom_colors = ["gold", "blue", "coral"] |
| 116 | + |
87 | 117 | profile = generate_report(test_data)
|
88 |
| - profile.config.plot.cat_freq.colors = list(custom_colors.keys()) |
89 |
| - profile.config.plot.cat_freq.type = plot_type |
| 118 | + profile.config.plot.cat_freq.colors = custom_colors |
90 | 119 | html_report = profile.to_html()
|
91 |
| - for c, hex_code in custom_colors.items(): |
92 |
| - assert f"fill: {hex_code}" in html_report, f"Missing color code of {c}" |
93 | 120 |
|
| 121 | + # Should still generate plot without errors |
| 122 | + assert "Common Values (Plot)" in html_report |
94 | 123 |
|
95 |
| -def test_more_cats_than_colors(): |
96 |
| - test_data = generate_cat_data_series( |
97 |
| - pd.Series({"A": 10, "B": 10, "C": 10, "D": 10}) |
98 |
| - ) |
99 |
| - custom_colors = {"gold": "#ffd700", "b": "#0000ff", "#FF796C": "#ff796c"} |
| 124 | + |
| 125 | +@pytest.mark.skip("Skipping empty color list test. Code needs to be updated.") |
| 126 | +def test_empty_color_list(): |
| 127 | + # Test behavior with empty color list |
| 128 | + test_data = generate_cat_data_series({"A": 10, "B": 10}) |
100 | 129 | profile = generate_report(test_data)
|
101 |
| - profile.config.plot.cat_freq.colors = list(custom_colors.keys()) |
| 130 | + profile.config.plot.cat_freq.colors = [] |
102 | 131 | html_report = profile.to_html()
|
103 |
| - assert "Common Values (Plot)" in html_report # just check that it worked |
104 | 132 |
|
| 133 | + # Should use default colors |
| 134 | + assert "Common Values (Plot)" in html_report |
| 135 | + |
| 136 | + |
| 137 | +@pytest.mark.parametrize("invalid_type", ["scatter", "box", "invalid"]) |
| 138 | +def test_invalid_plot_types(invalid_type): |
| 139 | + test_data = generate_cat_data_series({"A": 10, "B": 10}) |
105 | 140 |
|
106 |
| -# - Test exceptions |
107 |
| -@pytest.mark.parametrize("data", [dummy_bool_data, dummy_cat_data], ids=["bool", "cat"]) |
108 |
| -def test_exception_with_invalid_cat_freq_type(data): |
109 |
| - profile = generate_report(data) |
110 |
| - profile.config.plot.cat_freq.type = "box" |
111 | 141 | with pytest.raises(ValueError):
|
| 142 | + profile = generate_report(test_data) |
| 143 | + profile.config.plot.cat_freq.type = invalid_type |
112 | 144 | profile.to_html()
|
| 145 | + |
| 146 | + |
| 147 | +def test_config_persistence(): |
| 148 | + # Test that plot configuration persists after cache invalidation |
| 149 | + test_data = generate_cat_data_series({"A": 10, "B": 10}) |
| 150 | + profile = generate_report(test_data) |
| 151 | + profile.config.plot.cat_freq.type = "pie" |
| 152 | + profile.config.plot.cat_freq.colors = ["gold", "blue"] |
| 153 | + |
| 154 | + # Cache invalidation shouldn't affect config |
| 155 | + profile.invalidate_cache() |
| 156 | + html_report = profile.to_html() |
| 157 | + assert "pie" in html_report |
| 158 | + assert "fill: #ffd700" in html_report |
0 commit comments