BUG: barplot with NaNs

TomAugspurger · TomAugspurger · commit 4089cacbbd5f · 2014-09-06T19:29:07.000-05:00
diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt
@@ -670,3 +670,4 @@ Bug Fixes
 
 - Bug with kde plot and NaNs (:issue:`8182`)
 - Bug in ``GroupBy.count`` with float32 data type were nan values were not excluded (:issue:`8169`).
+- Bug with stacked barplots and NaNs (:issue:`8175`).
diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst
@@ -203,6 +203,13 @@ To get horizontal bar plots, pass ``kind='barh'``:
    @savefig barh_plot_stacked_ex.png
    df2.plot(kind='barh', stacked=True);
 
+Pandas tries to be pragmatic about plotting DataFrames or Series
+that contain missing data. When it makes sense, missing values are
+filled with 0 (as is the case with bar plots). For other plots,
+like line plots, filling in 0 usually doesn't make sense, so pandas
+doesn't try to guess. If you want to be explicit, you can always call
+:meth:`~pandas.DataFrame.fillna` before plotting.
+
 .. _visualization.hist:
 
 Histograms
diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py
@@ -1479,6 +1479,23 @@ def test_bar_bottom_left(self):
             result = [p.get_x() for p in ax.patches]
             self.assertEqual(result, [1] * 5)
 
+    @slow
+    def test_bar_nan(self):
+        df = DataFrame({'A': [10, np.nan, 20], 'B': [5, 10, 20],
+                        'C': [1, 2, 3]})
+        ax = df.plot(kind='bar')
+        expected = [10, 0, 20, 5, 10, 20, 1, 2, 3]
+        result = [p.get_height() for p in ax.patches]
+        self.assertEqual(result, expected)
+
+        ax = df.plot(kind='bar', stacked=True)
+        result = [p.get_height() for p in ax.patches]
+        self.assertEqual(result, expected)
+
+        result = [p.get_y() for p in ax.patches]
+        expected = [0.0, 0.0, 0.0, 10.0, 0.0, 20.0, 15.0, 10.0, 40.0]
+        self.assertEqual(result, expected)
+
     @slow
     def test_plot_scatter(self):
         df = DataFrame(randn(6, 4),
diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py
@@ -870,9 +870,11 @@ def _validate_color_args(self):
                                  " use one or the other or pass 'style' "
                                  "without a color symbol")
 
-    def _iter_data(self, data=None, keep_index=False):
+    def _iter_data(self, data=None, keep_index=False, fillna=None):
         if data is None:
             data = self.data
+        if fillna is not None:
+            data = data.fillna(fillna)
 
         from pandas.core.frame import DataFrame
         if isinstance(data, (Series, np.ndarray, Index)):
@@ -1780,7 +1782,7 @@ def _make_plot(self):
         pos_prior = neg_prior = np.zeros(len(self.data))
         K = self.nseries
 
-        for i, (label, y) in enumerate(self._iter_data()):
+        for i, (label, y) in enumerate(self._iter_data(fillna=0)):
             ax = self._get_ax(i)
             kwds = self.kwds.copy()
             kwds['color'] = colors[i % ncolors]

Original file line number	Diff line number	Diff line change
`@@ -670,3 +670,4 @@ Bug Fixes`
`670`	`670`
`671`	`671`	- Bug with kde plot and NaNs (:issue:`8182`)
`672`	`672`	- Bug in ``GroupBy.count`` with float32 data type were nan values were not excluded (:issue:`8169`).
	`673`	+- Bug with stacked barplots and NaNs (:issue:`8175`).