diff --git a/CHANGES.rst b/CHANGES.rst
index e7974c8..66c11f3 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -70,7 +70,12 @@ Bug Fixes
   peaks. Previously for Gaussian, the entire fit failed. [#205, #206]
 
 - Fixed input of `traces` in `Background`. Added a condition to 'FlatTrace' that
-  trace position must be a positive number. [#211]
+
+- Fix in FitTrace to set fully-masked column bin peaks to NaN. Previously, for
+  peak_method='max' these were set to 0.0, and for peak_method='centroid' they
+  were set to the number of rows in the image, biasing the final fit to all bin
+  peaks. [#206]
+
 
 Other changes
 ^^^^^^^^^^^^^
diff --git a/specreduce/background.py b/specreduce/background.py
index 5b8c6b6..29c81ca 100644
--- a/specreduce/background.py
+++ b/specreduce/background.py
@@ -32,9 +32,9 @@ class Background(_ImageParser):
     ----------
     image : `~astropy.nddata.NDData`-like or array-like
         image with 2-D spectral image data
-    traces : trace, int, float (single or list)
+    traces : List, `tracing.Trace`, int, float
         Individual or list of trace object(s) (or integers/floats to define
-        FlatTraces) to extract the background. If None, a FlatTrace at the
+        FlatTraces) to extract the background. If None, a `FlatTrace` at the
         center of the image (according to `disp_axis`) will be used.
     width : float
         width of extraction aperture in pixels
@@ -44,8 +44,22 @@ class Background(_ImageParser):
         pixels.
     disp_axis : int
         dispersion axis
+        [default: 1]
     crossdisp_axis : int
         cross-dispersion axis
+        [default: 0]
+    mask_treatment : string, optional
+        The method for handling masked or non-finite data. Choice of `filter`,
+        `omit`, or `zero-fill`. If `filter` is chosen, masked and non-finite
+        data will not contribute to the background statistic that is calculated
+        in each column along `disp_axis`. If `omit` is chosen, columns along
+        disp_axis with any masked/non-finite data values will be fully masked
+        (i.e, 2D mask is collapsed to 1D and applied). If `zero-fill` is chosen,
+        masked/non-finite data will be replaced with 0.0 in the input image,
+        and the mask will then be dropped. For all three options, the input mask
+        (optional on input NDData object) will be combined with a mask generated
+        from any non-finite values in the image data.
+        [default: ``filter``]
     """
     # required so numpy won't call __rsub__ on individual elements
     # https://stackoverflow.com/a/58409215
@@ -57,6 +71,8 @@ class Background(_ImageParser):
     statistic: str = 'average'
     disp_axis: int = 1
     crossdisp_axis: int = 0
+    mask_treatment: str = 'filter'
+    _valid_mask_treatment_methods = ('filter', 'omit', 'zero-fill')
 
     # TO-DO: update bkg_array with Spectrum1D alternative (is bkg_image enough?)
     bkg_array = deprecated_attribute('bkg_array', '1.3')
@@ -82,9 +98,29 @@ def __post_init__(self):
             dispersion axis
         crossdisp_axis : int
             cross-dispersion axis
+        mask_treatment : string
+            The method for handling masked or non-finite data. Choice of `filter`,
+            `omit`, or `zero-fill`. If `filter` is chosen, masked/non-finite data
+            will be filtered during the fit to each bin/column (along disp. axis) to
+            find the peak. If `omit` is chosen, columns along disp_axis with any
+            masked/non-finite data values will be fully masked (i.e, 2D mask is
+            collapsed to 1D and applied). If `zero-fill` is chosen, masked/non-finite
+            data will be replaced with 0.0 in the input image, and the mask will then
+            be dropped. For all three options, the input mask (optional on input
+            NDData object) will be combined with a mask generated from any non-finite
+            values in the image data.
         """
+
+        # Parse image, including masked/nonfinite data handling based on
+        # choice of `mask_treatment`. Any uncaught nonfinte data values will be
+        # masked as well. Returns a Spectrum1D.
         self.image = self._parse_image(self.image)
 
+        # always work with masked array, even if there is no masked
+        # or nonfinite data, in case padding is needed. if not, mask will be
+        # dropped at the end and a regular array will be returned.
+        img = np.ma.masked_array(self.image.data, self.image.mask)
+
         if self.width < 0:
             raise ValueError("width must be positive")
         if self.width == 0:
@@ -95,9 +131,13 @@ def __post_init__(self):
 
         bkg_wimage = np.zeros_like(self.image.data, dtype=np.float64)
         for trace in self.traces:
+            # note: ArrayTrace can have masked values, but if it does a MaskedArray
+            # will be returned so this should be reflected in the window size here
+            # (i.e, np.nanmax is not required.)
             windows_max = trace.trace.data.max() + self.width/2
             windows_min = trace.trace.data.min() - self.width/2
-            if windows_max >= self.image.shape[self.crossdisp_axis]:
+
+            if windows_max > self.image.shape[self.crossdisp_axis]:
                 warnings.warn("background window extends beyond image boundaries " +
                               f"({windows_max} >= {self.image.shape[self.crossdisp_axis]})")
             if windows_min < 0:
@@ -115,6 +155,9 @@ def __post_init__(self):
             raise ValueError("background regions overlapped")
         if np.any(np.sum(bkg_wimage, axis=self.crossdisp_axis) == 0):
             raise ValueError("background window does not remain in bounds across entire dispersion axis")  # noqa
+        # check if image contained within background window is fully-nonfinite and raise an error
+        if np.all(img.mask[bkg_wimage > 0]):
+            raise ValueError("Image is fully masked within background window determined by `width`.")  # noqa
 
         if self.statistic == 'median':
             # make it clear in the expose image that partial pixels are fully-weighted
@@ -122,20 +165,16 @@ def __post_init__(self):
 
         self.bkg_wimage = bkg_wimage
 
-        # mask user-highlighted and invalid values (if any) before taking stats
-        or_mask = (np.logical_or(~np.isfinite(self.image.data), self.image.mask)
-                   if self.image.mask is not None
-                   else ~np.isfinite(self.image.data))
-
         if self.statistic == 'average':
-            image_ma = np.ma.masked_array(self.image.data, mask=or_mask)
-            self._bkg_array = np.ma.average(image_ma,
+            self._bkg_array = np.ma.average(img,
                                             weights=self.bkg_wimage,
-                                            axis=self.crossdisp_axis).data
+                                            axis=self.crossdisp_axis)
+
         elif self.statistic == 'median':
-            med_mask = np.logical_or(self.bkg_wimage == 0, or_mask)
-            image_ma = np.ma.masked_array(self.image.data, mask=med_mask)
-            self._bkg_array = np.ma.median(image_ma, axis=self.crossdisp_axis).data
+            # combine where background weight image is 0 with image masked (which already
+            # accounts for non-finite data that wasn't already masked)
+            img.mask = np.logical_or(self.bkg_wimage == 0, self.image.mask)
+            self._bkg_array = np.ma.median(img, axis=self.crossdisp_axis)
         else:
             raise ValueError("statistic must be 'average' or 'median'")
 
@@ -204,7 +243,19 @@ def two_sided(cls, image, trace_object, separation, **kwargs):
             dispersion axis
         crossdisp_axis : int
             cross-dispersion axis
+        mask_treatment : string
+            The method for handling masked or non-finite data. Choice of `filter`,
+            `omit`, or `zero-fill`. If `filter` is chosen, masked/non-finite data
+            will be filtered during the fit to each bin/column (along disp. axis) to
+            find the peak. If `omit` is chosen, columns along disp_axis with any
+            masked/non-finite data values will be fully masked (i.e, 2D mask is
+            collapsed to 1D and applied). If `zero-fill` is chosen, masked/non-finite
+            data will be replaced with 0.0 in the input image, and the mask will then
+            be dropped. For all three options, the input mask (optional on input
+            NDData object) will be combined with a mask generated from any non-finite
+            values in the image data.
         """
+
         image = _ImageParser._get_data_from_image(image) if image is not None else cls.image
         kwargs['traces'] = [trace_object-separation, trace_object+separation]
         return cls(image=image, **kwargs)
@@ -241,6 +292,17 @@ def one_sided(cls, image, trace_object, separation, **kwargs):
             dispersion axis
         crossdisp_axis : int
             cross-dispersion axis
+        mask_treatment : string
+            The method for handling masked or non-finite data. Choice of `filter`,
+            `omit`, or `zero-fill`. If `filter` is chosen, masked/non-finite data
+            will be filtered during the fit to each bin/column (along disp. axis) to
+            find the peak. If `omit` is chosen, columns along disp_axis with any
+            masked/non-finite data values will be fully masked (i.e, 2D mask is
+            collapsed to 1D and applied). If `zero-fill` is chosen, masked/non-finite
+            data will be replaced with 0.0 in the input image, and the mask will then
+            be dropped. For all three options, the input mask (optional on input
+            NDData object) will be combined with a mask generated from any non-finite
+            values in the image data.
         """
         image = _ImageParser._get_data_from_image(image) if image is not None else cls.image
         kwargs['traces'] = [trace_object+separation]
diff --git a/specreduce/core.py b/specreduce/core.py
index 1737d0f..4fc6ded 100644
--- a/specreduce/core.py
+++ b/specreduce/core.py
@@ -1,5 +1,6 @@
 # Licensed under a 3-clause BSD style license - see LICENSE.rst
 
+from copy import deepcopy
 import inspect
 from dataclasses import dataclass
 
@@ -67,14 +68,20 @@ def _get_data_from_image(image, disp_axis=1):
         else:  # NDData, including CCDData and Spectrum1D
             img = image.data
 
-        # mask and uncertainty are set as None when they aren't specified upon
-        # creating a Spectrum1D object, so we must check whether these
-        # attributes are absent *and* whether they are present but set as None
-        if getattr(image, 'mask', None) is not None:
-            mask = image.mask
-        else:
-            mask = ~np.isfinite(img)
+        mask = getattr(image, 'mask', None)
+
+        # next, handle masked and nonfinite data in image.
+        # A mask will be created from any nonfinite image data, and combined
+        # with any additional 'mask' passed in. If image is being parsed within
+        # a specreduce operation that has 'mask_treatment' options, this will be
+        # handled as well. Note that input data may be modified if a fill value
+        # is chosen to handle masked data. The returned image will always have
+        # `image.mask` even if there are no nonfinte or masked values.
+        img, mask = self._mask_and_nonfinite_data_handling(image=img, mask=mask)
 
+        # mask (handled above) and uncertainty are set as None when they aren't
+        # specified upon creating a Spectrum1D object, so we must check whether
+        # these attributes are absent *and* whether they are present but set as None
         if getattr(image, 'uncertainty', None) is not None:
             uncertainty = image.uncertainty
         else:
@@ -85,11 +92,106 @@ def _get_data_from_image(image, disp_axis=1):
         spectral_axis = getattr(image, 'spectral_axis',
                                 np.arange(img.shape[disp_axis]) * u.pix)
 
-        return Spectrum1D(img * unit, spectral_axis=spectral_axis,
-                          uncertainty=uncertainty, mask=mask)
+        img = Spectrum1D(img * unit, spectral_axis=spectral_axis,
+                         uncertainty=uncertainty, mask=mask)
+
+        return img
+
+    @staticmethod
+    def _get_data_from_image(image):
+        """Extract data array from various input types for `image`.
+           Retruns `np.ndarray` of image data."""
 
+        if isinstance(image, u.quantity.Quantity):
+            img = image.value
+        if isinstance(image, np.ndarray):
+            img = image
+        else:  # NDData, including CCDData and Spectrum1D
+            img = image.data
         return img
 
+    def _mask_and_nonfinite_data_handling(self, image, mask):
+        """
+        This function handles the treatment of masked and nonfinite data,
+        including input validation.
+
+        All operations in Specreduce can take in a mask for the data as
+        part of the input NDData. Additionally, any non-finite values in the
+        data that aren't in the user-supplied mask will be combined bitwise
+        with the input mask.
+
+        There are three options currently implemented for the treatment
+        of masked and nonfinite data - filter, omit, and zero-fill.
+        Depending on the step, all or a subset of these three options are valid.
+
+        """
+
+        # valid options depend on Specreduce step, and are set accordingly there
+        # for steps that this isn't implemeted for yet, default to 'filter',
+        # which will return unmodified input image and mask
+        mask_treatment = getattr(self, 'mask_treatment', 'filter')
+
+        # make sure chosen option is valid. if _valid_mask_treatment_methods
+        # is not an attribue, proceed with 'filter' to return back inupt data
+        # and mask that is combined with nonfinite data.
+        if mask_treatment is not None:  # None in operations where masks aren't relevant (FlatTrace)
+            valid_mask_treatment_methods = getattr(self, '_valid_mask_treatment_methods', ['filter'])  # noqa
+            if mask_treatment not in valid_mask_treatment_methods:
+                raise ValueError(f"`mask_treatment` must be one of {valid_mask_treatment_methods}")
+
+        # make sure there is always a 'mask', even when all data is unmasked and finite.
+        if mask is not None:
+            mask = self.image.mask
+            # always mask any previously uncaught nonfinite values in image array
+            # combining these with the (optional) user-provided mask on `image.mask`
+            mask = np.logical_or(mask, ~np.isfinite(image))
+        else:
+            mask = ~np.isfinite(image)
+
+        # if mask option is the default 'filter' option, or None,
+        # nothing needs to be done. input mask (combined with nonfinite data)
+        # remains with data as-is.
+
+        if mask_treatment == 'zero-fill':
+            # make a copy of the input image since we will be modifying it
+            image = deepcopy(image)
+
+            # if mask_treatment is 'zero_fill', set masked values to zero in
+            # image data and drop image.mask. note that this is done after
+            # _combine_mask_with_nonfinite_from_data, so non-finite values in
+            # data (which are now in the mask) will also be set to zero.
+            # set masked values to zero
+            image[mask] = 0.
+
+            # masked array with no masked values, so accessing image.mask works
+            # but we don't need the actual mask anymore since data has been set to 0
+            mask = np.zeros(image.shape)
+
+        elif mask_treatment == 'omit':
+            # collapse 2d mask (after accounting for addl non-finite values in
+            # data) to a 1d mask, along dispersion axis, to fully mask columns
+            # that have any masked values.
+
+            # must have a crossdisp_axis specified to use 'omit' optoin
+            if hasattr(self, 'crossdisp_axis'):
+                crossdisp_axis = self.crossdisp_axis
+            if hasattr(self, '_crossdisp_axis'):
+                crossdisp_axis = self._crossdisp_axis
+
+            # create a 1d mask along crossdisp axis - if any column has a single nan,
+            # the entire column should be masked
+            reduced_mask = np.logical_or.reduce(mask,
+                                                axis=crossdisp_axis)
+
+            # back to a 2D mask
+            shape = (image.shape[0], 1) if crossdisp_axis == 0 else (1, image.shape[1])
+            mask = np.tile(reduced_mask, shape)
+
+        # check for case where entire image is masked.
+        if mask.all():
+            raise ValueError('Image is fully masked. Check for invalid values.')
+
+        return image, mask
 
 @dataclass
 class SpecreduceOperation(_ImageParser):
diff --git a/specreduce/extract.py b/specreduce/extract.py
index 4b41ca2..96128af 100644
--- a/specreduce/extract.py
+++ b/specreduce/extract.py
@@ -113,6 +113,10 @@ def _ap_weight_image(trace, width, disp_axis, crossdisp_axis, image_shape):
     for i in range(image_shape[disp_axis]):
         # TODO trace must handle transposed data (disp_axis == 0)
         # pass trace.trace.data[i] to avoid any mask if part of the regions is out-of-bounds
+
+        # ArrayTrace can have nonfinite or masked data in trace, and this will fail,
+        # so figure out how to handle that...
+
         wimage[:, i] = _get_boxcar_weights(trace.trace.data[i], hwidth, image_sizes)
 
     return wimage
@@ -154,6 +158,8 @@ class BoxcarExtract(SpecreduceOperation):
     disp_axis: int = 1
     crossdisp_axis: int = 0
     # TODO: should disp_axis and crossdisp_axis be defined in the Trace object?
+    mask_treatment: str = 'filter'
+    _valid_mask_treatment_methods = ('filter', 'omit', 'zero-fill')
 
     @property
     def spectrum(self):
@@ -176,6 +182,20 @@ def __call__(self, image=None, trace_object=None, width=None,
             dispersion axis [default: 1]
         crossdisp_axis : int, optional
             cross-dispersion axis [default: 0]
+        mask_treatment : string, optional
+            The method for handling masked or non-finite data. Choice of `filter`,
+            `omit`, or `zero-fill`. If `filter` is chosen, masked/non-finite data
+            will be filtered during the fit to each bin/column (along disp. axis) to
+            find the peak. If `omit` is chosen, columns along disp_axis with any
+            masked/non-finite data values will be fully masked (i.e, 2D mask is
+            collapsed to 1D and applied). If `zero-fill` is chosen, masked/non-finite
+            data will be replaced with 0.0 in the input image, and the mask will then
+            be dropped. For all three options, the input mask (optional on input
+            NDData object) will be combined with a mask generated from any non-finite
+            values in the image data. Also note that because binning is an option in
+            FitTrace, that masked data will contribute zero to the sum when binning
+            adjacent columns.
+            [default: ``filter``]
 
 
         Returns
@@ -190,24 +210,24 @@ def __call__(self, image=None, trace_object=None, width=None,
         disp_axis = disp_axis if disp_axis is not None else self.disp_axis
         crossdisp_axis = crossdisp_axis if crossdisp_axis is not None else self.crossdisp_axis
 
-        # handle image processing based on its type
+        # Parse image, including masked/nonfinite data handling based on
+        # choice of `mask_treatment`, which for BoxcarExtract can be filter, zero-fill, or
+        # omit. non-finite data will be masked, always. Returns a Spectrum1D.
         self.image = self._parse_image(image)
 
-        # TODO: this check can be removed if/when implemented as a check in FlatTrace
-        if isinstance(trace_object, FlatTrace):
-            if trace_object.trace_pos < 1:
-                raise ValueError('trace_object.trace_pos must be >= 1')
+        # # _parse_image returns a Spectrum1D. convert this to a masked array
+        # # for ease of calculations here (even if there is no masked data).
+        # img = np.ma.masked_array(self.image.data, self.image.mask)
 
-        if width < 0:
+        if width <= 0:
             raise ValueError("width must be positive")
 
         # weight image to use for extraction
-        wimg = _ap_weight_image(
-            trace_object,
-            width,
-            disp_axis,
-            crossdisp_axis,
-            self.image.shape)
+        wimg = _ap_weight_image(trace_object,
+                                width,
+                                disp_axis,
+                                crossdisp_axis,
+                                self.image.shape)
 
         # extract, assigning no weight to non-finite pixels outside the window
         # (non-finite pixels inside the window will still make it into the sum)
diff --git a/specreduce/tests/test_background.py b/specreduce/tests/test_background.py
index 09364c3..388da5c 100644
--- a/specreduce/tests/test_background.py
+++ b/specreduce/tests/test_background.py
@@ -1,3 +1,5 @@
+from astropy.nddata import NDData
+import astropy.units as u
 import numpy as np
 import pytest
 from specutils import Spectrum1D
@@ -22,13 +24,13 @@ def test_background(mk_test_img_raw, mk_test_spec_no_spectral_axis,
 
     # all the following should be equivalent, whether image's spectral axis
     # is in pixels or physical units:
-    bg1 = Background(image, [trace-bkg_sep, trace+bkg_sep], width=bkg_width)
+    bg1 = Background(image, [trace - bkg_sep, trace + bkg_sep], width=bkg_width)
     bg2 = Background.two_sided(image, trace, bkg_sep, width=bkg_width)
     bg3 = Background.two_sided(image, trace_pos, bkg_sep, width=bkg_width)
     assert np.allclose(bg1.bkg_image().flux, bg2.bkg_image().flux)
     assert np.allclose(bg1.bkg_image().flux, bg3.bkg_image().flux)
 
-    bg4 = Background(image_um, [trace-bkg_sep, trace+bkg_sep], width=bkg_width)
+    bg4 = Background(image_um, [trace - bkg_sep, trace + bkg_sep], width=bkg_width)
     bg5 = Background.two_sided(image_um, trace, bkg_sep, width=bkg_width)
     bg6 = Background.two_sided(image_um, trace_pos, bkg_sep, width=bkg_width)
     assert np.allclose(bg1.bkg_image().flux, bg4.bkg_image().flux)
@@ -72,7 +74,7 @@ def test_background(mk_test_img_raw, mk_test_spec_no_spectral_axis,
     stats = ['average', 'median']
 
     for st in stats:
-        bg = Background(img, trace-bkg_sep, width=bkg_width, statistic=st)
+        bg = Background(img, trace - bkg_sep, width=bkg_width, statistic=st)
         assert np.isnan(bg.image.flux).sum() == 2
         assert np.isnan(bg._bkg_array).sum() == 0
         assert np.isnan(bg.bkg_spectrum().flux).sum() == 0
@@ -99,7 +101,7 @@ def test_warnings_errors(mk_test_spec_no_spectral_axis):
     with pytest.warns(match="background window extends beyond image boundaries"):
         Background.two_sided(image, 7, 5, width=6)
 
-    trace = ArrayTrace(image, trace=np.arange(10)+20)  # from 20 to 29
+    trace = ArrayTrace(image, trace=np.arange(10) + 20)  # from 20 to 29
     with pytest.warns(match="background window extends beyond image boundaries"):
         with pytest.raises(ValueError,
                            match="background window does not remain in bounds across entire dispersion axis"):  # noqa
@@ -112,6 +114,11 @@ def test_warnings_errors(mk_test_spec_no_spectral_axis):
 
 
 def test_trace_inputs(mk_test_img_raw):
+    """
+    Tests for the input argument 'traces' to `Background`. This should accept
+    a list of or a single Trace object, or a list of or a single (positive)
+    number to define a FlatTrace.
+    """
 
     image = mk_test_img_raw
 
@@ -135,3 +142,181 @@ def test_trace_inputs(mk_test_img_raw):
                 'or None to use a FlatTrace in the middle of the image.'
     with pytest.raises(ValueError, match=match_str):
         Background(image, 'non_valid_trace_pos')
+
+
+class TestMasksBackground():
+
+    """
+    Various test functions to test how masked and non-finite data is handled
+    in `Background. There are three currently implemented options for masking
+    in Background: filter, omit, and zero-fill.
+    """
+
+    def mk_img(self, nrows=4, ncols=5, nan_slices=None):
+        """
+        Make a simple gradient image to test masking in Background.
+        Optionally add NaNs to data with `nan_slices`. Returned array is in
+        u.DN.
+        """
+
+        img = np.tile((np.arange(1., ncols + 1)), (nrows, 1))
+
+        if nan_slices:  # add nans in data
+            for s in nan_slices:
+                img[s] = np.nan
+
+        return img * u.DN
+
+    @pytest.mark.parametrize("mask", ["filter", "omit", "zero-fill"])
+    def test_fully_masked_column(self, mask):
+        """
+        Test background with some fully-masked columns (not fully masked image).
+        In this case, the background value for that fully-masked column should
+        be 0.0, with no error or warning raised. This is the case for
+        mask_treatment=filter, omit, or zero-fill.
+        """
+
+        img = self.mk_img(nrows=10, ncols=10)
+        img[:, 0:1] = np.nan
+
+        bkg = Background(img, traces=FlatTrace(img, 6), mask_treatment=mask)
+        assert np.all(bkg.bkg_image().data[:, 0:1] == 0.0)
+
+    @pytest.mark.parametrize("mask", ["filter", "omit"])
+    def test_fully_masked_image(self, mask):
+        """
+        Test that the appropriate error is raised by `Background` when image
+        is fully masked/NaN.
+        """
+
+        with pytest.raises(ValueError, match='Image is fully masked.'):
+            # fully NaN image
+            img = self.mk_img() * np.nan
+            Background(img, traces=FlatTrace(self.mk_img(), 2), mask_treatment=mask)
+
+        with pytest.raises(ValueError, match='Image is fully masked.'):
+            # fully masked image (should be equivalent)
+            img = NDData(np.ones((4, 5)), mask=np.ones((4, 5)))
+            Background(img, traces=FlatTrace(self.mk_img(), 2), mask_treatment=mask)
+
+        # Now test that an image that isn't fully masked, but is fully masked
+        # within the window determined by `width`, produces the correct result.
+        # only applicable for mask_treatment=filter, because this is the only
+        # option that allows a slice of masked values that don't span all rows.
+        msg = 'Image is fully masked within background window determined by `width`.'
+        with pytest.raises(ValueError, match=msg):
+            img = self.mk_img(nrows=12, ncols=12, nan_slices=[np.s_[3:10, :]])
+            Background(img, traces=FlatTrace(img, 6), width=7)
+
+    @pytest.mark.filterwarnings("ignore:background window extends beyond image boundaries")
+    @pytest.mark.parametrize("method,expected",
+                             [("filter", np.array([1., 2., 3., 4., 5., 6., 7.,
+                                                  8., 9., 10., 11., 12.])),
+                              ("omit", np.array([0., 2., 3., 0., 5., 6.,
+                                                 7., 0., 9., 10., 11., 12.])),
+                              ("zero-fill", np.array([0.58333333, 2., 3.,
+                                                      2.33333333, 5., 6., 7.,
+                                                      7.33333333, 9., 10., 11.,
+                                                      12.]))])
+    def test_mask_treatment_bkg_img_spectrum(self, method, expected):
+        """
+        This test function tests `Background.bkg_image` and
+        `Background.bkg_spectrum` when there is masked data. It also tests
+        background subtracting the image, and returning the spectrum of the
+        background subtracted image. This test is parameterized over all
+        currently implemented mask handling methods (filter, omit, and
+        zero-fill) to test that all three work as intended. The window size is
+        set to use the entire image array, so warning about background window
+        is ignored."""
+
+        img_size = 12  # square 12 x 12 image
+
+        # make image, set some value to nan, which will be masked in the function
+        image1 = self.mk_img(nrows=img_size, ncols=img_size,
+                             nan_slices=[np.s_[5:10, 0], np.s_[7:12, 3],
+                                         np.s_[2, 7]])
+
+        # also make an image that doesn't have nonf data values, but has
+        # masked values at the same locations, to make sure they give the same
+        # results
+        mask = ~np.isfinite(image1)
+        dat = self.mk_img(nrows=img_size, ncols=img_size)
+        image2 = NDData(dat, mask=mask)
+
+        for image in [image1, image2]:
+
+            # construct a flat trace in center of image
+            trace = FlatTrace(image, img_size / 2)
+
+            # create 'Background' object with `mask_treatment` set
+            # 'width' should be > size of image to use all pix (but warning will
+            # be raised, which we ignore.)
+            background = Background(image, mask_treatment=method,
+                                    traces=trace, width=img_size + 1)
+
+            # test background image matches 'expected'
+            bk_img = background.bkg_image()
+            # change this and following assertions to assert_quantity_allclose once
+            # issue #213 is fixed
+            np.testing.assert_allclose(bk_img.flux.value,
+                                       np.tile(expected, (img_size, 1)))
+
+            # test background spectrum matches 'expected' times the number of rows
+            # in cross disp axis, since this is a sum and all values in a col are
+            # the same.
+            bk_spec = background.bkg_spectrum()
+            np.testing.assert_allclose(bk_spec.flux.value, expected * img_size)
+
+    def test_sub_bkg_image(self):
+        """
+        Test that masked and nonfinite data is handled correctly when subtracting
+        background from image, for all currently implemented masking
+        options ('filter', 'omit', and 'zero-fill').
+        """
+
+        # make image, set some value to nan, which will be masked in the function
+        image = self.mk_img(nrows=12, ncols=12,
+                            nan_slices=[np.s_[5:10, 0], np.s_[7:12, 3],
+                                        np.s_[2, 7]])
+
+        # Calculate a background value using mask_treatment = 'filter'.
+        # For 'filter', the flag applies to how masked values are handled during
+        # calculation of background for each column, but nonfinite data will
+        # remain in input data array
+        background_filter = Background(image, mask_treatment='filter',
+                                       traces=FlatTrace(image, 6),
+                                       width=2)
+        subtracted_img_filter = background_filter.sub_image()
+
+        assert np.all(np.isfinite(subtracted_img_filter.data) == np.isfinite(image.data))
+
+        # Calculate a background value using mask_treatment = 'omit'. The input
+        # 2d mask is reduced to a 1d mask to mask out full columns in the
+        # presence of any nans - this means that (as tested above in
+        # `test_mask_treatment_bkg_img_spectrum`) those columns will have 0.0
+        # background. In this case, image.mask is expanded to mask full
+        # columns - the image itself will not have full columns set to np.nan,
+        # so there are still valid background subtracted data values in this
+        # case, but the corresponding mask for that entire column will be masked.
+
+        background_omit = Background(image, mask_treatment='omit',
+                                     traces=FlatTrace(image, 6),
+                                     width=2)
+        subtracted_img_omit = background_omit.sub_image()
+
+        assert np.all(np.isfinite(subtracted_img_omit.data) == np.isfinite(image.data))
+
+        # Calculate a background value using mask_treatment = 'zero-fill'. Data
+        # values at masked locations are set to 0 in the image array, and the
+        # background value calculated for that column will be subtracted
+        # resulting in a negative value. The resulting background subtracted
+        # image should be fully finite and the mask should be zero everywhere
+        # (all unmasked)
+
+        background_zero_fill = Background(image, mask_treatment='zero-fill',
+                                          traces=FlatTrace(image, 6),
+                                          width=2)
+        subtracted_img_zero_fill = background_zero_fill.sub_image()
+
+        assert np.all(np.isfinite(subtracted_img_zero_fill.data))
+        assert np.all(subtracted_img_zero_fill.mask == 0)
diff --git a/specreduce/tests/test_extract.py b/specreduce/tests/test_extract.py
index 4465a1b..7200d2d 100644
--- a/specreduce/tests/test_extract.py
+++ b/specreduce/tests/test_extract.py
@@ -2,20 +2,23 @@
 import pytest
 from astropy import units as u
 from astropy.modeling import models
-from astropy.nddata import VarianceUncertainty, UnknownUncertainty
+from astropy.nddata import NDData, VarianceUncertainty, UnknownUncertainty
 from astropy.tests.helper import assert_quantity_allclose
+from specutils import Spectrum1D
 
+from specreduce.background import Background
 from specreduce.extract import (
     BoxcarExtract, HorneExtract, OptimalExtract, _align_along_trace
 )
-from specreduce.tracing import FlatTrace, ArrayTrace
+from specreduce.tracing import FitTrace, FlatTrace, ArrayTrace
 
 
 def add_gaussian_source(image, amps=2, stddevs=2, means=None):
 
     """ Modify `image.data` to add a horizontal spectrum across the image.
         Each column can have a different amplitude, stddev or mean position
-        if these are arrays (otherwise, constant across image)."""
+        if these are arrays (otherwise, constant across image).
+    """
 
     nrows, ncols = image.shape
 
@@ -325,3 +328,65 @@ def test_horne_interpolated_nbins_fails(mk_test_img):
                           spatial_profile={'name': 'interpolated_profile',
                                            'n_bins_interpolated_profile': 100})
         ex.spectrum
+
+
+class TestMasksExtract():
+
+    def mk_flat_gauss_img(self, nrows=200, ncols=160, nan_slices=None, add_noise=True):
+
+        """
+        Makes a flat gaussian image for testing, with optional added gaussian
+        nosie and optional data values set to NaN. Variance is included, which
+        is required by HorneExtract. Returns a Spectrum1D with flux, spectral
+        axis, and uncertainty.
+        """
+
+        sigma_pix = 4
+        col_model = models.Gaussian1D(amplitude=1, mean=nrows/2,
+                                      stddev=sigma_pix)
+        spec2dvar = np.ones((nrows, ncols))
+        noise = 0
+        if add_noise:
+            np.random.seed(7)
+            sigma_noise = 1
+            noise = np.random.normal(scale=sigma_noise, size=(nrows, ncols))
+
+        index_arr = np.tile(np.arange(nrows), (ncols, 1))
+        img = col_model(index_arr.T) + noise
+
+        if nan_slices:  # add nans in data
+            for s in nan_slices:
+                img[s] = np.nan
+
+        wave = np.arange(0, img.shape[1], 1)
+        objectspec = Spectrum1D(spectral_axis=wave*u.m, flux=img*u.Jy,
+                                uncertainty=VarianceUncertainty(spec2dvar*u.Jy*u.Jy))
+
+        return objectspec
+
+    def test_boxcar_fully_masked(self):
+        """
+        Test that the appropriate error is raised by `BoxcarExtract` when image
+        is fully masked/NaN.
+        """
+        return
+
+        img = self.mk_flat_gauss_img()
+        trace = FitTrace(img)
+
+        with pytest.raises(ValueError, match='Image is fully masked.'):
+            # fully NaN image
+            img = np.zeros((4, 5)) * np.nan
+            Background(img, traces=trace, width=2)
+
+        with pytest.raises(ValueError, match='Image is fully masked.'):
+            # fully masked image (should be equivalent)
+            img = NDData(np.ones((4, 5)), mask=np.ones((4, 5)))
+            Background(img, traces=trace, width=2)
+
+        # Now test that an image that isn't fully masked, but is fully masked
+        # within the window determined by `width`, produces the correct result
+        msg = 'Image is fully masked within background window determined by `width`.'
+        with pytest.raises(ValueError, match=msg):
+            img = self.mk_img(nrows=12, ncols=12, nan_slices=[np.s_[3:10, :]])
+            Background(img, traces=FlatTrace(img, 6), width=7)
diff --git a/specreduce/tests/test_tracing.py b/specreduce/tests/test_tracing.py
index 867ef2c..03cc294 100644
--- a/specreduce/tests/test_tracing.py
+++ b/specreduce/tests/test_tracing.py
@@ -1,6 +1,6 @@
 import numpy as np
 import pytest
-from astropy.modeling import models
+from astropy.modeling import fitting, models
 from astropy.nddata import NDData
 import astropy.units as u
 from specreduce.utils.synth_data import make_2d_trace_image
@@ -9,6 +9,34 @@
 IM = make_2d_trace_image()
 
 
+def mk_img(nrows=200, ncols=160, nan_slices=None, add_noise=True):
+
+    """
+    Makes a gaussian image for testing, with optional added gaussian
+    nosie and optional data values set to NaN.
+    """
+
+    # NOTE: Will move this to a fixture at some point.
+
+    sigma_pix = 4
+    col_model = models.Gaussian1D(amplitude=1, mean=nrows/2,
+                                  stddev=sigma_pix)
+    noise = 0
+    if add_noise:
+        np.random.seed(7)
+        sigma_noise = 1
+        noise = np.random.normal(scale=sigma_noise, size=(nrows, ncols))
+
+    index_arr = np.tile(np.arange(nrows), (ncols, 1))
+    img = col_model(index_arr.T) + noise
+
+    if nan_slices:  # add nans in data
+        for s in nan_slices:
+            img[s] = np.nan
+
+    return img * u.DN
+
+
 # test basic trace class
 def test_basic_trace():
     t_pos = IM.shape[0] / 2
@@ -42,6 +70,8 @@ def test_negative_flat_trace_err():
     # negative trace_pos
     with pytest.raises(ValueError, match='must be positive.'):
         FlatTrace(IM, trace_pos=-1)
+    with pytest.raises(ValueError, match='must be positive.'):
+        FlatTrace(IM, trace_pos=0)
 
 
 # test array traces
@@ -70,6 +100,12 @@ def test_array_trace():
     assert np.ma.is_masked(t_short[-1])
     assert t_short.shape[0] == IM.shape[1]
 
+    # make sure nonfinite data in input `trace` is masked
+    arr[0:5] = np.nan
+    t = ArrayTrace(IM, arr)
+    assert np.all(t.trace.mask[0:5])
+    assert np.all(t.trace.mask[5:] == 0)
+
 
 # test fitted traces
 @pytest.mark.filterwarnings("ignore:Model is linear in parameters")
@@ -143,35 +179,103 @@ def test_fit_trace():
         FitTrace(img, bins=ncols + 1)
 
 
+@pytest.mark.filterwarnings("ignore:The fit may be unsuccessful")
+@pytest.mark.filterwarnings("ignore:Model is linear in parameters")
 class TestMasksTracing():
-
-    def mk_img(self, nrows=200, ncols=160):
-
-        np.random.seed(7)
-
-        sigma_pix = 4
-        sigma_noise = 1
-
-        col_model = models.Gaussian1D(amplitude=1, mean=nrows/2, stddev=sigma_pix)
-        noise = np.random.normal(scale=sigma_noise, size=(nrows, ncols))
-
-        index_arr = np.tile(np.arange(nrows), (ncols, 1))
-        img = col_model(index_arr.T) + noise
-
-        return img * u.DN
-
-    def test_window_fit_trace(self):
-
-        """This test function will test that masked values are treated correctly in
-        FitTrace, and produce the correct results and warning messages based on
-        `peak_method`."""
-        img = self.mk_img()
-
-        # create same-shaped variations of image with invalid values
+    """
+    There are three currently implemented options for masking in FitTrace: filter,
+    omit, and zero-fill. Trace, FlatTrace, and ArrayTrace do not have
+    `mask_treatment` options as input because masked/nonfinite values in the data
+    are not relevant for those trace types as they are not affected by masked
+    input data. The tests in this class test masking options for FitTrace, as
+    well as some basic tests (errors, etc) for the other trace types.
+    """
+
+    def test_flat_and_basic_trace_mask(self):
+        """
+        Mask handling is not relevant for basic and flat trace - nans or masked
+        values in the input image will not impact the trace value. The attribute
+        should be initialized though, and be one of the valid options ([None]
+        in this case), for consistancy with all other Specreduce operations.
+        Note that unlike FitTrace, a fully-masked image should NOT result in an
+        error raised because the trace does not depend on the data.
+        """
+
+        img = mk_img(nrows=5, ncols=5)
+
+        basic_trace = Trace(img)
+        assert basic_trace.mask_treatment is None
+
+        flat_trace = FlatTrace(img, trace_pos=2)
+        assert flat_trace.mask_treatment is None
+
+        arr = [1, 2, np.nan, 3, 4]
+        array_trace = ArrayTrace(img, arr)
+        assert array_trace.mask_treatment is None
+
+    def test_array_trace_masking(self):
+        """
+        The `trace` input to ArrayTrace can be a masked array, or an array
+        containing nonfinite data which will be converted to a masked array.
+        Additionally, if any padding needs to be added, the returned trace will
+        be a masked array. Otherwise, it should be a regular array.
+
+        Even though an ArrayTrace may have nans or masked values
+        in the input 1D array for the trace, `mask_treatment_method` refers
+        to how masked values in the input data should be treated. Nans / masked
+        values passed in the array trace should be considered intentional, so
+        also test that `mask_treatment` is initialized to None.
+        """
+        img = mk_img(nrows=10, ncols=10)
+
+        # non-finite data in input trace should be masked out
+        trace_arr = np.array((1, 2, np.nan, 4, 5))
+        array_trace = ArrayTrace(img, trace_arr)
+        assert array_trace.trace.mask[2]
+        assert isinstance(array_trace.trace, np.ma.MaskedArray)
+
+        # and combined with any input masked data
+        trace_arr = np.ma.MaskedArray([1, 2, np.nan, 4, 5], mask=[1, 0, 0, 0, 0])
+        array_trace = ArrayTrace(img, trace_arr)
+        assert array_trace.trace.mask[0]
+        assert array_trace.trace.mask[2]
+        assert isinstance(array_trace.trace, np.ma.MaskedArray)
+
+        # check that mask_treatment is None as there are no valid choices
+        assert array_trace.mask_treatment is None
+
+        # check that if array is fully finite and not masked, that the returned
+        # trace is a notrmal array, not a masked array
+        trace = ArrayTrace(img, np.ones(100))
+        assert isinstance(trace.trace, np.ndarray)
+        assert not isinstance(trace.trace, np.ma.MaskedArray)
+
+        # ensure correct warning is raised when entire trace is masked.
+        trace_arr = np.ma.MaskedArray([1, 2, np.nan, 4, 5], mask=[1, 1, 0, 1, 1])
+        with pytest.raises(UserWarning, match=r'Entire trace array is masked.'):
+            array_trace = ArrayTrace(img, trace_arr)
+
+    def test_fit_trace_fully_masked_image(self):
+        """
+        Test that the correct warning is raised when a fully maksed image is
+        encountered. Also test that when a non-fully masked image is provided,
+        but `window` is set and the image is fully masked within that window,
+        that the correct error is raised.
+        """
+
+        # make simple gaussian image.
+        img = mk_img()
+
+        # create same-shaped variations of image with nans in data array
+        # which will be masked within FitTrace.
         nrows = 200
         ncols = 160
         img_all_nans = np.tile(np.nan, (nrows, ncols))
 
+        # error on trace of all-nan image
+        with pytest.raises(ValueError, match=r'Image is fully masked. Check for invalid values.'):
+            FitTrace(img_all_nans)
+
         window = 10
         guess = int(nrows/2)
         img_win_nans = img.copy()
@@ -181,49 +285,16 @@ def test_window_fit_trace(self):
         with pytest.raises(ValueError, match='pixels in window region are masked'):
             FitTrace(img_win_nans, guess=guess, window=window)
 
-        # error on trace of all-nan image
-        with pytest.raises(ValueError, match=r'image is fully masked'):
-            FitTrace(img_all_nans)
-
-    @pytest.mark.filterwarnings("ignore:The fit may be unsuccessful")
-    @pytest.mark.filterwarnings("ignore:Model is linear in parameters")
-    @pytest.mark.filterwarnings("ignore:All pixels in bins")
-    def test_fit_trace_all_nan_cols(self):
-
-        # make sure that the actual trace that is fit is correct when
-        # all-masked bin peaks are set to NaN
-        img = self.mk_img(nrows=10, ncols=11)
-
-        img[:, 7] = np.nan
-        img[:, 4] = np.nan
-        img[:, 0] = np.nan
-
-        # peak_method = 'max'
-        truth = [1.6346154, 2.2371795, 2.8397436, 3.4423077, 4.0448718,
-                 4.6474359, 5.25, 5.8525641, 6.4551282, 7.0576923,
-                 7.6602564]
-        max_trace = FitTrace(img, peak_method='max')
-        np.testing.assert_allclose(truth, max_trace.trace)
-
-        # peak_method = 'gaussian'
-        truth = [1.947455, 2.383634, 2.8198131, 3.2559921, 3.6921712,
-                 4.1283502, 4.5645293, 5.0007083, 5.4368874, 5.8730665,
-                 6.3092455]
-        max_trace = FitTrace(img, peak_method='gaussian')
-        np.testing.assert_allclose(truth, max_trace.trace)
-
-        # peak_method = 'centroid'
-        truth = [2.5318835, 2.782069, 3.0322546, 3.2824402, 3.5326257,
-                 3.7828113, 4.0329969, 4.2831824, 4.533368, 4.7835536,
-                 5.0337391]
-        max_trace = FitTrace(img, peak_method='centroid')
-        np.testing.assert_allclose(truth, max_trace.trace)
-
-    @pytest.mark.filterwarnings("ignore:The fit may be unsuccessful")
-    @pytest.mark.filterwarnings("ignore:Model is linear in parameters")
-    def test_warn_msg_fit_trace_all_nan_cols(self):
-
-        img = self.mk_img()
+    def test_fit_trace_fully_masked_columns_warn_msg(self):
+        """
+        Test that the correct warning message is raised when fully masked columns
+        (in a not-fully-masked image) are encountered in FitTrace. These columns
+        will be set to NaN and filtered from the final all-bin fit (as tested in
+        test_fit_trace_fully_masked_cols), but a warning message is raised. This
+        should happen for mask_treatment='filter' and 'omit' (for 'zero-fill',
+        all NaN columns become all-zero columns).
+        """
+        img = mk_img()
 
         # test that warning (dependent on choice of `peak_method`) is raised when a
         # few bins are masked, and that theyre listed individually
@@ -252,3 +323,190 @@ def test_warn_msg_fit_trace_all_nan_cols(self):
                           '0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ..., 20 are '
                           'fully masked. Setting bin peaks to NaN.'):
             FitTrace(nddat)
+
+    @pytest.mark.filterwarnings("ignore:All pixels in bins")
+    @pytest.mark.parametrize("mask_treatment", ['filter', 'omit'])
+    def test_fit_trace_fully_masked_cols(self, mask_treatment):
+        """
+        Create a test image with some fully-nan/masked columns, and test that
+        when the final fit to all bin peaks is done for the trace, that these
+        fully-masked columns are set to NaN and filtered during the final all-bin
+        fit. This should happen for mask_treatment = 'filter' and 'omit'
+        (for 'zero-fill', all NaN columns become all-zero columns). Ignore the
+        warning that is produced when this case is encountered (that is tested
+        in `test_fit_trace_fully_masked_cols_warn_msg`.)
+        """
+        img = mk_img(nrows=10, ncols=11)
+
+        # set some columns fully to nan, which will be masked out
+        img[:, 7] = np.nan
+        img[:, 4] = np.nan
+        img[:, 0] = np.nan
+
+        # also create an image that doesn't have nans in the data, but
+        # is masked in the same locations, to make sure that is equivilant.
+
+        # test peak_method = 'max'
+        truth = [1.6346154, 2.2371795, 2.8397436, 3.4423077, 4.0448718,
+                 4.6474359, 5.25, 5.8525641, 6.4551282, 7.0576923,
+                 7.6602564]
+        max_trace = FitTrace(img, peak_method='max',
+                             mask_treatment=mask_treatment)
+        np.testing.assert_allclose(truth, max_trace.trace)
+
+        # peak_method = 'gaussian'
+        truth = [1.947455, 2.383634, 2.8198131, 3.2559921, 3.6921712,
+                 4.1283502, 4.5645293, 5.0007083, 5.4368874, 5.8730665,
+                 6.3092455]
+        max_trace = FitTrace(img, peak_method='gaussian',
+                             mask_treatment=mask_treatment)
+        np.testing.assert_allclose(truth, max_trace.trace)
+
+        # peak_method = 'centroid'
+        truth = [2.5318835, 2.782069, 3.0322546, 3.2824402, 3.5326257,
+                 3.7828113, 4.0329969, 4.2831824, 4.533368, 4.7835536,
+                 5.0337391]
+        max_trace = FitTrace(img, peak_method='centroid', mask_treatment=mask_treatment)
+        np.testing.assert_allclose(truth, max_trace.trace)
+
+    @pytest.mark.filterwarnings("ignore:All pixels in bins")
+    @pytest.mark.parametrize("peak_method,expected",
+                             [("max", [5., 3., 5., 5., 7., 5.,
+                                       np.nan, 5., 5., 5., 2., 5.]),
+                              ("gaussian", [5., 2.10936004, 5., 5., 7.80744334,
+                                            5., np.nan, 5., 5., 5., 1.28216332, 5.]),
+                              ("centroid", [4.27108332, 2.24060342, 4.27108332,
+                                            4.27108332, 6.66827608, 4.27108332,
+                                            np.nan, 4.27108332, 4.27108332,
+                                            4.27108332, 1.19673467, 4.27108332])])
+    def test_mask_treatment_filter(self, peak_method, expected):
+        """
+        Test for mask_treatment=filter for FitTrace.
+        With this masking option, masked and nonfinite data should be filtered
+        when determining bin/column peak. Fully masked bins should be omitted
+        from the final all-bin-peak fit for the Trace. Parametrized over different
+        `peak_method` options.
+        """
+
+        # Make an image with some nonfinite values.
+        image1 = mk_img(nan_slices=[np.s_[4:8, 1:2], np.s_[2:7, 4:5],
+                                    np.s_[:, 6:7], np.s_[3:9, 10:11]],
+                        nrows=10, ncols=12, add_noise=False)
+
+        # Also make an image that doesn't have nonf data values, but has masked
+        # values at the same locations, to make sure they give the same results.
+        mask = ~np.isfinite(image1)
+        dat = mk_img(nrows=10, ncols=12, add_noise=False)
+        image2 = NDData(dat, mask=mask)
+
+        for imgg in [image1, image2]:
+            # run FitTrace, with the testing-only flag _save_bin_peaks_testing set
+            # to True to return the bin peak values before fitting the trace
+            trace = FitTrace(imgg, peak_method=peak_method,
+                             _save_bin_peaks_testing=True)
+            x_bins, y_bins = trace._bin_peaks_testing
+            np.testing.assert_allclose(y_bins, expected)
+
+            # check that final fit to all bins, accouting for fully-masked bins,
+            # matches the trace
+            fitter = fitting.LevMarLSQFitter()
+            mask = np.isfinite(y_bins)
+            all_bin_fit = fitter(trace.trace_model, x_bins[mask], y_bins[mask])
+            all_bin_fit = all_bin_fit((np.arange(12)))
+
+            np.testing.assert_allclose(trace.trace, all_bin_fit)
+
+    @pytest.mark.filterwarnings("ignore:All pixels in bins")
+    @pytest.mark.parametrize("peak_method,expected",
+                             [("max", [5., 3., 5., 5., 7., 5.,
+                                       0., 5., 5., 5., 2., 5.]),
+                              ("gaussian", [5., 1.09382384, 5., 5., 7.81282206,
+                                            5., 0., 5., 5., 5., 1.28216332, 5.]),
+                              ("centroid", [4.27108332, 2.24060342, 4.27108332,
+                                            4.27108332, 6.66827608, 4.27108332,
+                                            9., 4.27108332, 4.27108332,
+                                            4.27108332, 1.19673467, 4.27108332])])
+    def test_mask_treatment_zero_fill(self, peak_method, expected):
+        """
+        Test for mask_treatment=`zero_fill` for FitTrace.
+        Masked and nonfinite data are replaced with zero in the data array,
+        and the input mask is then dropped. Parametrized over different
+        `peak_method` options.
+        """
+
+        # Make an image with some nonfinite values.
+        image1 = mk_img(nan_slices=[np.s_[4:8, 1:2], np.s_[2:7, 4:5],
+                                    np.s_[:, 6:7], np.s_[3:9, 10:11]],
+                        nrows=10, ncols=12, add_noise=False)
+
+        # Also make an image that doesn't have nonf data values, but has masked
+        # values at the same locations, to make sure they give the same results.
+        mask = ~np.isfinite(image1)
+        dat = mk_img(nrows=10, ncols=12, add_noise=False)
+        image2 = NDData(dat, mask=mask)
+
+        for imgg in [image1, image2]:
+            # run FitTrace, with the testing-only flag _save_bin_peaks_testing set
+            # to True to return the bin peak values before fitting the trace
+            trace = FitTrace(imgg, peak_method=peak_method,
+                             mask_treatment='zero-fill',
+                             _save_bin_peaks_testing=True)
+            x_bins, y_bins = trace._bin_peaks_testing
+            np.testing.assert_allclose(y_bins, expected)
+
+            # check that final fit to all bins, accouting for fully-masked bins,
+            # matches the trace
+            fitter = fitting.LevMarLSQFitter()
+            mask = np.isfinite(y_bins)
+            all_bin_fit = fitter(trace.trace_model, x_bins[mask], y_bins[mask])
+            all_bin_fit = all_bin_fit((np.arange(12)))
+
+            np.testing.assert_allclose(trace.trace, all_bin_fit)
+
+    @pytest.mark.filterwarnings("ignore:All pixels in bins")
+    @pytest.mark.parametrize("peak_method,expected",
+                             [("max", [5., np.nan, 5., 5., np.nan, 5.,
+                                       np.nan, 5., 5., 5., np.nan,  5.]),
+                              ("gaussian", [5., np.nan, 5., 5., np.nan, 5.,
+                                            np.nan, 5., 5., 5., np.nan, 5.]),
+                              ("centroid", [4.27108332, np.nan, 4.27108332,
+                                            4.27108332, np.nan, 4.27108332,
+                                            np.nan, 4.27108332, 4.27108332,
+                                            4.27108332, np.nan, 4.27108332])])
+    def test_mask_treatment_omit(self, peak_method, expected):
+        """
+        Test for mask_treatment=`omit` for FitTrace. Columns (assuming
+        disp_axis==1) with any masked data values will be fully masked and
+        therefore not contribute to the bin peaks. Parametrized over different
+        `peak_method` options.
+        """
+
+        # Make an image with some nonfinite values.
+        image1 = mk_img(nan_slices=[np.s_[4:8, 1:2], np.s_[2:7, 4:5],
+                                    np.s_[:, 6:7], np.s_[3:9, 10:11]],
+                        nrows=10, ncols=12, add_noise=False)
+
+        # Also make an image that doesn't have nonfinite data values, but has masked
+        # values at the same locations, to make sure those cases are equivalent
+        mask = ~np.isfinite(image1)
+        dat = mk_img(nrows=10, ncols=12, add_noise=False)
+        image2 = NDData(dat, mask=mask)
+
+        for imgg in [image1, image2]:
+
+            # run FitTrace, with the testing-only flag _save_bin_peaks_testing set
+            # to True to return the bin peak values before fitting the trace
+            trace = FitTrace(imgg, peak_method=peak_method,
+                             mask_treatment='omit',
+                             _save_bin_peaks_testing=True)
+            x_bins, y_bins = trace._bin_peaks_testing
+            np.testing.assert_allclose(y_bins, expected)
+
+            # check that final fit to all bins, accouting for fully-masked bins,
+            # matches the trace
+            fitter = fitting.LevMarLSQFitter()
+            mask = np.isfinite(y_bins)
+            all_bin_fit = fitter(trace.trace_model, x_bins[mask], y_bins[mask])
+            all_bin_fit = all_bin_fit((np.arange(12)))
+
+            np.testing.assert_allclose(trace.trace, all_bin_fit)
diff --git a/specreduce/tracing.py b/specreduce/tracing.py
index 7964480..f124ddb 100644
--- a/specreduce/tracing.py
+++ b/specreduce/tracing.py
@@ -36,6 +36,13 @@ def __post_init__(self):
         self.trace_pos = self.image.shape[0] / 2
         self.trace = np.ones_like(self.image[0]) * self.trace_pos
 
+        # masking options not relevant for basic Trace
+        self._mask_treatment = None
+        self._valid_mask_treatment_methods = [None]
+
+        # eventually move this to common SpecreduceOperation base class
+        self.validate_masking_options()
+
     def __getitem__(self, i):
         return self.trace[i]
 
@@ -43,6 +50,11 @@ def __getitem__(self, i):
     def shape(self):
         return self.trace.shape
 
+    def validate_masking_options(self):
+        if self.mask_treatment not in self.valid_mask_treatment_methods:
+            raise ValueError(
+                f'`mask_treatment` {self.mask_treatment} not one of {self.valid_mask_treatment_methods}')  # noqa
+
     def shift(self, delta):
         """
         Shift the trace by delta pixels perpendicular to the axis being traced
@@ -61,7 +73,7 @@ def _bound_trace(self):
         Mask trace positions that are outside the upper/lower bounds of the image.
         """
         ny = self.image.shape[0]
-        self.trace = np.ma.masked_outside(self.trace, 0, ny-1)
+        self.trace = np.ma.masked_outside(self.trace, 0, ny - 1)
 
     def __add__(self, delta):
         """
@@ -79,6 +91,14 @@ def __sub__(self, delta):
         """
         return self.__add__(-delta)
 
+    @property
+    def mask_treatment(self):
+        return self._mask_treatment
+
+    @property
+    def valid_mask_treatment_methods(self):
+        return self._valid_mask_treatment_methods
+
 
 @dataclass
 class FlatTrace(Trace, _ImageParser):
@@ -101,6 +121,10 @@ def __post_init__(self):
 
         self.set_position(self.trace_pos)
 
+        # masking options not relevant for basic Trace
+        self._mask_treatment = None
+        self._valid_mask_treatment_methods = [None]
+
     def set_position(self, trace_pos):
         """
         Set the trace position within the image
@@ -124,12 +148,33 @@ class ArrayTrace(Trace, _ImageParser):
 
     Parameters
     ----------
-    trace : `numpy.ndarray`
-        Array containing trace positions
+    trace : `numpy.ndarray` or `numpy.ma.MaskedArray`
+        Array containing trace positions.
     """
     trace: np.ndarray
 
     def __post_init__(self):
+
+        # masking options not relevant for ArrayTrace. any non-finite or masked
+        # data in `image` will not affect array trace
+        self._mask_treatment = None
+        self._valid_mask_treatment_methods = [None]
+
+        # masked array will have a .data, regular array will not.
+        trace_data = getattr(self.trace, 'data', self.trace)
+
+        # but we do need to mask uncaught non-finite values in input trace array
+        # which should also be combined with any existing mask in the input `trace`
+        if hasattr(self.trace, 'mask'):
+            total_mask = np.logical_or(self.trace.mask, ~np.isfinite(trace_data))
+        else:
+            total_mask = ~np.isfinite(trace_data)
+
+        # always work with masked array, even if there is no masked
+        # or nonfinite data, in case padding is needed. if not, mask will be
+        # dropped at the end and a regular array will be returned.
+        self.trace = np.ma.MaskedArray(trace_data, total_mask)
+
         self.image = self._parse_image(self.image)
 
         nx = self.image.shape[1]
@@ -143,8 +188,17 @@ def __post_init__(self):
                 # padding will be the last value of the trace, but will be masked out.
                 padding = np.ma.MaskedArray(np.ones(nx - nt) * self.trace[-1], mask=True)
                 self.trace = np.ma.hstack([self.trace, padding])
+
         self._bound_trace()
 
+        # warn if entire trace is masked
+        if np.all(self.trace.mask):
+            warnings.warn("Entire trace array is masked.")
+
+        # and return plain array if nothing is masked
+        if not np.any(self.trace.mask):
+            self.trace = self.trace.data
+
 
 @dataclass
 class FitTrace(Trace, _ImageParser):
@@ -201,6 +255,21 @@ class FitTrace(Trace, _ImageParser):
         ``centroid``: Takes the centroid of the window within in bin.
         ``max``: Saves the position with the maximum flux in each bin.
         [default: ``max``]
+    mask_treatment : string, optional
+        The method for handling masked or non-finite data. Choice of `filter`,
+        `omit`, or `zero-fill`. If `filter` is chosen, masked/non-finite data
+        will be filtered during the fit to each bin/column (along disp. axis) to
+        find the peak. If `omit` is chosen, columns along disp_axis with any
+        masked/non-finite data values will be fully masked (i.e, 2D mask is
+        collapsed to 1D and applied). If `zero-fill` is chosen, masked/non-finite
+        data will be replaced with 0.0 in the input image, and the mask will then
+        be dropped. For all three options, the input mask (optional on input
+        NDData object) will be combined with a mask generated from any non-finite
+        values in the image data. Also note that because binning is an option in
+        FitTrace, that masked data will contribute zero to the sum when binning
+        adjacent columns.
+        [default: ``filter``]
+
     """
     bins: int = None
     guess: float = None
@@ -209,24 +278,29 @@ class FitTrace(Trace, _ImageParser):
     peak_method: str = 'max'
     _crossdisp_axis = 0
     _disp_axis = 1
+    mask_treatment: str = 'filter'
+    _valid_mask_treatment_methods = ('filter', 'omit', 'zero-fill')
+    # for testing purposes only, save bin peaks if requested
+    _save_bin_peaks_testing: bool = False
 
     def __post_init__(self):
 
-        # parse image
+        # Parse image, including masked/nonfinite data handling based on
+        # choice of `mask_treatment`. returns a Spectrum1D
         self.image = self._parse_image(self.image)
 
-        # mask any previously uncaught invalid values
-        or_mask = np.logical_or(self.image.mask, ~np.isfinite(self.image.data))
-        img = np.ma.masked_array(self.image.data, or_mask)
+        # _parse_image returns a Spectrum1D. convert this to a masked array
+        # for ease of calculations here (even if there is no masked data).
+        # Note: uncertainties are dropped, this should also be addressed at
+        # some point probably across the package.
+        img = np.ma.masked_array(self.image.data, self.image.mask)
+        self._mask_temp = self.image.mask
 
-        # validate arguments
+        # validate input arguments
         valid_peak_methods = ('gaussian', 'centroid', 'max')
         if self.peak_method not in valid_peak_methods:
             raise ValueError(f"peak_method must be one of {valid_peak_methods}")
 
-        if img.mask.all():
-            raise ValueError('image is fully masked. Check for invalid values')
-
         if self._crossdisp_axis != 0:
             raise ValueError('cross-dispersion axis must equal 0')
 
@@ -317,7 +391,7 @@ def _fit_trace(self, img):
 
             # binned columns, summed along disp. axis.
             # or just a single, unbinned column if no bins
-            z_i = img[ilum2, x_bins[i]:x_bins[i+1]].sum(axis=self._disp_axis)
+            z_i = img[ilum2, x_bins[i]:x_bins[i + 1]].sum(axis=self._disp_axis)
 
             # if this bin is fully masked, set bin peak to NaN so it can be
             # filtered in the final fit to all bin peaks for the trace
@@ -359,7 +433,7 @@ def _fit_trace(self, img):
                 z_i_cumsum = np.cumsum(z_i)
                 # find the interpolated index where the cumulative array reaches
                 # half the total cumulative values
-                y_bins[i] = np.interp(z_i_cumsum[-1]/2., z_i_cumsum, ilum2)
+                y_bins[i] = np.interp(z_i_cumsum[-1] / 2., z_i_cumsum, ilum2)
 
                 # NOTE this reflects current behavior, should eventually be changed
                 # to set to nan by default (or zero fill / interpoate option once
@@ -388,6 +462,12 @@ def _fit_trace(self, img):
         x_bins = (x_bins[:-1] + x_bins[1:]) / 2
 
         # interpolate the fitted trace over the entire wavelength axis
+
+        # for testing purposes only, save bin peaks if requested
+        if self._save_bin_peaks_testing:
+            self._bin_peaks_testing = (x_bins, y_bins)
+
+        # filter non-finite bin peaks before filtering to all bin peaks
         y_finite = np.where(np.isfinite(y_bins))[0]
         if y_finite.size > 0:
             x_bins = x_bins[y_finite]
@@ -397,6 +477,7 @@ def _fit_trace(self, img):
             fitter = (fitting.SplineSmoothingFitter()
                       if isinstance(self.trace_model, models.Spline1D)
                       else fitting.LevMarLSQFitter())
+            self._y_bins = y_bins
             self.trace_model_fit = fitter(self.trace_model, x_bins, y_bins)
 
             trace_x = np.arange(img.shape[self._disp_axis])