Source code for pygmt.src.blockm

"""
blockm - Block average (x,y,z) data tables by mean, median, or mode estimation.
"""
import pandas as pd
from pygmt.clib import Session
from pygmt.helpers import (
    GMTTempFile,
    build_arg_string,
    check_data_input_order,
    fmt_docstring,
    kwargs_to_strings,
    use_alias,
)

__doctest_skip__ = ["blockmean", "blockmedian", "blockmode"]


def _blockm(block_method, data, x, y, z, outfile, **kwargs):
    r"""
    Block average (x,y,z) data tables by mean, median, or mode estimation.

    Reads arbitrarily located (x,y,z) triples [or optionally weighted
    quadruples (x,y,z,w)] from a table and writes to the output a mean,
    median, or mode (depending on ``block_method``) position and value for
    every non-empty block in a grid region defined by the ``region`` and
    ``spacing`` parameters.

    Parameters
    ----------
    block_method : str
        Name of the GMT module to call. Must be "blockmean", "blockmedian" or
        "blockmode".

    Returns
    -------
    output : pandas.DataFrame or None
        Return type depends on whether the ``outfile`` parameter is set:

        - :class:`pandas.DataFrame` table with (x, y, z) columns if ``outfile``
          is not set
        - None if ``outfile`` is set (filtered output will be stored in file
          set by ``outfile``)
    """
    with GMTTempFile(suffix=".csv") as tmpfile:
        with Session() as lib:
            # Choose how data will be passed into the module
            table_context = lib.virtualfile_from_data(
                check_kind="vector", data=data, x=x, y=y, z=z, required_z=True
            )
            # Run blockm* on data table
            with table_context as infile:
                if outfile is None:
                    outfile = tmpfile.name
                lib.call_module(
                    module=block_method,
                    args=build_arg_string(kwargs, infile=infile, outfile=outfile),
                )

        # Read temporary csv output to a pandas table
        if outfile == tmpfile.name:  # if user did not set outfile, return pd.DataFrame
            try:
                column_names = data.columns.to_list()
                result = pd.read_csv(tmpfile.name, sep="\t", names=column_names)
            except AttributeError:  # 'str' object has no attribute 'columns'
                result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">")
        elif outfile != tmpfile.name:  # return None if outfile set, output in outfile
            result = None

    return result


[docs]@fmt_docstring
@check_data_input_order("v0.5.0", remove_version="v0.7.0")
@use_alias(
    I="spacing",
    R="region",
    S="summary",
    V="verbose",
    a="aspatial",
    b="binary",
    d="nodata",
    e="find",
    f="coltypes",
    h="header",
    i="incols",
    o="outcols",
    r="registration",
    w="wrap",
)
@kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma", o="sequence_comma")
def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
    r"""
    Block average (x,y,z) data tables by mean estimation.

    Reads arbitrarily located (x,y,z) triples [or optionally weighted
    quadruples (x,y,z,w)] and writes to the output a mean position and value
    for every non-empty block in a grid region defined by the ``region`` and
    ``spacing`` parameters.

    Takes a matrix, xyz triplets, or a file name as input.

    Must provide either ``data`` or ``x``, ``y``, and ``z``.

    Full option list at :gmt-docs:`blockmean.html`

    {aliases}

    Parameters
    ----------
    data : str or {table-like}
        Pass in (x, y, z) or (longitude, latitude, elevation) values by
        providing a file name to an ASCII data table, a 2D
        {table-classes}.
    x/y/z : 1d arrays
        Arrays of x and y coordinates and values z of the data points.

    {I}

    summary : str
        [**m**\|\ **n**\|\ **s**\|\ **w**].
        Type of summary values calculated by blockmean.

        - **m** - reports mean value [Default]
        - **n** - report the number of input points inside each block
        - **s** - report the sum of all z-values inside a block
        - **w** - report the sum of weights

    {R}

    outfile : str
        The file name for the output ASCII file.

    {V}
    {a}
    {b}
    {d}
    {e}
    {i}
    {f}
    {h}
    {o}
    {r}
    {w}

    Returns
    -------
    output : pandas.DataFrame or None
        Return type depends on whether the ``outfile`` parameter is set:

        - :class:`pandas.DataFrame` table with (x, y, z) columns if ``outfile``
          is not set.
        - None if ``outfile`` is set (filtered output will be stored in file
          set by ``outfile``).

    Example
    -------
    >>> import pygmt
    >>> # Load a table of ship observations of bathymetry off Baja California
    >>> data = pygmt.datasets.load_sample_data(name="bathymetry")
    >>> # Calculate block mean values within 5 by 5 minute bins
    >>> data_bmean = pygmt.blockmean(
    ...     data=data, region=[245, 255, 20, 30], spacing="5m"
    ... )
    """
    return _blockm(
        block_method="blockmean", data=data, x=x, y=y, z=z, outfile=outfile, **kwargs
    )


[docs]@fmt_docstring
@check_data_input_order("v0.5.0", remove_version="v0.7.0")
@use_alias(
    I="spacing",
    R="region",
    V="verbose",
    a="aspatial",
    b="binary",
    d="nodata",
    e="find",
    f="coltypes",
    h="header",
    i="incols",
    o="outcols",
    r="registration",
    w="wrap",
)
@kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma", o="sequence_comma")
def blockmedian(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
    r"""
    Block average (x,y,z) data tables by median estimation.

    Reads arbitrarily located (x,y,z) triples [or optionally weighted
    quadruples (x,y,z,w)] and writes to the output a median position and value
    for every non-empty block in a grid region defined by the ``region`` and
    ``spacing`` parameters.

    Takes a matrix, xyz triplets, or a file name as input.

    Must provide either ``data`` or ``x``, ``y``, and ``z``.

    Full option list at :gmt-docs:`blockmedian.html`

    {aliases}

    Parameters
    ----------
    data : str or {table-like}
        Pass in (x, y, z) or (longitude, latitude, elevation) values by
        providing a file name to an ASCII data table, a 2D
        {table-classes}.
    x/y/z : 1d arrays
        Arrays of x and y coordinates and values z of the data points.

    {I}

    {R}

    outfile : str
        The file name for the output ASCII file.

    {V}
    {a}
    {b}
    {d}
    {e}
    {f}
    {h}
    {i}
    {o}
    {r}
    {w}

    Returns
    -------
    output : pandas.DataFrame or None
        Return type depends on whether the ``outfile`` parameter is set:

        - :class:`pandas.DataFrame` table with (x, y, z) columns if ``outfile``
          is not set.
        - None if ``outfile`` is set (filtered output will be stored in file
          set by ``outfile``).

    Example
    -------
    >>> import pygmt
    >>> # Load a table of ship observations of bathymetry off Baja California
    >>> data = pygmt.datasets.load_sample_data(name="bathymetry")
    >>> # Calculate block median values within 5 by 5 minute bins
    >>> data_bmedian = pygmt.blockmedian(
    ...     data=data, region=[245, 255, 20, 30], spacing="5m"
    ... )
    """
    return _blockm(
        block_method="blockmedian", data=data, x=x, y=y, z=z, outfile=outfile, **kwargs
    )


[docs]@fmt_docstring
@check_data_input_order("v0.5.0", remove_version="v0.7.0")
@use_alias(
    I="spacing",
    R="region",
    V="verbose",
    a="aspatial",
    b="binary",
    d="nodata",
    e="find",
    f="coltypes",
    h="header",
    i="incols",
    o="outcols",
    r="registration",
    w="wrap",
)
@kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma", o="sequence_comma")
def blockmode(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
    r"""
    Block average (x,y,z) data tables by mode estimation.

    Reads arbitrarily located (x,y,z) triples [or optionally weighted
    quadruples (x,y,z,w)] and writes to the output a mode position and value
    for every non-empty block in a grid region defined by the ``region`` and
    ``spacing`` parameters.

    Takes a matrix, xyz triplets, or a file name as input.

    Must provide either ``data`` or ``x``, ``y``, and ``z``.

    Full option list at :gmt-docs:`blockmode.html`

    {aliases}

    Parameters
    ----------
    data : str or {table-like}
        Pass in (x, y, z) or (longitude, latitude, elevation) values by
        providing a file name to an ASCII data table, a 2D
        {table-classes}.
    x/y/z : 1d arrays
        Arrays of x and y coordinates and values z of the data points.

    {I}

    {R}

    outfile : str
        The file name for the output ASCII file.

    {V}
    {a}
    {b}
    {d}
    {e}
    {f}
    {h}
    {i}
    {o}
    {r}
    {w}

    Returns
    -------
    output : pandas.DataFrame or None
        Return type depends on whether the ``outfile`` parameter is set:

        - :class:`pandas.DataFrame` table with (x, y, z) columns if ``outfile``
          is not set.
        - None if ``outfile`` is set (filtered output will be stored in file
          set by ``outfile``).

    Example
    -------
    >>> import pygmt
    >>> # Load a table of ship observations of bathymetry off Baja California
    >>> data = pygmt.datasets.load_sample_data(name="bathymetry")
    >>> # Calculate block mode values within 5 by 5 minute bins
    >>> data_bmode = pygmt.blockmode(
    ...     data=data, region=[245, 255, 20, 30], spacing="5m"
    ... )
    """
    return _blockm(
        block_method="blockmode", data=data, x=x, y=y, z=z, outfile=outfile, **kwargs
    )