Station¶

riweather.Station ¶

Station(usaf_id, *, load_metadata_on_init=True)

ISD Station object.

Examples:

>>> s = Station("720534")
>>> s
Station("720534")
>>> print(s.name, s.latitude, s.longitude)
ERIE MUNICIPAL AIRPORT 40.017 -105.05

Parameters:

usaf_id (str) –

USAF identifier
load_metadata_on_init (bool, default: True ) –

If True, station metadata will be retrieved from the local data store and loaded into the object as properties.

Source code in src/riweather/stations.py

def __init__(self, usaf_id: str, *, load_metadata_on_init: bool = True) -> None:
    """Create a station handle for a USAF identifier.

    Args:
        usaf_id: USAF identifier of the station.
        load_metadata_on_init: When `True`, the station metadata is fetched
            through `_load_metadata` immediately and kept on the instance.
            When `False`, the metadata mapping starts out empty.
    """
    self.usaf_id = usaf_id
    # An empty dict stands in for the metadata when loading is skipped.
    self._station = self._load_metadata() if load_metadata_on_init else {}

wban_ids `property` ¶

wban_ids

List of valid WBAN (Weather Bureau Army Navy) identifiers.

recent_wban_id `property` ¶

recent_wban_id

Most recent WBAN (Weather Bureau Army Navy) identifier.

name `property` ¶

name

Station name.

Examples:

>>> s = Station("720534")
>>> s.name
'ERIE MUNICIPAL AIRPORT'

icao_code `property` ¶

icao_code

ICAO airport code.

latitude `property` ¶

latitude

Station latitude.

longitude `property` ¶

longitude

Station longitude.

elevation `property` ¶

elevation

Elevation of the station, in meters.

state `property` ¶

state

US state in which the station is located.

Examples:

>>> s = Station("720534")
>>> s.state
'CO'

years `property` ¶

years

Years for which data exists for the station.

get_filenames ¶

get_filenames(year=None)

Construct the names of ISD files corresponding to this station.

Parameters:

year (int | None, default: None ) –

Limit the filenames to the one corresponding to the given year. If None, filenames for all years are returned.

Returns:

list[str] –

List of filenames

Examples:

>>> s = Station("720534")
>>> print(s.get_filenames(2022))
['/pub/data/noaa/2022/720534-00161-2022.gz']

Source code in src/riweather/stations.py

def get_filenames(self, year: int | None = None) -> list[str]:
    """Construct the names of ISD files corresponding to this station.

    Args:
        year: Limit the filenames to the one corresponding to the given year.
            If `None`, filenames for all years are returned.

    Returns:
        List of filenames. When a specific ``year`` is requested but riweather's
        metadata holds no record for it, a single best-guess filename (built from
        the most recent WBAN identifier) is returned and a warning is issued.
        When no ``year`` is given and no records exist, the list is empty and a
        warning is issued.

    Examples:
        >>> s = Station("720534")
        >>> print(s.get_filenames(2022))
        ['/pub/data/noaa/2022/720534-00161-2022.gz']
    """
    stmt = select(models.FileCount).where(models.FileCount.station_id == self._station.get("id"))
    if year is not None:
        stmt = stmt.where(models.FileCount.year == year)

    filename_template = "/pub/data/noaa/{2}/{0}-{1}-{2}.gz"
    with MetadataSession() as session:
        filenames = [
            filename_template.format(self.usaf_id, row.wban_id, row.year) for row in session.scalars(stmt)
        ]

    if filenames:
        return filenames

    if year is None:
        # Without a year there is nothing to guess a path from; formatting the
        # template with ``None`` would only yield a nonsensical ".../None/..." path.
        msg = f"No records for station {self.usaf_id} were found in riweather's metadata."
        warnings.warn(msg, UserWarning, stacklevel=3)
        return []

    fallback = filename_template.format(self.usaf_id, self.recent_wban_id, year)
    # Report ``self.usaf_id`` rather than the metadata mapping, which is empty
    # when the station was created with ``load_metadata_on_init=False``.
    msg = (
        "A record for station {} and year {} was not found in riweather's metadata. "
        "Trying to fetch data directly from the following URL, which may not exist: {}"
    ).format(self.usaf_id, year, fallback)
    warnings.warn(msg, UserWarning, stacklevel=3)
    return [fallback]

quality_report ¶

quality_report(year=None)

Retrieve information on data quality.

Parameters:

year (int | None, default: None ) –

Limit the report to information concerning the given year. If None, all years are included.

Returns:

DataFrame | Series –

Data quality report

Source code in src/riweather/stations.py

def quality_report(self, year: int | None = None) -> pd.DataFrame | pd.Series:
    """Summarize the data quality records stored for this station.

    Args:
        year: Restrict the report to this year. With `None`, every year on
            record is included.

    Returns:
        Data quality report. The result of the query is squeezed, so a report
        consisting of a single record collapses to a Series.
    """
    # Attributes copied verbatim from each file-count record, in output order.
    record_fields = (
        "wban_id",
        "year",
        "quality",
        "jan",
        "feb",
        "mar",
        "apr",
        "may",
        "jun",
        "jul",
        "aug",
        "sep",
        "oct",
        "nov",
        "dec",
        "count",
        "n_zero_months",
    )

    query = select(models.FileCount).where(models.FileCount.station_id == self._station.get("id"))
    if year is not None:
        query = query.where(models.FileCount.year == year)

    report_rows = []
    with MetadataSession() as session:
        for entry in session.scalars(query).all():
            # The USAF id comes from the related station record, not the count row.
            report_row = {"usaf_id": entry.station.usaf_id}
            for field in record_fields:
                report_row[field] = getattr(entry, field)
            report_rows.append(report_row)

    return pd.DataFrame(report_rows).squeeze()

fetch_raw_data ¶

fetch_raw_data(year, *, use_http=False)

Fetch data from ISD.

Parameters:

year (int | list[int]) –

Year or years of data to fetch.
use_http (bool, default: False ) –

Use NOAA’s HTTP server instead of their FTP server. Set this to True if you are running into issues with FTP.

Returns:

list[ISDRecord] –

A list of data records from the ISD database.

Source code in src/riweather/stations.py

def fetch_raw_data(self, year: int | list[int], *, use_http: bool = False) -> list[ISDRecord]:
    """Download and parse ISD records for one or more years.

    Args:
        year: A single year, or a list of years, to fetch.
        use_http: Read from NOAA's HTTP server rather than the FTP server.
            Set this to ``True`` if you are running into issues with FTP.

    Returns:
        The parsed data records, in the order the files and their lines were read.
    """
    years = year if isinstance(year, list) else [year]

    filenames = []
    for single_year in years:
        filenames.extend(self.get_filenames(single_year))

    if use_http:
        connection_cls = NOAAHTTPConnection
    else:
        connection_cls = NOAAFTPConnection

    with connection_cls() as conn:
        records = []
        for filename in filenames:
            for raw_line in conn.read_file_as_bytes(filename):
                # Lines arrive as bytes; the parser works on text.
                records.append(parser.parse_line(raw_line.decode("utf-8")))
        return records

fetch_data ¶

fetch_data(
    year,
    datum=None,
    *,
    period=None,
    rollup="ending",
    upsample_first=True,
    tz="UTC",
    include_control=False,
    include_quality_codes=True,
    temp_scale=None,
    model_dump_include=None,
    model_dump_exclude=None,
    use_http=False,
)

Fetch data from ISD and return it as a DataFrame.

Parameters:

year (int | list[int]) –

Year or years of data to fetch.
datum (str | list[str] | None, default: None ) –
Data elements to include in the results. Must be one or more of the mandatory data fields:
- 'wind'
- 'ceiling'
- 'visibility'
- 'air_temperature'
- 'dew_point'
- 'sea_level_pressure'
If not specified, all data are returned.
period (str | None, default: None ) –

The time step at which the data will be returned. If None, the default, the data is returned at the original times they were observed. If specified, it must be a frequency string recognized by Pandas such as 'h' for hourly and '15min' for every 15 minutes (see the docs on frequency strings). The data will be resampled to the given frequency.
rollup (str, default: 'ending' ) –

How to align values to the period. Defaults to 'ending', meaning that values over the previous time period are averaged. Can also be 'starting', 'midpoint', or 'instant'. If period=None, this value is ignored.
upsample_first (bool, default: True ) –

Whether to upsample the data to the minute level prior to resampling. Upsampling is recommended because it gives more accurate representations of the weather observations, so it defaults to True.
tz (str, default: 'UTC' ) –

The timestamps of each observation are returned from the ISD in UTC. If this parameter is set, the data will be converted to the specified timezone. The timezone string should match one of the standard TZ identifiers, e.g. 'US/Eastern', 'US/Central', 'US/Mountain', 'US/Pacific', etc.
include_control (bool, default: False ) –

If True, include the control data fields in the results.
include_quality_codes (bool, default: True ) –

If False, filter out all the quality code fields from the results. These are columns that end in the string 'quality_code'.
temp_scale (str | None, default: None ) –

By default, when 'air_temperature' or 'dew_point' are specified as a datum, temperatures are returned in both degrees Celsius and degrees Fahrenheit. To only include one or the other, set temp_scale to 'C' or 'F'. Ignored if no temperature values are meant to be retrieved.
model_dump_include (IncEx | None, default: None ) –

Fine-grained control over the fields that are returned. Passed directly to pydantic.BaseModel.model_dump as the include parameter; see the docs for details. Takes precedence over datum.
model_dump_exclude (IncEx | None, default: None ) –

Fine-grained control over the fields that are returned. Passed directly to pydantic.BaseModel.model_dump as the exclude parameter; see the docs for details. Takes precedence over datum.
use_http (bool, default: False ) –

Use NOAA’s HTTP server instead of their FTP server. Set this to True if you are running into issues with FTP.

Returns:

DataFrame –

Weather observations from the station.

Source code in src/riweather/stations.py

def fetch_data(
    self,
    year: int | list[int],
    datum: str | list[str] | None = None,
    *,
    period: str | None = None,
    rollup: str = "ending",
    upsample_first: bool = True,
    tz: str = "UTC",
    include_control: bool = False,
    include_quality_codes: bool = True,
    temp_scale: str | None = None,
    model_dump_include: IncEx | None = None,
    model_dump_exclude: IncEx | None = None,
    use_http: bool = False,
) -> pd.DataFrame:
    """Fetch ISD observations and assemble them into a [DataFrame][pandas.DataFrame].

    Args:
        year: Year or years of data to fetch.
        datum: One or more of the
            [mandatory data fields][riweather.parser.MandatoryData] to include:

            * ``'wind'``
            * ``'ceiling'``
            * ``'visibility'``
            * ``'air_temperature'``
            * ``'dew_point'``
            * ``'sea_level_pressure'``

            When omitted, all data are returned.
        period: Time step of the returned data. With ``None`` (the default) the
            observations are returned at their original times. Otherwise it must be a
            frequency string recognized by [Pandas][], such as ``'h'`` for hourly or
            ``'15min'`` for every 15 minutes (see the
            [docs on frequency strings](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects)),
            and the data are
            [resampled](https://pandas.pydata.org/docs/user_guide/timeseries.html#resampling)
            to that frequency.
        rollup: How values are aligned to the ``period``: ``'ending'`` (the default, values
            over the previous time period are averaged), ``'starting'``, ``'midpoint'``, or
            ``'instant'``. The value is validated even when ``period=None``, but is
            otherwise unused in that case.
        upsample_first: Whether to upsample the data to the minute level prior to
            resampling. Upsampling is recommended because it gives more accurate
            representations of the weather observations, so it defaults to ``True``.
        tz: The ISD reports observation timestamps in
            [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time). Any other value
            converts the result to that timezone. The string should match one of the
            [standard TZ identifiers](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones),
            e.g. ``'US/Eastern'``, ``'US/Central'``, ``'US/Mountain'``, ``'US/Pacific'``, etc.
        include_control: If ``True``, include the
            [control data fields][riweather.parser.ControlData] in the results.
        include_quality_codes: If ``False``, drop every column whose name contains
            ``'quality_code'``.
        temp_scale: By default temperatures are returned in both degrees Celsius and
            degrees Fahrenheit. Set to ``'C'`` or ``'F'`` (case-insensitive) to keep only
            one of them. Any other value leaves the columns untouched.
        model_dump_include: Fine-grained control over the fields that are returned. Passed
            directly to [``pydantic.BaseModel.model_dump``][] as the `include` parameter; see
            the docs for details. Takes precedence over ``datum``.
        model_dump_exclude: Fine-grained control over the fields that are returned. Passed
            directly to [``pydantic.BaseModel.model_dump``][] as the `exclude` parameter; see
            the docs for details. Takes precedence over ``datum``.
        use_http: Use NOAA's HTTP server instead of their FTP server. Set this to ``True``
            if you are running into issues with FTP.

    Returns:
        Weather observations from the station.

    Raises:
        ValueError: If ``datum`` names an unknown field or ``rollup`` is not one of the
            supported values.
    """
    if datum is not None:
        if not isinstance(datum, list):
            datum = [datum]

        if any(d not in MandatoryData.model_fields for d in datum):
            msg = f"datum must be a subset of the following: {list(MandatoryData.model_fields)}"
            raise ValueError(msg)

    valid_rollups = ("starting", "ending", "midpoint", "instant")
    if rollup not in valid_rollups:
        msg = "Invalid rollup"
        raise ValueError(msg)

    def _drop_columns_containing(frame, fragment):
        # Keep only the columns whose label does not contain ``fragment``.
        retained = frame.columns[~frame.columns.str.contains(fragment)]
        return frame.loc[:, retained]

    records = self.fetch_raw_data(year, use_http=use_http)
    timestamps = pd.DatetimeIndex([rec.control.dt for rec in records])

    if include_control:
        # The timestamp becomes the index, so it is left out of the control columns.
        control_frame = pd.json_normalize([rec.control.model_dump(exclude={"dt"}) for rec in records])
        control_frame.index = timestamps
    else:
        control_frame = pd.DataFrame()

    # Explicit pydantic include/exclude arguments override the ``datum`` selection.
    if model_dump_include is None and model_dump_exclude is None:
        dump_kwargs = {"include": datum}
    else:
        dump_kwargs = {"include": model_dump_include, "exclude": model_dump_exclude}

    mandatory_frame = pd.json_normalize([rec.mandatory.model_dump(**dump_kwargs) for rec in records])
    mandatory_frame.index = timestamps

    df = pd.concat([control_frame, mandatory_frame], axis=1)

    if not include_quality_codes:
        df = _drop_columns_containing(df, "quality_code")

    if temp_scale is not None:
        # Requesting one scale means dropping the columns of the other one.
        unwanted = {"c": "temperature_f", "f": "temperature_c"}.get(temp_scale.lower())
        if unwanted is not None:
            df = _drop_columns_containing(df, unwanted)

    if period is not None:
        # Only the fields registered as aggregable are carried into the resampling.
        aggregable = [f"{field}.{sub}" for field, subs in _AGGREGABLE_FIELDS.items() for sub in subs]
        df = df.loc[:, [col for col in df.columns if col in aggregable]]

        rollup_functions = {
            "starting": rollup_starting,
            "ending": rollup_ending,
            "midpoint": rollup_midpoint,
            "instant": rollup_instant,
        }
        df = rollup_functions[rollup](df, period, upsample_first=upsample_first)

    return df if tz == "UTC" else df.tz_convert(tz)

fetch_raw_temp_data ¶

fetch_raw_temp_data(year=None, scale='C', *, use_http=False)

Retrieve raw weather data from the ISD.

Warning

This has been deprecated and will be removed in a future release. Please consider using riweather.Station.fetch_data instead.

Parameters:

year (int | list[int] | None, default: None ) –

Returned data will be limited to the year(s) specified. If None, data for all years is returned.
scale (str, default: 'C' ) –

Return the temperature in Celsius ("C", the default) or Fahrenheit ("F").
use_http (bool, default: False ) –

Use NOAA’s HTTP server instead of their FTP server. Set this to True if you are running into issues with FTP.

Returns:

DataFrame –

A DataFrame, indexed on the timestamp, with four columns: wind direction, wind speed, air temperature, and dew point temperature.

Examples:

>>> s = Station("720534")
>>> print(s.fetch_raw_temp_data(2022).head(2))
                           wind_dir  wind_speed  tempC  dewC
2022-01-01 00:15:00+00:00      80.0         4.6   -2.8  -4.0
2022-01-01 00:35:00+00:00      60.0         4.1   -4.2  -5.5

Source code in src/riweather/stations.py

def fetch_raw_temp_data(
    self,
    year: int | list[int] | None = None,
    scale: str = "C",
    *,
    use_http: bool = False,
) -> pd.DataFrame:
    """Retrieve raw weather data from the ISD.

    !!! warning
        This has been deprecated and will be removed in a future release. Please consider using
        [`riweather.Station.fetch_data`][] instead.

    Args:
        year: Returned data will be limited to the year(s) specified. If
            `None`, data for all years is returned.
        scale: Return the temperature in Celsius (`"C"`, the default) or
            Fahrenheit (`"F"`).
        use_http: Use NOAA's HTTP server instead of their FTP server. Set
            this to ``True`` if you are running into issues with FTP.

    Returns:
        A [DataFrame][pandas.DataFrame], indexed on the timestamp, with four columns:
            wind direction, wind speed, air temperature and dew point temperature.
            Observations sharing a timestamp are averaged.

    Raises:
        ValueError: If `scale` is neither `"C"` nor `"F"`.

    Examples:
        >>> s = Station("720534")
        >>> print(s.fetch_raw_temp_data(2022).head(2))  # doctest: +SKIP
                                   wind_dir  wind_speed  tempC  dewC
        2022-01-01 00:15:00+00:00      80.0         4.6   -2.8  -4.0
        2022-01-01 00:35:00+00:00      60.0         4.1   -4.2  -5.5
    """
    msg = "fetch_raw_temp_data is deprecated. Please use fetch_raw_data() in the future."
    warnings.warn(DeprecationWarning(msg), stacklevel=2)

    # Reject a bad scale before querying the metadata store or opening a connection.
    if scale not in ("C", "F"):
        msg = 'Scale must be "C" (Celsius) or "F" (Fahrenheit).'
        raise ValueError(msg)

    # get_filenames() filters on a single year, so a list is expanded one year at a time.
    if isinstance(year, list):
        filenames = [name for single_year in year for name in self.get_filenames(single_year)]
    else:
        filenames = self.get_filenames(year)
    connector = NOAAHTTPConnection if use_http else NOAAFTPConnection

    data = []
    with connector() as conn:
        for filename in filenames:
            datastream = conn.read_file_as_bytes(filename)
            for line in datastream.readlines():
                # Fields are read from fixed byte offsets within each line.
                date_str = line[15:27].decode("utf-8")
                dt = pytz.UTC.localize(datetime.strptime(date_str, "%Y%m%d%H%M"))  # noqa: DTZ007
                # "999" / "9999" are treated as missing values and become NaN.
                wind_dir = int(line[60:63]) if line[60:63].decode("utf-8") != "999" else float("nan")
                wind_speed = float(line[65:69]) / 10.0 if line[65:69].decode("utf-8") != "9999" else float("nan")
                temp_c = _parse_temp(line[87:92])
                dew_c = _parse_temp(line[93:98])
                data.append([dt, wind_dir, wind_speed, temp_c, dew_c])

    timestamps, wind_dirs, wind_speeds, temps, dews = zip(*sorted(data), strict=True)
    ts = pd.DataFrame(
        {"wind_dir": wind_dirs, "wind_speed": wind_speeds, "tempC": temps, "dewC": dews}, index=timestamps
    )

    if scale == "F":
        ts["tempF"] = ts["tempC"] * 1.8 + 32
        ts["dewF"] = ts["dewC"] * 1.8 + 32
        ts = ts.drop(["tempC", "dewC"], axis="columns")

    # Collapse duplicate timestamps into a single averaged row.
    return ts.groupby(ts.index).mean()

fetch_temp_data ¶

fetch_temp_data(
    year=None,
    value=None,
    scale="C",
    period="h",
    rollup="ending",
    *,
    upsample_first=True,
    use_http=False,
)

Retrieve temperature data from the ISD.

Warning

This has been deprecated and will be removed in a future release. Please consider using riweather.Station.fetch_data instead.

Parameters:

year (int | list[int] | None, default: None ) –

Returned data will be limited to the year specified. If None, data for all years is returned.
value (str | None, default: None ) –

"temperature" to retrieve the air temperature only, or "dew_point" to retrieve the dew point temperature only. None returns both temperatures in a DataFrame.
scale (str, default: 'C' ) –

Return the value(s) in Celsius ("C", the default) or Fahrenheit ("F").
period (str, default: 'h' ) –

The time step at which the data will be returned. Defaults to "h", which corresponds to hourly data. Other possible values are "30min" for half-hourly data, "15min" for quarter-hourly data, and so on. See the Pandas documentation on frequency strings for more details on possible values.
rollup (str, default: 'ending' ) –

How to align values to the period. Defaults to "ending", meaning that values over the previous time period are averaged.
upsample_first (bool, default: True ) –

Whether to upsample the data to the minute level prior to resampling. Usually results in more accurate representations of the true weather data.
use_http (bool, default: False ) –

Use NOAA’s HTTP server instead of their FTP server. Set this to True if you are running into issues with FTP.

Returns:

DataFrame | Series –

Either a DataFrame containing both air temperature and dew point temperature, or, if value was supplied, a Series containing one or the other.

Examples:

>>> s = Station("720534")
>>> print(s.fetch_temp_data(2022).head(2))
                            wind_dir  wind_speed     tempC      dewC
2022-01-01 01:00:00+00:00  63.913043    4.197826 -4.328261 -5.539674
2022-01-01 02:00:00+00:00  17.583333    3.656250 -6.585833 -7.717917

Source code in src/riweather/stations.py

def fetch_temp_data(
    self,
    year: int | list[int] | None = None,
    value: str | None = None,
    scale: str = "C",
    period: str = "h",
    rollup: str = "ending",
    *,
    upsample_first: bool = True,
    use_http: bool = False,
) -> pd.DataFrame | pd.Series:
    """Retrieve resampled temperature data from the ISD.

    !!! warning
        This has been deprecated and will be removed in a future release. Please consider using
        [`riweather.Station.fetch_data`][] instead.

    Args:
        year: Returned data will be limited to the year specified. If
            `None`, data for all years is returned.
        value: `"temperature"` for the air temperature only, or `"dew_point"`
            for the dew point temperature only. `None` returns the full
            [DataFrame][pandas.DataFrame].
        scale: Return the value(s) in Celsius (`"C"`, the default) or
            Fahrenheit (`"F"`).
        period: The time step at which the data will be returned. Defaults
            to `"h"`, which corresponds to hourly data. Other possible
            values are `"30min"` for half-hourly data, `"15min"`
            for quarter-hourly data, and so on. See the [Pandas documentation
            on frequency strings](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects)
            for more details on possible values.
        rollup: How to align values to the `period`: `"ending"` (the default,
            values over the previous time period are averaged), `"starting"`,
            `"midpoint"`, or `"instant"`.
        upsample_first: Whether to upsample the data to the minute level prior to
            resampling. Usually results in more accurate representations of the
            true weather data.
        use_http: Use NOAA's HTTP server instead of their FTP server. Set
            this to ``True`` if you are running into issues with FTP.

    Returns:
        Either the resampled [DataFrame][pandas.DataFrame], or, if `value` was
            supplied, a [Series][pandas.Series] holding just that column.

    Raises:
        ValueError: If `value` or `rollup` is not one of the accepted strings.

    Examples:
        >>> s = Station("720534")
        >>> print(s.fetch_temp_data(2022).head(2))  # doctest: +SKIP
                                    wind_dir  wind_speed     tempC      dewC
        2022-01-01 01:00:00+00:00  63.913043    4.197826 -4.328261 -5.539674
        2022-01-01 02:00:00+00:00  17.583333    3.656250 -6.585833 -7.717917
    """
    msg = "fetch_temp_data is deprecated. Please use fetch_data(year, datum='air_temperature') instead."
    warnings.warn(DeprecationWarning(msg), stacklevel=2)

    if value is not None and value not in ("temperature", "dew_point"):
        msg = 'Value must be "temperature" or "dew_point"'
        raise ValueError(msg)

    if rollup not in ("starting", "ending", "midpoint", "instant"):
        msg = "Invalid rollup"
        raise ValueError(msg)

    # The raw fetch issues its own deprecation warning; one warning per call is enough.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        raw_ts = self.fetch_raw_temp_data(year, scale=scale, use_http=use_http)

    rollup_functions = {
        "starting": rollup_starting,
        "ending": rollup_ending,
        "midpoint": rollup_midpoint,
        "instant": rollup_instant,
    }
    ts = rollup_functions[rollup](raw_ts, period, upsample_first=upsample_first)

    if value is None:
        return ts

    # Column names carry the scale as a suffix, e.g. "tempC" or "dewF".
    column_prefix = "temp" if value == "temperature" else "dew"
    return ts.loc[:, f"{column_prefix}{scale}"]

riweather.plot_stations ¶

plot_stations(lat, lon, ranked_stations, *, n=None, distance_unit='m')

Plot stations relative to a location.

Raises:

ImportError –

If matplotlib and folium are not installed.

Parameters:

lat (float) –

Site latitude
lon (float) –

Site longitude
ranked_stations (DataFrame) –

Ranked stations
n (int | None, default: None ) –

The n top-ranked stations of ranked_stations will be plotted
distance_unit (str, default: 'm' ) –

Distance unit to use on the plot. Must be meters (m), kilometers (km), or miles (mi)

Source code in src/riweather/viz.py

def plot_stations(
    lat: float,
    lon: float,
    ranked_stations: pd.DataFrame,
    *,
    n: int | None = None,
    distance_unit: str = "m",
):
    """Draw a map of a site and the weather stations around it.

    The site gets a marker, every plotted station gets a cloud-icon marker, and a
    line labelled with the distance connects the site to each station.

    Raises:
        ImportError: If [matplotlib][] and
            [folium](https://python-visualization.github.io/folium/latest) are not installed.

    Args:
        lat: Site latitude
        lon: Site longitude
        ranked_stations: Ranked stations
        n: The ``n`` top-ranked stations of ``ranked_stations`` will be plotted.
            With ``None``, all of them are plotted.
        distance_unit: Distance unit to use on the plot. Must be meters (``m``),
            kilometers (``km``), or miles (``mi``)

    Returns:
        The folium map holding the markers and lines.
    """
    # Both plotting dependencies are optional, so they are imported lazily here.
    try:
        import matplotlib.pyplot as plt  # noqa
    except ImportError:
        msg = "Plotting stations requires matplotlib"
        raise ImportError(msg) from None

    try:
        import folium
    except ImportError:
        msg = "Plotting stations requires folium"
        raise ImportError(msg) from None

    station_count = ranked_stations.shape[0] if n is None else n
    stations_to_plot = ranked_stations.head(station_count)

    site_map = folium.Map(location=[lat, lon])
    site_marker = folium.Marker([lat, lon], popup="Site")
    site_marker.add_to(site_map)

    for station in stations_to_plot.itertuples():
        station_marker = folium.Marker(
            [station.latitude, station.longitude],
            popup=station.name,
            icon=folium.Icon(icon="cloud"),
        )
        station_marker.add_to(site_map)

        connector = folium.PolyLine(
            [[lat, lon], [station.latitude, station.longitude]],
            popup=_calculate_distance_labels(station.distance, distance_unit),
        )
        connector.add_to(site_map)

    return site_map

riweather.rank_stations ¶

rank_stations(lat=None, lon=None, *, year=None, max_distance_m=None, zipcode=None)

Rank stations by distance to a point.

Parameters:

lat (float | None, default: None ) –

Site latitude
lon (float | None, default: None ) –

Site longitude
year (int | None, default: None ) –

If specified, only include stations with data for the given year(s).
max_distance_m (int | None, default: None ) –

If specified, only include stations within this distance (in meters) from the site.
zipcode (str | None, default: None ) –

Site zip code. If lat and/or lon are not given and zipcode is, then stations will be ranked according to the distance from the center point of the zip code.

Returns:

DataFrame –

A DataFrame of station information.

Source code in src/riweather/stations.py

def rank_stations(
    lat: float | None = None,
    lon: float | None = None,
    *,
    year: int | list[int] | None = None,
    max_distance_m: int | None = None,
    zipcode: str | None = None,
) -> pd.DataFrame:
    """Rank stations by distance to a point.

    Args:
        lat: Site latitude
        lon: Site longitude
        year: If specified, only include stations with data for the given year. A list
            of years keeps only the stations that have data for every listed year.
        max_distance_m: If specified, only include stations within this distance
            (in meters) from the site.
        zipcode: Site zip code. If ``lat`` and/or ``lon`` are not given and ``zipcode`` is, then
            stations will be ranked according to the distance from the center point of the zip code.

    Returns:
        A [DataFrame][pandas.DataFrame] of station information, indexed by USAF identifier
        and ordered by increasing distance.

    Raises:
        ValueError: If neither a complete ``lat``/``lon`` pair nor a ``zipcode`` is given.
    """
    if lat is None or lon is None:
        if zipcode is None:
            msg = "Either lat and lon must both be provided, or zipcode must be provided."
            raise ValueError(msg)
        lat, lon = zcta_to_lat_lon(zipcode)

    station_info = {info["usaf_id"]: info for info in _calculate_distances(lat, lon)}

    results = (
        select(
            models.Station.usaf_id,
            models.Station.name,
            models.FileCount.year,
            models.FileCount.quality,
        )
        .join_from(
            models.Station,
            models.FileCount,
        )
        .where(models.Station.usaf_id.in_(station_info.keys()))
    )

    # One entry per station, accumulating the years on record and their quality flags.
    data = {}
    with MetadataSession() as session:
        for row in session.execute(results):
            entry = data.setdefault(
                row.usaf_id,
                {**station_info[row.usaf_id], "years": [], "quality": []},
            )
            entry["years"].append(row.year)
            entry["quality"].append(row.quality)

    data = pd.DataFrame(sorted(data.values(), key=operator.itemgetter("distance"))).set_index("usaf_id")

    if year is not None:
        # A single year is treated as a one-element list so both forms share one filter.
        required_years = year if isinstance(year, list) else [year]
        has_required_years = data["years"].apply(
            lambda available: all(y in available for y in required_years)
        )
        data = data.loc[has_required_years, :]

    if max_distance_m is not None:
        data = data.loc[data["distance"] <= max_distance_m, :]

    return data

riweather.select_station ¶

select_station(ranked_stations, rank=0)

Return a Station object out of a ranked set of stations.

Parameters:

ranked_stations (DataFrame) –

A DataFrame returned by riweather.rank_stations.
rank (int, default: 0 ) –

Which station to return. Defaults to rank=0, which corresponds to the first (i.e. nearest) station.

Returns:

Station –

A Station object.

Source code in src/riweather/stations.py

def select_station(ranked_stations: pd.DataFrame, rank: int = 0) -> Station:
    """Pick one station from a ranked table and wrap it in a Station object.

    Args:
        ranked_stations: A [DataFrame][pandas.DataFrame] returned by
            [`riweather.rank_stations`][].
        rank: Position of the station to return after sorting by distance.
            The default, `rank=0`, is the first (i.e. nearest) station.

    Returns:
        A [`Station`][riweather.Station] object.

    Raises:
        ValueError: If `rank` is not smaller than the number of stations.
    """
    if rank >= len(ranked_stations):
        msg = "Rank too large, not enough stations"
        raise ValueError(msg)

    by_distance = ranked_stations.sort_values("distance")
    chosen_row = by_distance.iloc[rank]
    # ``.name`` of a row Series is its index label, which is passed on as the USAF id
    # (not the value of any "name" column).
    return Station(usaf_id=chosen_row.name)

riweather.zcta_to_lat_lon ¶

zcta_to_lat_lon(zcta)

Convert zip code to lat/lon.

Parameters:

zcta (str) –

Five-digit zip code

Returns:

(float, float) –

The center point of the ZCTA (Zip Code Tabulation Area).

Source code in src/riweather/stations.py

def zcta_to_lat_lon(zcta: str) -> tuple[float, float]:
    """Convert zip code to lat/lon.

    Args:
        zcta: Five-digit zip code

    Returns:
        The center point of the ZCTA (Zip Code Tabulation Area), as a
        ``(latitude, longitude)`` pair.

    Raises:
        ValueError: If the zip code has no entry in riweather's metadata.
    """
    with MetadataSession() as session:
        record = session.scalars(select(models.Zcta).where(models.Zcta.zip == zcta)).first()

    # ``first()`` yields None when nothing matches; fail with a clear message
    # instead of an AttributeError on None.
    if record is None:
        msg = f"Zip code {zcta!r} was not found in riweather's metadata."
        raise ValueError(msg)

    return record.latitude, record.longitude

Station¶

riweather.Station ¶

wban_ids property ¶

recent_wban_id property ¶

name property ¶

icao_code property ¶

latitude property ¶

longitude property ¶

elevation property ¶

state property ¶

years property ¶

get_filenames ¶

quality_report ¶

fetch_raw_data ¶

fetch_data ¶

fetch_raw_temp_data ¶

fetch_temp_data ¶

riweather.plot_stations ¶

riweather.rank_stations ¶

riweather.select_station ¶

riweather.zcta_to_lat_lon ¶

wban_ids `property` ¶

recent_wban_id `property` ¶

name `property` ¶

icao_code `property` ¶

latitude `property` ¶

longitude `property` ¶

elevation `property` ¶

state `property` ¶

years `property` ¶