Skip to content

API Reference

riweather

riweather.

Grab publicly available weather data.

plot_stations

plot_stations(
    lat, lon, ranked_stations, *, n=None, distance_unit="m"
)

Plot stations relative to a location.

Raises:

Type Description
ImportError

If either matplotlib or folium is not installed.

Parameters:

Name Type Description Default
lat float

Site latitude

required
lon float

Site longitude

required
ranked_stations pd.DataFrame

Ranked stations

required
n int

The n top-ranked stations of ranked_stations will be plotted

None
distance_unit str

Distance unit to use on the plot. Must be meters (m), kilometers (km), or miles (mi)

'm'
Source code in riweather/viz.py
def plot_stations(
    lat: float,
    lon: float,
    ranked_stations: pd.DataFrame,
    *,
    n: int = None,
    distance_unit: str = "m",
):
    """Draw a site and its surrounding weather stations on a folium map.

    The site gets a marker with the popup ``Site``. Every plotted station gets
    a cloud-icon marker whose popup is the station name, plus a line back to
    the site whose popup is the distance label.

    Raises:
        ImportError: If either [matplotlib][] or
            [folium](https://python-visualization.github.io/folium/) cannot be
            imported.

    Args:
        lat: Site latitude
        lon: Site longitude
        ranked_stations: Ranked stations. The ``latitude``, ``longitude``,
            ``name`` and ``distance`` fields of each row are read.
        n: Only the first ``n`` rows of ``ranked_stations`` are plotted. When
            omitted, every row is plotted.
        distance_unit: Distance unit to use on the plot. Must be meters (``m``),
            kilometers (``km``), or miles (``mi``)

    Returns:
        The map object that the markers and lines were added to.
    """
    # Optional dependencies are probed first so a missing package produces a
    # short, explicit message instead of a chained traceback.
    try:
        import matplotlib.pyplot as plt  # noqa
    except ImportError:
        raise ImportError("Plotting stations requires matplotlib") from None

    try:
        import folium
    except ImportError:
        raise ImportError("Plotting stations requires folium") from None

    limit = ranked_stations.shape[0] if n is None else n
    nearest = ranked_stations.head(limit)

    station_map = folium.Map(location=[lat, lon])
    site_marker = folium.Marker([lat, lon], popup="Site")
    site_marker.add_to(station_map)

    for station in nearest.itertuples():
        station_marker = folium.Marker(
            [station.latitude, station.longitude],
            popup=station.name,
            icon=folium.Icon(icon="cloud"),
        )
        station_marker.add_to(station_map)

        # Connect the station to the site and label the line with the distance.
        distance_label = _calculate_distance_labels(station.distance, distance_unit)
        connector = folium.PolyLine(
            [[lat, lon], [station.latitude, station.longitude]],
            popup=distance_label,
        )
        connector.add_to(station_map)

    return station_map

rank_stations

rank_stations(lat, lon, *, year=None, max_distance_m=None)

Rank stations by distance to a point.

Parameters:

Name Type Description Default
lat float

Site latitude

required
lon float

Site longitude

required
year int

If specified, only include stations with data for the given year(s).

None
max_distance_m int

If specified, only include stations within this distance (in meters) from the site.

None

Returns:

Type Description
pd.DataFrame

A DataFrame of station information.

Source code in riweather/stations.py
def rank_stations(
    lat: float,
    lon: float,
    *,
    year: int | list[int] | None = None,
    max_distance_m: int | None = None,
) -> pd.DataFrame:
    """Rank stations by distance to a point.

    Args:
        lat: Site latitude
        lon: Site longitude
        year: If specified, only include stations with data for the given year(s).
            May be a single year or a collection of years (list, tuple, set,
            frozenset or range); with a collection, a station must have data
            for every listed year.
        max_distance_m: If specified, only include stations within this distance
            (in meters) from the site.

    Returns:
        A [DataFrame][pandas.DataFrame] of station information, indexed by
            ``usaf_id`` and ordered by increasing ``distance``. The ``years``
            and ``quality`` columns hold one list per station, collected from
            the station's file-count records.
    """
    station_info = {info["usaf_id"]: info for info in _calculate_distances(lat, lon)}

    results = (
        select(
            models.Station.usaf_id,
            models.Station.name,
            models.FileCount.year,
            models.FileCount.quality,
        )
        .join_from(
            models.Station,
            models.FileCount,
        )
        .where(models.Station.usaf_id.in_(station_info.keys()))
    )

    # The query yields one row per (station, year); fold them into one record
    # per station that carries parallel lists of years and quality flags.
    data = {}
    with MetadataSession() as session:
        for row in session.execute(results):
            entry = data.get(row.usaf_id)
            if entry is None:
                entry = data[row.usaf_id] = {
                    **station_info[row.usaf_id],
                    "years": [],
                    "quality": [],
                }

            entry["years"].append(row.year)
            entry["quality"].append(row.quality)

    data = pd.DataFrame(
        sorted(data.values(), key=operator.itemgetter("distance"))
    ).set_index("usaf_id")

    if year is not None:
        # Any common collection of years is accepted, not just a list. A tuple
        # or set used to be treated as a single value and matched nothing.
        if isinstance(year, (list, tuple, set, frozenset, range)):
            required_years = set(year)
        else:
            required_years = {year}

        def _has_required_years(available_years):
            return required_years.issubset(available_years)

        data = data.loc[data["years"].apply(_has_required_years), :]

    if max_distance_m is not None:
        data = data.loc[data["distance"] <= max_distance_m, :]

    return data

select_station

select_station(ranked_stations, rank=0)

Return a Station object out of a ranked set of stations.

Parameters:

Name Type Description Default
ranked_stations pd.DataFrame

A DataFrame returned by riweather.rank_stations.

required
rank int

Which station to return. Defaults to rank=0, which corresponds to the first (i.e. nearest) station.

0

Returns:

Type Description
Station

A Station object.

Source code in riweather/stations.py
def select_station(ranked_stations: pd.DataFrame, rank: int = 0) -> Station:
    """Pick one station from a ranked table and wrap it in a Station object.

    Args:
        ranked_stations: A [DataFrame][pandas.DataFrame] returned by
            [`riweather.rank_stations`]. It must have a ``distance`` column,
            and its index labels are used as USAF identifiers.
        rank: Position of the station to return after ordering by distance.
            The default, `rank=0`, is the first (i.e. nearest) station.

    Returns:
        A [`Station`][riweather.Station] object.

    Raises:
        ValueError: If `rank` is not smaller than the number of rows.
    """
    n_available = len(ranked_stations)
    if rank >= n_available:
        raise ValueError("Rank too large, not enough stations")

    # Re-sort defensively: the caller may have reordered or filtered the table.
    by_distance = ranked_stations.sort_values("distance")
    chosen_row = by_distance.iloc[rank]

    # The selected row's `.name` is its index label in the table.
    return Station(usaf_id=chosen_row.name)

zcta_to_lat_lon

zcta_to_lat_lon(zcta)

Convert zip code to lat/lon.

Parameters:

Name Type Description Default
zcta str

Five-digit zip code

required

Returns:

Type Description
float, float

The center point of the ZCTA (Zip Code Tabulation Area).

Source code in riweather/stations.py
def zcta_to_lat_lon(zcta: str) -> tuple[float, float]:
    """Convert zip code to lat/lon.

    Args:
        zcta: Five-digit zip code

    Returns:
        The center point of the ZCTA (Zip Code Tabulation Area), as a
            ``(latitude, longitude)`` pair.

    Raises:
        ValueError: If no ZCTA record matches ``zcta``.
    """
    with MetadataSession() as session:
        record = session.scalars(
            select(models.Zcta).where(models.Zcta.zip == zcta)
        ).first()

    # `first()` returns None when the query matches no rows. Report that
    # explicitly instead of failing on attribute access below.
    if record is None:
        raise ValueError(f"No ZCTA found for zip code {zcta!r}")

    return record.latitude, record.longitude

Station

Station(usaf_id, load_metadata_on_init=True)

ISD Station object.

Parameters:

Name Type Description Default
usaf_id str

USAF identifier

required
load_metadata_on_init bool

If True, station metadata will be retrieved from the local data store and loaded into the object as properties.

True

Examples:

>>> s = Station("720534")
>>> print(s.name, s.latitude, s.longitude)
ERIE MUNICIPAL AIRPORT 40.017 -105.05
Source code in riweather/stations.py
def __init__(self, usaf_id: str, load_metadata_on_init: bool = True):
    """ISD Station object.

    Args:
        usaf_id: USAF identifier
        load_metadata_on_init: If `True`, station metadata is loaded
            immediately via `_load_metadata()` and exposed through the
            object's properties. If `False`, the metadata starts out as an
            empty dict.

    Examples:
        >>> s = Station("720534")
        >>> print(s.name, s.latitude, s.longitude)
        ERIE MUNICIPAL AIRPORT 40.017 -105.05
    """
    self.usaf_id = usaf_id
    # An empty mapping stands in for the metadata when loading is skipped.
    self._station = self._load_metadata() if load_metadata_on_init else {}

elevation property

elevation: float

Elevation of the station, in meters.

icao_code property

icao_code: str

ICAO airport code.

latitude property

latitude: float

Station latitude.

longitude property

longitude: float

Station longitude.

name property

name: str

Station name.

recent_wban_id property

recent_wban_id: str

Most recent WBAN (Weather Bureau Army Navy) identifier.

state property

state: str

US state in which the station is located.

wban_ids property

wban_ids: list[str]

List of valid WBAN (Weather Bureau Army Navy) identifiers.

years property

years: list[int]

Years for which data exists for the station.

__repr__

__repr__()

String representation of a Station.

Source code in riweather/stations.py
def __repr__(self):
    """Return a constructor-like string showing the station's USAF identifier."""
    identifier = self.usaf_id
    return f'Station("{identifier}")'

fetch_raw_temp_data

fetch_raw_temp_data(year=None, scale='C')

Retrieve raw weather data from the ISD.

Parameters:

Name Type Description Default
year int

Returned data will be limited to the year specified. If None, data for all years is returned.

None
scale str

Return the temperature in Celsius ("C", the default) or Fahrenheit ("F").

'C'

Returns:

Type Description
pd.DataFrame

A DataFrame, indexed on the timestamp, with two columns: air temperature and dew point temperature.

Examples:

>>> s = Station("720534")
>>> print(s.fetch_raw_temp_data(2022).head(2))
                           tempC  dewC
2022-01-01 00:15:00+00:00   -2.8  -4.0
2022-01-01 00:35:00+00:00   -4.2  -5.5
Source code in riweather/stations.py
def fetch_raw_temp_data(self, year: int = None, scale: str = "C") -> pd.DataFrame:
    """Retrieve raw weather data from the ISD.

    Args:
        year: Returned data will be limited to the year specified. If
            `None`, data for all years is returned.
        scale: Return the temperature in Celsius (`"C"`, the default) or
            Fahrenheit (`"F"`).

    Returns:
        A [DataFrame][pandas.DataFrame], indexed on the timestamp, with two columns:
            air temperature and dew point temperature.

    Examples:
        >>> s = Station("720534")
        >>> print(s.fetch_raw_temp_data(2022).head(2))
                                   tempC  dewC
        2022-01-01 00:15:00+00:00   -2.8  -4.0
        2022-01-01 00:35:00+00:00   -4.2  -5.5
    """
    data = []
    filenames = self.get_filenames(year)

    if scale not in ("C", "F"):
        raise ValueError('Scale must be "C" (Celsius) or "F" (Fahrenheit).')

    with NOAAFTPConnection() as conn:
        for filename in filenames:
            datastream = conn.read_file_as_bytes(filename)
            for line in datastream.readlines():
                tempC = _parse_temp(line[87:92])
                dewC = _parse_temp(line[93:98])
                date_str = line[15:27].decode("utf-8")
                dt = pytz.UTC.localize(datetime.strptime(date_str, "%Y%m%d%H%M"))
                data.append([dt, tempC, dewC])

    timestamps, temps, dews = zip(*sorted(data), strict=True)
    ts = pd.DataFrame({"tempC": temps, "dewC": dews}, index=timestamps)

    if scale == "F":
        ts["tempF"] = ts["tempC"] * 1.8 + 32
        ts["dewF"] = ts["dewC"] * 1.8 + 32
        ts = ts.drop(["tempC", "dewC"], axis="columns")

    ts = ts.groupby(ts.index).mean()
    return ts

fetch_temp_data

fetch_temp_data(
    year=None,
    value=None,
    scale="C",
    period="H",
    rollup="ending",
    upsample_first=True,
)

Retrieve temperature data from the ISD.

Parameters:

Name Type Description Default
year int

Returned data will be limited to the year specified. If None, data for all years is returned.

None
value str

"temperature" to retrieve the air temperature only, or "dew_point" to retrieve the dew point temperature only. None returns both temperatures in a DataFrame.

None
scale str

Return the value(s) in Celsius ("C", the default) or Fahrenheit ("F").

'C'
period str

The time step at which the data will be returned. Defaults to "H", which corresponds to hourly data. Other possible values are "30T" or "30min" for half-hourly data, "15T"/"15min" for quarter-hourly data, and so on. See the Pandas documentation on frequency strings for more details on possible values.

'H'
rollup str

How to align values to the period. Defaults to "ending", meaning that values over the previous time period are averaged.

'ending'
upsample_first bool

Whether to upsample the data to the minute level prior to resampling. Usually results in more accurate representations of the true weather data.

True

Returns:

Type Description
pd.DataFrame | pd.Series

Either a DataFrame containing both air temperature and dew point temperature, or, if value was supplied, a Series containing one or the other.

Examples:

>>> s = Station("720534")
>>> print(s.fetch_temp_data(2022).head(2))
                              tempC      dewC
2022-01-01 00:00:00+00:00 -4.298889 -5.512222
2022-01-01 01:00:00+00:00 -6.555833 -7.688333
Source code in riweather/stations.py
def fetch_temp_data(
    self,
    year: int = None,
    value: int = None,
    scale: str = "C",
    period: str = "H",
    rollup: str = "ending",
    upsample_first: bool = True,
) -> pd.DataFrame | pd.Series:
    """Retrieve temperature data from the ISD.

    Args:
        year: Returned data will be limited to the year specified. If
            `None`, data for all years is returned.
        value: `"temperature"` to retrieve the air temperature only,
            or `"dew_point"` to retrieve the dew point temperature only.
            `None` returns both temperatures in a [DataFrame][pandas.DataFrame].
        scale: Return the value(s) in Celsius (`"C"`, the default) or
            Fahrenheit (`"F"`).
        period: The time step at which the data will be returned. Defaults
            to `"H"`, which corresponds to hourly data. Other possible
            values are `"30T"` or `"30min"` for half-hourly data, `"15T"`/`"15min"`
            for quarter-hourly data, and so on. See the [Pandas documentation
            on frequency strings](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects)
            for more details on possible values.
        rollup: How to align values to the `period`. Defaults to `"ending"`,
            meaning that values over the previous time period are averaged.
        upsample_first: Whether to upsample the data to the minute level prior to
            resampling. Usually results in more accurate representations of the
            true weather data.

    Returns:
        Either a [DataFrame][pandas.DataFrame] containing both air temperature
            and dew point temperature, or, if `value` was supplied, a
            [Series][pandas.Series] containing one or the other.

    Examples:
        >>> s = Station("720534")
        >>> print(s.fetch_temp_data(2022).head(2))
                                      tempC      dewC
        2022-01-01 00:00:00+00:00 -4.298889 -5.512222
        2022-01-01 01:00:00+00:00 -6.555833 -7.688333
    """  # noqa
    if value is None:
        value = "both"
    elif value not in ("temperature", "dew_point"):
        raise ValueError('Value must be "temperature" or "dew_point"')

    if rollup not in ("starting", "ending", "midpoint", "instant"):
        raise ValueError("Invalid rollup")

    raw_ts = self.fetch_raw_temp_data(year, scale=scale)
    if rollup == "starting":
        ts = rollup_starting(raw_ts, period, upsample_first=upsample_first)
    elif rollup == "ending":
        ts = rollup_ending(raw_ts, period, upsample_first=upsample_first)
    elif rollup == "midpoint":
        ts = rollup_midpoint(raw_ts, period, upsample_first=upsample_first)
    else:  # rollup == "instant"
        ts = rollup_instant(raw_ts, period, upsample_first=upsample_first)

    if value == "temperature":
        return ts.loc[:, f"temp{scale}"]
    if value == "dew_point":
        return ts.loc[:, f"dew{scale}"]
    else:
        return ts

get_filenames

get_filenames(year=None)

Construct the names of ISD files corresponding to this station.

Parameters:

Name Type Description Default
year int

Limit the filenames to the one corresponding to the given year. If None, filenames for all years are returned.

None

Returns:

Type Description
list[str]

List of filenames

Examples:

>>> s = Station("720534")
>>> print(s.get_filenames(2022))
['/pub/data/noaa/2022/720534-00161-2022.gz']
Source code in riweather/stations.py
def get_filenames(self, year: int = None) -> list[str]:
    """Build the ISD file paths that belong to this station.

    One path is produced per file-count record stored for the station, using
    the station's USAF id together with the record's WBAN id and year.

    Args:
        year: Restrict the result to records for this year. If `None`,
            paths for every recorded year are returned.

    Returns:
        List of filenames

    Examples:
        >>> s = Station("720534")
        >>> print(s.get_filenames(2022))
        ['/pub/data/noaa/2022/720534-00161-2022.gz']
    """
    filename_template = "/pub/data/noaa/{2}/{0}-{1}-{2}.gz"

    # The station's primary key comes from the loaded metadata; `.get` yields
    # None when that metadata is an empty dict.
    query = select(models.FileCount).where(
        models.FileCount.station_id == self._station.get("id")
    )
    if year is not None:
        query = query.where(models.FileCount.year == year)

    with MetadataSession() as session:
        return [
            filename_template.format(self.usaf_id, record.wban_id, record.year)
            for record in session.scalars(query)
        ]

quality_report

quality_report(year=None)

Retrieve information on data quality.

Parameters:

Name Type Description Default
year int

Limit the report to information concerning the given year. If None, all years are included.

None

Returns:

Type Description
pd.DataFrame | pd.Series

Data quality report

Source code in riweather/stations.py
def quality_report(self, year: int = None) -> pd.DataFrame | pd.Series:
    """Summarize the stored data-quality records for this station.

    Each file-count record becomes one row holding the station and WBAN
    identifiers, the year, the quality flag, the twelve per-month values,
    the total count and the number of zero months.

    Args:
        year: Restrict the report to this year. If `None`, every recorded
            year is included.

    Returns:
        Data quality report. The table is passed through `squeeze()`, so
        length-one dimensions are collapsed (a single record comes back as
        a Series rather than a DataFrame).
    """
    # Attributes copied straight off each record, in output column order.
    record_fields = (
        "wban_id",
        "year",
        "quality",
        "jan",
        "feb",
        "mar",
        "apr",
        "may",
        "jun",
        "jul",
        "aug",
        "sep",
        "oct",
        "nov",
        "dec",
        "count",
        "n_zero_months",
    )

    query = select(models.FileCount).where(
        models.FileCount.station_id == self._station.get("id")
    )
    if year is not None:
        query = query.where(models.FileCount.year == year)

    with MetadataSession() as session:
        # Rows are built while the session is open because `usaf_id` is
        # reached through the record's `station` relationship.
        rows = [
            {
                "usaf_id": record.station.usaf_id,
                **{field: getattr(record, field) for field in record_fields},
            }
            for record in session.scalars(query).all()
        ]

    report = pd.DataFrame(rows)
    return report.squeeze()