API Reference¶

riweather ¶

riweather.

Grab publicly available weather data.

plot_stations ¶

plot_stations(
    lat, lon, ranked_stations, *, n=None, distance_unit="m"
)

Plot stations relative to a location.

Raises:

Type	Description
`ImportError`	If either matplotlib or folium is not installed.

Parameters:

Name	Type	Description	Default
`lat`	`float`	Site latitude	required
`lon`	`float`	Site longitude	required
`ranked_stations`	`pd.DataFrame`	Ranked stations	required
`n`	`int`	The `n` top-ranked stations of `ranked_stations` will be plotted	`None`
`distance_unit`	`str`	Distance unit to use on the plot. Must be meters (`m`), kilometers (`km`), or miles (`mi`)	`'m'`

Source code in riweather/viz.py

def plot_stations(
    lat: float,
    lon: float,
    ranked_stations: pd.DataFrame,
    *,
    n: int = None,
    distance_unit: str = "m",
):
    """Draw a site and its ranked stations on an interactive map.

    The site gets a marker with the popup ``Site``. Each of the first ``n`` rows
    of ``ranked_stations`` gets a cloud-icon marker whose popup is the station
    name, plus a line from the site to the station whose popup is the label
    produced by ``_calculate_distance_labels``.

    Raises:
        ImportError: If either [matplotlib][] or
            [folium](https://python-visualization.github.io/folium/) is not
            installed.

    Args:
        lat: Site latitude
        lon: Site longitude
        ranked_stations: Ranked stations. The ``latitude``, ``longitude``,
            ``name`` and ``distance`` columns of each row are read.
        n: How many rows to plot, counted from the top of ``ranked_stations``.
            ``None`` plots every row.
        distance_unit: Distance unit to use on the plot. Must be meters (``m``),
            kilometers (``km``), or miles (``mi``)

    Returns:
        The folium map holding the markers and connecting lines.
    """
    # Both optional dependencies are probed up front so a missing one is
    # reported by name rather than as a failure somewhere inside the plotting.
    try:
        import matplotlib.pyplot as plt  # noqa
    except ImportError:
        raise ImportError("Plotting stations requires matplotlib") from None

    try:
        import folium
    except ImportError:
        raise ImportError("Plotting stations requires folium") from None

    limit = ranked_stations.shape[0] if n is None else n

    station_map = folium.Map(location=[lat, lon])
    site_marker = folium.Marker([lat, lon], popup="Site")
    site_marker.add_to(station_map)

    for station in ranked_stations.head(limit).itertuples():
        station_marker = folium.Marker(
            [station.latitude, station.longitude],
            popup=station.name,
            icon=folium.Icon(icon="cloud"),
        )
        station_marker.add_to(station_map)

        distance_label = _calculate_distance_labels(station.distance, distance_unit)
        connector = folium.PolyLine(
            [[lat, lon], [station.latitude, station.longitude]],
            popup=distance_label,
        )
        connector.add_to(station_map)

    return station_map

rank_stations ¶

rank_stations(lat, lon, *, year=None, max_distance_m=None)

Rank stations by distance to a point.

Parameters:

Name	Type	Description	Default
`lat`	`float`	Site latitude	required
`lon`	`float`	Site longitude	required
`year`	`int \| list[int]`	If specified, only include stations with data for the given year, or for every year in the given list.	`None`
`max_distance_m`	`int`	If specified, only include stations within this distance (in meters) from the site.	`None`

Returns:

Type	Description
`pd.DataFrame`	A DataFrame of station information.

Source code in riweather/stations.py

def rank_stations(
    lat: float,
    lon: float,
    *,
    year: int | list[int] | None = None,
    max_distance_m: int | None = None,
) -> pd.DataFrame:
    """Rank stations by distance to a point.

    Args:
        lat: Site latitude
        lon: Site longitude
        year: If specified, only include stations with data for the given year.
            A list of years may be passed instead, in which case a station is
            kept only if it has data for every year in the list.
        max_distance_m: If specified, only include stations within this distance
            (in meters) from the site.

    Returns:
        A [DataFrame][pandas.DataFrame] of station information, indexed by
            ``usaf_id`` and sorted by ascending ``distance``. The ``years`` and
            ``quality`` columns hold one list entry per file-count record of
            the station.
    """
    station_info = {info["usaf_id"]: info for info in _calculate_distances(lat, lon)}

    results = (
        select(
            models.Station.usaf_id,
            models.Station.name,
            models.FileCount.year,
            models.FileCount.quality,
        )
        .join_from(
            models.Station,
            models.FileCount,
        )
        .where(models.Station.usaf_id.in_(station_info.keys()))
    )

    # The query yields one row per (station, year); fold them into a single
    # record per station that accumulates the years and quality flags.
    per_station = {}
    with MetadataSession() as session:
        for row in session.execute(results):
            if row.usaf_id not in per_station:
                per_station[row.usaf_id] = {
                    **station_info[row.usaf_id],
                    "years": [],
                    "quality": [],
                }

            entry = per_station[row.usaf_id]
            entry["years"].append(row.year)
            entry["quality"].append(row.quality)

    ranked = pd.DataFrame(
        sorted(per_station.values(), key=operator.itemgetter("distance"))
    ).set_index("usaf_id")

    if year is not None:
        # Normalise once so the per-row check handles both call styles.
        required_years = year if isinstance(year, list) else [year]
        has_required_years = ranked["years"].apply(
            lambda available: all(y in available for y in required_years)
        )
        ranked = ranked.loc[has_required_years, :]

    if max_distance_m is not None:
        ranked = ranked.loc[ranked["distance"] <= max_distance_m, :]

    return ranked

select_station ¶

select_station(ranked_stations, rank=0)

Return a Station object out of a ranked set of stations.

Parameters:

Name	Type	Description	Default
`ranked_stations`	`pd.DataFrame`	A DataFrame returned by [`riweather.rank_stations`].	required
`rank`	`int`	Which station to return. Defaults to `rank=0`, which corresponds to the first (i.e. nearest) station.	`0`

Returns:

Type	Description
`Station`	A `Station` object.

Source code in riweather/stations.py

def select_station(ranked_stations: pd.DataFrame, rank: int = 0) -> Station:
    """Build a Station for one entry of a ranked station table.

    Args:
        ranked_stations: A [DataFrame][pandas.DataFrame] returned by
            [`riweather.rank_stations`]. It is re-sorted by its ``distance``
            column before the lookup.
        rank: Zero-based position of the station to return after sorting.
            The default, `rank=0`, is the first (i.e. nearest) station.

    Returns:
        A [`Station`][riweather.Station] object whose ``usaf_id`` is the index
            label of the selected row.

    Raises:
        ValueError: If ``rank`` is not smaller than the number of rows.
    """
    n_available = len(ranked_stations)
    if rank >= n_available:
        raise ValueError("Rank too large, not enough stations")

    by_distance = ranked_stations.sort_values("distance")
    # The row's ``.name`` is its index label, not the "name" column.
    chosen_id = by_distance.iloc[rank].name
    return Station(usaf_id=chosen_id)

zcta_to_lat_lon ¶

zcta_to_lat_lon(zcta)

Convert zip code to lat/lon.

Parameters:

Name	Type	Description	Default
`zcta`	`str`	Five-digit zip code	required

Returns:

Type	Description
`float, float`	The center point of the ZCTA (Zip Code Tabulation Area).

Source code in riweather/stations.py

def zcta_to_lat_lon(zcta: str) -> tuple[float, float]:
    """Convert zip code to lat/lon.

    Args:
        zcta: Five-digit zip code

    Returns:
        The center point of the ZCTA (Zip Code Tabulation Area), as a
            ``(latitude, longitude)`` pair.

    Raises:
        ValueError: If no ZCTA record matches ``zcta``.
    """
    with MetadataSession() as session:
        record = session.scalars(
            select(models.Zcta).where(models.Zcta.zip == zcta)
        ).first()

        # ``first()`` yields None when nothing matched; report that explicitly
        # instead of failing on attribute access below.
        if record is None:
            raise ValueError(f"No ZCTA found for zip code {zcta!r}")

        # Read the coordinates while the session is still open.
        return record.latitude, record.longitude

Station ¶

Station(usaf_id, load_metadata_on_init=True)

ISD Station object.

Parameters:

Name	Type	Description	Default
`usaf_id`	`str`	USAF identifier	required
`load_metadata_on_init`	`bool`	If `True`, station metadata will be retrieved from the local data store and loaded into the object as properties.	`True`

Examples:

>>> s = Station("720534")
>>> print(s.name, s.latitude, s.longitude)
ERIE MUNICIPAL AIRPORT 40.017 -105.05

Source code in riweather/stations.py

def __init__(self, usaf_id: str, load_metadata_on_init: bool = True):
    """Create a handle for one ISD station.

    Args:
        usaf_id: USAF identifier
        load_metadata_on_init: If `True`, the station metadata is fetched via
            `_load_metadata()` during construction. If `False`, the metadata
            is left as an empty mapping.

    Examples:
        >>> s = Station("720534")
        >>> print(s.name, s.latitude, s.longitude)
        ERIE MUNICIPAL AIRPORT 40.017 -105.05
    """
    self.usaf_id = usaf_id
    # An empty dict stands in for the metadata when loading is skipped.
    self._station = self._load_metadata() if load_metadata_on_init else {}

elevation `property` ¶

elevation: float

Elevation of the station, in meters.

icao_code `property` ¶

icao_code: str

ICAO airport code.

latitude `property` ¶

latitude: float

Station latitude.

longitude `property` ¶

longitude: float

Station longitude.

name `property` ¶

name: str

Station name.

recent_wban_id `property` ¶

recent_wban_id: str

Most recent WBAN (Weather Bureau Army Navy) identifier.

state `property` ¶

state: str

US state in which the station is located.

wban_ids `property` ¶

wban_ids: list[str]

List of valid WBAN (Weather Bureau Army Navy) identifiers.

years `property` ¶

years: list[int]

Years for which data exists for the station.

__repr__ ¶

__repr__()

String representation of a Station.

Source code in riweather/stations.py

def __repr__(self):
    """Return the constructor-style text for this station, e.g. ``Station("720534")``."""
    return 'Station("{}")'.format(self.usaf_id)

fetch_raw_temp_data ¶

fetch_raw_temp_data(year=None, scale='C')

Retrieve raw weather data from the ISD.

Parameters:

Name	Type	Description	Default
`year`	`int`	Returned data will be limited to the year specified. If `None`, data for all years is returned.	`None`
`scale`	`str`	Return the temperature in Celsius (`"C"`, the default) or Fahrenheit (`"F"`).	`'C'`

Returns:

Type	Description
`pd.DataFrame`	A DataFrame, indexed on the timestamp, with two columns: air temperature and dew point temperature.

Examples:

>>> s = Station("720534")
>>> print(s.fetch_raw_temp_data(2022).head(2))
                           tempC  dewC
2022-01-01 00:15:00+00:00   -2.8  -4.0
2022-01-01 00:35:00+00:00   -4.2  -5.5

Source code in riweather/stations.py

def fetch_raw_temp_data(self, year: int = None, scale: str = "C") -> pd.DataFrame:
    """Retrieve raw weather data from the ISD.

    Args:
        year: Returned data will be limited to the year specified. If
            `None`, data for all years is returned.
        scale: Return the temperature in Celsius (`"C"`, the default) or
            Fahrenheit (`"F"`).

    Returns:
        A [DataFrame][pandas.DataFrame], indexed on the timestamp, with two columns:
            air temperature and dew point temperature.

    Examples:
        >>> s = Station("720534")
        >>> print(s.fetch_raw_temp_data(2022).head(2))
                                   tempC  dewC
        2022-01-01 00:15:00+00:00   -2.8  -4.0
        2022-01-01 00:35:00+00:00   -4.2  -5.5
    """
    data = []
    filenames = self.get_filenames(year)

    if scale not in ("C", "F"):
        raise ValueError('Scale must be "C" (Celsius) or "F" (Fahrenheit).')

    with NOAAFTPConnection() as conn:
        for filename in filenames:
            datastream = conn.read_file_as_bytes(filename)
            for line in datastream.readlines():
                tempC = _parse_temp(line[87:92])
                dewC = _parse_temp(line[93:98])
                date_str = line[15:27].decode("utf-8")
                dt = pytz.UTC.localize(datetime.strptime(date_str, "%Y%m%d%H%M"))
                data.append([dt, tempC, dewC])

    timestamps, temps, dews = zip(*sorted(data), strict=True)
    ts = pd.DataFrame({"tempC": temps, "dewC": dews}, index=timestamps)

    if scale == "F":
        ts["tempF"] = ts["tempC"] * 1.8 + 32
        ts["dewF"] = ts["dewC"] * 1.8 + 32
        ts = ts.drop(["tempC", "dewC"], axis="columns")

    ts = ts.groupby(ts.index).mean()
    return ts

fetch_temp_data ¶

fetch_temp_data(
    year=None,
    value=None,
    scale="C",
    period="H",
    rollup="ending",
    upsample_first=True,
)

Retrieve temperature data from the ISD.

Parameters:

Name	Type	Description	Default
`year`	`int`	Returned data will be limited to the year specified. If `None`, data for all years is returned.	`None`
`value`	`str`	`"temperature"` to retrieve the air temperature only, or `"dew_point"` to retrieve the dew point temperature only. `None` returns both temperatures in a DataFrame.	`None`
`scale`	`str`	Return the value(s) in Celsius (`"C"`, the default) or Fahrenheit (`"F"`).	`'C'`
`period`	`str`	The time step at which the data will be returned. Defaults to `"H"`, which corresponds to hourly data. Other possible values are `"30T"` or `"30min"` for half-hourly data, `"15T"`/`"15min"` for quarter-hourly data, and so on. See the Pandas documentation on frequency strings for more details on possible values.	`'H'`
`rollup`	`str`	How to align values to the `period`. Defaults to `"ending"`, meaning that values over the previous time period are averaged.	`'ending'`
`upsample_first`	`bool`	Whether to upsample the data to the minute level prior to resampling. Usually results in more accurate representations of the true weather data.	`True`

Returns:

Type	Description
`pd.DataFrame \| pd.Series`	Either a DataFrame containing both air temperature and dew point temperature, or, if `value` was supplied, a Series containing one or the other.

Examples:

>>> s = Station("720534")
>>> print(s.fetch_temp_data(2022).head(2))
                              tempC      dewC
2022-01-01 00:00:00+00:00 -4.298889 -5.512222
2022-01-01 01:00:00+00:00 -6.555833 -7.688333

Source code in riweather/stations.py

def fetch_temp_data(
    self,
    year: int = None,
    value: int = None,
    scale: str = "C",
    period: str = "H",
    rollup: str = "ending",
    upsample_first: bool = True,
) -> pd.DataFrame | pd.Series:
    """Retrieve temperature data from the ISD.

    Args:
        year: Returned data will be limited to the year specified. If
            `None`, data for all years is returned.
        value: `"temperature"` to retrieve the air temperature only,
            or `"dew_point"` to retrieve the dew point temperature only.
            `None` returns both temperatures in a [DataFrame][pandas.DataFrame].
        scale: Return the value(s) in Celsius (`"C"`, the default) or
            Fahrenheit (`"F"`).
        period: The time step at which the data will be returned. Defaults
            to `"H"`, which corresponds to hourly data. Other possible
            values are `"30T"` or `"30min"` for half-hourly data, `"15T"`/`"15min"`
            for quarter-hourly data, and so on. See the [Pandas documentation
            on frequency strings](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects)
            for more details on possible values.
        rollup: How to align values to the `period`. Defaults to `"ending"`,
            meaning that values over the previous time period are averaged.
        upsample_first: Whether to upsample the data to the minute level prior to
            resampling. Usually results in more accurate representations of the
            true weather data.

    Returns:
        Either a [DataFrame][pandas.DataFrame] containing both air temperature
            and dew point temperature, or, if `value` was supplied, a
            [Series][pandas.Series] containing one or the other.

    Examples:
        >>> s = Station("720534")
        >>> print(s.fetch_temp_data(2022).head(2))
                                      tempC      dewC
        2022-01-01 00:00:00+00:00 -4.298889 -5.512222
        2022-01-01 01:00:00+00:00 -6.555833 -7.688333
    """  # noqa
    if value is None:
        value = "both"
    elif value not in ("temperature", "dew_point"):
        raise ValueError('Value must be "temperature" or "dew_point"')

    if rollup not in ("starting", "ending", "midpoint", "instant"):
        raise ValueError("Invalid rollup")

    raw_ts = self.fetch_raw_temp_data(year, scale=scale)
    if rollup == "starting":
        ts = rollup_starting(raw_ts, period, upsample_first=upsample_first)
    elif rollup == "ending":
        ts = rollup_ending(raw_ts, period, upsample_first=upsample_first)
    elif rollup == "midpoint":
        ts = rollup_midpoint(raw_ts, period, upsample_first=upsample_first)
    else:  # rollup == "instant"
        ts = rollup_instant(raw_ts, period, upsample_first=upsample_first)

    if value == "temperature":
        return ts.loc[:, f"temp{scale}"]
    if value == "dew_point":
        return ts.loc[:, f"dew{scale}"]
    else:
        return ts

get_filenames ¶

get_filenames(year=None)

Construct the names of ISD files corresponding to this station.

Parameters:

Name	Type	Description	Default
`year`	`int`	Limit the filenames to the one corresponding to the given year. If `None`, filenames for all years are returned.	`None`

Returns:

Type	Description
`list[str]`	List of filenames

Examples:

>>> s = Station("720534")
>>> print(s.get_filenames(2022))
['/pub/data/noaa/2022/720534-00161-2022.gz']

Source code in riweather/stations.py

def get_filenames(self, year: int = None) -> list[str]:
    """Build the ISD file paths that belong to this station.

    One path is produced per file-count record matching the station's
    metadata ``id``; each path combines the USAF id, the record's WBAN id and
    the record's year.

    Args:
        year: Restrict the result to records of this year. If `None`,
            paths for all recorded years are returned.

    Returns:
        List of filenames

    Examples:
        >>> s = Station("720534")
        >>> print(s.get_filenames(2022))
        ['/pub/data/noaa/2022/720534-00161-2022.gz']
    """
    query = select(models.FileCount).where(
        models.FileCount.station_id == self._station.get("id")
    )
    if year is not None:
        query = query.where(models.FileCount.year == year)

    with MetadataSession() as session:
        return [
            "/pub/data/noaa/{2}/{0}-{1}-{2}.gz".format(
                self.usaf_id, record.wban_id, record.year
            )
            for record in session.scalars(query)
        ]

quality_report ¶

quality_report(year=None)

Retrieve information on data quality.

Parameters:

Name	Type	Description	Default
`year`	`int`	Limit the report to information concerning the given year. If `None`, all years are included.	`None`

Returns:

Type	Description
`pd.DataFrame \| pd.Series`	Data quality report

Source code in riweather/stations.py

def quality_report(self, year: int = None) -> pd.DataFrame | pd.Series:
    """Collect the data-quality records stored for this station.

    Each file-count record matching the station's metadata ``id`` becomes one
    row holding the identifiers, the year, the quality flag, the twelve
    monthly values, the total count and the number of zero months.

    Args:
        year: Restrict the report to records of this year. If `None`,
            all years are included.

    Returns:
        Data quality report. The frame is passed through ``squeeze()`` before
            being returned, which is why the result may be a
            [Series][pandas.Series].
    """
    query = select(models.FileCount).where(
        models.FileCount.station_id == self._station.get("id")
    )
    if year is not None:
        query = query.where(models.FileCount.year == year)

    # Attributes copied verbatim from each record, in output column order.
    copied_fields = (
        "wban_id",
        "year",
        "quality",
        "jan",
        "feb",
        "mar",
        "apr",
        "may",
        "jun",
        "jul",
        "aug",
        "sep",
        "oct",
        "nov",
        "dec",
        "count",
        "n_zero_months",
    )

    rows = []
    with MetadataSession() as session:
        for record in session.scalars(query).all():
            # The USAF id lives on the related station, so it is read there.
            row = {"usaf_id": record.station.usaf_id}
            for field in copied_fields:
                row[field] = getattr(record, field)
            rows.append(row)

    report = pd.DataFrame(rows)
    return report.squeeze()

API Reference¶

riweather ¶

plot_stations ¶

rank_stations ¶

select_station ¶

zcta_to_lat_lon ¶

Station ¶

elevation property ¶

icao_code property ¶

latitude property ¶

longitude property ¶

name property ¶

recent_wban_id property ¶

state property ¶

wban_ids property ¶

years property ¶

__repr__ ¶

fetch_raw_temp_data ¶

fetch_temp_data ¶

get_filenames ¶

quality_report ¶

elevation `property` ¶

icao_code `property` ¶

latitude `property` ¶

longitude `property` ¶

name `property` ¶

recent_wban_id `property` ¶

state `property` ¶

wban_ids `property` ¶

years `property` ¶

__repr__ ¶