Source code for farmnet.data.datasets.distance

# -*- coding: utf-8 -*-

"""
Various functions to get distances and azimuth angles between two points etc.

:Usage:

Get distance matrix for all turbines and met masts if a dict with location name and tuple(lat, long) is used

>>> locations = {
...    "Metmast": (-4.19, -38.08),  
...    "WTG1": (-4.18523, -38.08384),
...    "WTG2": (-4.182794, -38.085799),
...    "WTG6": (-4.185351, -38.087376),
...    "WTG11": (-4.18908, -38.08262),
...    "WTG12": (-4.189967, -38.082009),
...    "WTG13": (-4.190953, -38.081391),
...    "WTG14": (-4.191046, -38.079538),
...    "WTG15": (-4.19211, -38.07853),
...    "WTG17": (-4.19395, -38.07708),
...    "WTG20": (-4.19679, -38.07462),
...    "WTG26": (-4.20256, -38.07288),
...    "WTG30": (-4.20724, -38.06941),
...    "WTG25": (-4.20965, -38.06663),
... }
>>> dist_matrix = distance_matrix_from_dict(locations)

"""

import itertools

import haversine
import numpy as np
import pandas as pd
import pyproj


[docs] def distance_matrix_from_df( df: pd.DataFrame, lat_col: str, long_col: str, name_col: str | None = None ) -> pd.DataFrame: """ Compute a distance matrix between points in a DataFrame using the Haversine formula. The function takes a DataFrame with latitude and longitude columns and returns a square distance matrix as a pandas DataFrame. Distances are in kilometers. :param df: Input DataFrame containing geographic coordinates. :type df: pd.DataFrame :param lat_col: Name of the column containing latitude values. :type lat_col: str :param long_col: Name of the column containing longitude values. :type long_col: str :param name_col: Optional name of the column to use for row/column labels in the output matrix. If None, a numeric index is used. :type name_col: str or None :return: A square DataFrame representing the pairwise distance matrix (in kilometers). :rtype: pd.DataFrame .. rubric:: Example .. code-block:: python >>> import pandas as pd >>> df = pd.DataFrame({ ... 'lat': [47.226624, 47.38454096, 47.499950], ... 'lon': [8.818437, 8.529927493, 8.737565], ... 'name': ['Rappi', 'Zuerich', 'Winterthur'] ... }) >>> distance_matrix_from_df(df, 'lat', 'lon', 'name').round(0) name Rappi Zuerich Winterthur name Rappi 0.0 28.0 31.0 Zuerich 28.0 0.0 20.0 Winterthur 31.0 20.0 0.0 .. warning:: Drop name?? """ lat_long = tuple(zip(df[lat_col], df[long_col])) dim = df.shape[0] distance_matrix = np.array( [ haversine.haversine(i, j) for i, j in itertools.product(lat_long, repeat=2) ] ).reshape(dim, dim) columns = df[name_col] if name_col else pd.RangeIndex(0, dim) return pd.DataFrame(data=distance_matrix, index=columns, columns=columns)
[docs] def distance_matrix_from_dict( locations: dict[str, tuple[float, float]], ) -> pd.DataFrame: """ Compute a distance matrix between named geographic locations using the Haversine formula. The function takes a dictionary of named locations with (latitude, longitude) coordinates and returns a square DataFrame representing pairwise distances in kilometers. :param locations: Dictionary where keys are location names and values are (latitude, longitude) tuples. :type locations: dict[str, tuple[float, float]] :return: A square DataFrame with pairwise distances (in kilometers) between locations. :rtype: pd.DataFrame :Example: .. code-block:: python >>> locations = { ... "Rappi": (47.226624, 8.818437), ... "Zuerich": (47.38454096, 8.529927493), ... "Winterthur": (47.499950, 8.737565) ... } >>> distance_matrix_from_dict(locations).round(0) Rappi Zuerich Winterthur Rappi 0.0 28.0 31.0 Zuerich 28.0 0.0 20.0 Winterthur 31.0 20.0 0.0 """ dim = len(locations) distance_matrix = np.array( [ haversine.haversine(i[1], j[1]) for i, j in itertools.product(locations.items(), repeat=2) ] ).reshape(dim, dim) columns = list(locations.keys()) return pd.DataFrame(data=distance_matrix, index=columns, columns=columns)
[docs] def azimuth_matrix_from_df( df: pd.DataFrame, lat_col: str, long_col: str, name_col: str | None = None ) -> pd.DataFrame: """ Compute an azimuth matrix between geographic points in a DataFrame. The function calculates forward azimuths from one location to another using the WGS84 ellipsoid model via `pyproj.Geod.inv`. :param df: Input DataFrame containing latitude and longitude coordinates. :type df: pd.DataFrame :param lat_col: Column name containing latitude values. :type lat_col: str :param long_col: Column name containing longitude values. :type long_col: str :param name_col: Optional column name for row/column labels in the output matrix. If None, integer indices are used. :type name_col: str or None :return: A square DataFrame of azimuths (degrees) from each point to every other. :rtype: pd.DataFrame :Example: .. code-block:: python >>> import pandas as pd >>> import pandas as pd >>> df = pd.DataFrame({ ... 'lat': [47.226624, 47.38454096, 47.499950], ... 'lon': [8.818437, 8.529927493, 8.737565], ... 'name': ['Rappi', 'Zuerich', 'Winterthur'] ... }) >>> azimuth_matrix_from_df(df, 'lat', 'lon', 'name').round(1) name Rappi Zuerich Winterthur name Rappi 180.0 -51.1 -11.3 Zuerich 128.7 180.0 50.6 Winterthur 168.6 -129.3 180.0 .. warning:: Drop name?? """ long_lat = tuple(zip(df[long_col], df[lat_col])) dim = df.shape[0] geodesic = pyproj.Geod(ellps="WGS84") azimuth_matrix = np.array( [ geodesic.inv(*i, *j)[0] for i, j in itertools.product(long_lat, repeat=2) ] ).reshape(dim, dim) columns = df[name_col] if name_col else pd.RangeIndex(0, dim) return pd.DataFrame(data=azimuth_matrix, index=columns, columns=columns)
[docs] def azimuth_matrix_from_dict( locations: dict[str, tuple[float, float]], ) -> pd.DataFrame: """ Compute a matrix of forward azimuths (initial compass bearings) between named geographic locations. The function takes a dictionary of named locations with (latitude, longitude) coordinates and returns a square DataFrame containing the azimuths (in degrees) from each location to every other using the WGS84 ellipsoid via `pyproj.Geod.inv`. Azimuths are wrapped to [0, 360) and the diagonal is set to 0°. :param locations: Dictionary where keys are location names and values are (latitude, longitude) tuples. :type locations: dict[str, tuple[float, float]] :return: A square DataFrame with azimuths (in degrees) between locations. :rtype: pd.DataFrame :Example: .. code-block:: python >>> locations = { ... "Rappi": (47.226624, 8.818437), ... "Zuerich": (47.38454096, 8.529927493), ... "Winterthur": (47.499950, 8.737565) ... } >>> azimuth_matrix_from_dict(locations).round(0) Rappi Zuerich Winterthur Rappi 0.0 309.0 349.0 Zuerich 129.0 0.0 51.0 Winterthur 169.0 231.0 0.0 """ dim = len(locations) # Important: Need to change order from (lat, long) to (long, lat) # TODO: Find more elagant way locations = {k: (v[1], v[0]) for k, v in locations.items()} geodesic = pyproj.Geod(ellps="WGS84") azimuth_matrix = np.array( [ geodesic.inv(*i, *j)[0] for i, j in itertools.product(locations.values(), repeat=2) ] ).reshape(dim, dim) columns = list(locations.keys()) return ( pd.DataFrame(data=azimuth_matrix, index=columns, columns=columns) - np.eye(dim) * 180.0 ) % 360.0
[docs] def lat_lon_to_xy(locations: dict[str, tuple[float, float]]) -> list[tuple]: """ Convert a dictionary of (latitude, longitude) coordinates to normalized (x, y) Cartesian coordinates. This function uses a `WGS84` projection via `pyproj` to convert geographic coordinates into Cartesian coordinates, then normalizes them by dividing by the minimum x and y values. :param locations: Dictionary where keys are location names and values are (latitude, longitude) tuples. :type locations: dict[str, tuple[float, float]] :return: A list of tuples in the format (location_name, x, y) with normalized coordinates. :rtype: list[tuple] :Example: .. code-block:: python >>> locations = { ... "Rappi": (47.226624, 8.818437), ... "Zuerich": (47.38454096, 8.529927493), ... "Winterthur": (47.499950, 8.737565) ... } >>> lat_lon_to_xy(locations) [('Rappi', np.float64(0.03382320743485345), np.float64(0.0)), ('Zuerich', np.float64(0.0), np.float64(0.0033438121683226907)), ('Winterthur', np.float64(0.024342235871335882), np.float64(0.0057875405195171314))] """ P = pyproj.Proj("WGS84", preserve_units=False) lats = [v[0] for v in locations.values()] longs = [v[1] for v in locations.values()] x, y = P(longs, lats) x = x / np.min(x) - 1.0 y = y / np.min(y) - 1.0 return list(zip(locations.keys(), x, y))
[docs] def flatten(list_of_lists: list[list]) -> list: """ Flatten a list of lists into a single list. :param list_of_lists: A list containing sublists. :type list_of_lists: list[list] :return: A single flattened list with all elements from the sublists. :rtype: list :Example: .. code-block:: python >>> flatten([[1, 2], [3, 4], [5]]) [1, 2, 3, 4, 5] >>> flatten([['a'], ['b', 'c'], []]) ['a', 'b', 'c'] """ return [item for sublist in list_of_lists for item in sublist]
[docs] def stack(li: list) -> list[list]: """ Convert a flat list into a list of single-item lists (i.e., "stack" each element). :param li: A flat list of elements. :type li: list :return: A list where each original element is wrapped in its own sublist. :rtype: list[list] :Example: .. code-block:: python >>> stack([1, 2, 3]) [[1], [2], [3]] >>> stack(['a', 'b']) [['a'], ['b']] """ return [[item] for item in li]