Source code for farmnet.utils

# -*- coding: utf-8 -*-
"""Functions for describing wind farm layouts and creating wind farm representations"""

import os
from pathlib import Path
import pickle
from typing import Union, Any, TypeVar
import pandas as pd
import tomllib
import datetime

T = TypeVar("T")


[docs] def read_config(file_path: str | Path) -> dict: """Read and parse a TOML configuration file for wind turbine data processing. This function loads a configuration file that defines the structure and metadata of wind turbine datasets, including CSV/Parquet file formats, column mappings, and dataset locations. :param file_path: Path to the configuration file in TOML format. Can be either a string or :class:`pathlib.Path` object. :type file_path: Union[str, Path] :return: A dictionary containing the parsed configuration :rtype: dict :raises FileNotFoundError: If the specified configuration file doesn't exist. :raises tomllib.TOMLDecodeError: If the TOML file is malformed. .. rubric:: Example Configuration Structure .. code-block:: python { 'csv': { 'encoding': 'utf8', 'sep': ',', 'header': 0 }, 'index': { 'name-from-source': '# Date and time', 'time-zone-from-source': 'UTC', 'name': 'datetime', 'unit': 'ns', 'time-zone': 'UTC' }, 'columns': [ {'name-from-source': 'Wind direction (°)', 'name': 'wind_direction'}, # ... more column mappings ], 'dataset': { 'root_dir': 'kelmarsh_data_imputation', 'data': 'featured_windeurope_data.parquet', 'static': 'Kelmarsh_WT_static.csv' } } .. rubric:: Example Basic usage: .. 
code-block:: python >>> import json >>> default_cfg_path = Path(getenv("CONFIG_PATH", "examples/kelmarsh.toml")) >>> config = read_config(default_cfg_path) >>> print(json.dumps(config, indent=4, sort_keys=True, ensure_ascii=False)) { "columns": [ { "name": "wind_direction", "name-from-source": "Wind direction (°)" }, { "name": "nacelle_direction", "name-from-source": "Nacelle position (°)" }, { "name": "wind_speed", "name-from-source": "Wind speed (m/s)" }, { "name": "power", "name-from-source": "Power (kW)" }, { "name": "wt_id", "name-from-source": "Wind turbine ID" } ], "csv": { "encoding": "utf8", "header": 0, "sep": "," }, "dataset": { "data": "featured_windeurope_data.parquet", "root_dir": "kelmarsh_data_imputation", "static": "Kelmarsh_WT_static.csv" }, "index": { "name": "datetime", "name-from-source": "# Date and time", "time-zone": "UTC", "time-zone-from-source": "UTC", "unit": "ns" } } .. code-block:: python >>> config = read_config("not_existing_config.toml") Traceback (most recent call last): ... FileNotFoundError: [Errno 2] No such file or directory: 'not_existing_config.toml' .. seealso:: :mod:`tomllib` Python standard library module for TOML parsing. """ with open(file_path, "rb") as f: config = tomllib.load(f) return config
def save_data(df: pd.DataFrame, filename: Union[str, Path]):
    """Write a pandas DataFrame or Series to ``<filename>.pkl``.

    The ``.pkl`` suffix is always appended to ``filename``; the object is
    then serialized through its own ``to_pickle`` method.

    :param df: DataFrame or Series to be saved
    :type df: Union[pd.DataFrame, pd.Series]
    :param filename: Output filename (without .pkl extension)
    :type filename: Union[str, Path]

    .. rubric:: Example

    .. code-block:: python

        >>> import pandas as pd
        >>> from pathlib import Path
        >>> import os
        >>> # Create test data
        >>> test_df = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']})
        >>> # Save the data
        >>> save_data(test_df, "test_dataframe")
        >>> # Verify file exists
        >>> Path("test_dataframe.pkl").exists()
        True
        >>> # Clean up
        >>> os.remove("test_dataframe.pkl")
    """
    # The extension is appended unconditionally, even if one is already there.
    target = f"{filename}.pkl"
    df.to_pickle(target)
def load_data(filename: Union[str, Path]) -> Union[pd.DataFrame, pd.Series]:
    """Read a pandas DataFrame or Series back from ``<filename>.pkl``.

    The ``.pkl`` suffix is always appended to ``filename`` before the file
    is handed to :func:`pandas.read_pickle`.

    :param filename: Path to the pickle file (without .pkl extension)
    :type filename: Union[str, Path]
    :return: The loaded DataFrame or Series
    :rtype: Union[pd.DataFrame, pd.Series]
    :raises FileNotFoundError: If the specified file doesn't exist

    .. rubric:: Example

    .. code-block:: python

        >>> import pandas as pd
        >>> from pathlib import Path
        >>> import os
        >>> # Create and save test data
        >>> test_df = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']})
        >>> test_df.to_pickle("test_load.pkl")
        >>> # Load and verify
        >>> loaded = load_data("test_load")
        >>> isinstance(loaded, pd.DataFrame)
        True
        >>> loaded.shape
        (2, 2)
        >>> # Clean up
        >>> os.remove("test_load.pkl")

    .. code-block:: python

        >>> loaded = load_data("test_load")
        Traceback (most recent call last):
        ...
        FileNotFoundError: [Errno 2] No such file or directory: 'test_load.pkl'
    """
    source = f"{filename}.pkl"
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    loaded = pd.read_pickle(source)
    return loaded
def date_to_str(t: datetime.datetime) -> str:
    """Format a datetime as ``YYYY-MM-DD HH:MM:SS``.

    :param t: A datetime object to be converted.
    :type t: datetime.datetime
    :return: String representation in format "%Y-%m-%d %H:%M:%S"
    :rtype: str

    .. rubric:: Example

    .. code-block:: python

        >>> from datetime import datetime
        >>> dt = datetime(2023, 5, 15, 14, 30, 0)
        >>> date_to_str(dt)
        '2023-05-15 14:30:00'

    .. seealso::

        :meth:`datetime.datetime.strftime`
            For custom formatting options.
    """
    # Second resolution only: microseconds and time-zone info are not emitted.
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    return t.strftime(timestamp_format)
def pickle_object(obj: Any, filename: Union[str, Path]):
    """Serialize ``obj`` into ``<filename>.pkl``.

    The ``.pkl`` suffix is always appended to ``filename`` and the object is
    written with the highest pickle protocol available.

    :param obj: Object to be dumped into the pickle file
    :type obj: Any
    :param filename: Path to the pickle file (without .pkl extension)
    :type filename: Union[str, Path]

    .. rubric:: Example

    .. code-block:: python

        >>> from pathlib import Path
        >>> import os
        >>> test_data = {'Surname': 'Michael', 'Name': 'Palin'}
        >>> pickle_object(test_data, "test_dict")
        >>> Path("test_dict.pkl").exists()
        True
        >>> data = load_pickle("test_dict.pkl")
        >>> isinstance(data, dict)
        True
        >>> data == test_data
        True
        >>> os.remove("test_dict.pkl")
    """
    target = f"{filename}.pkl"
    with open(target, "wb") as sink:
        # Pickler(...).dump(obj) is the object-oriented form of pickle.dump().
        pickle.Pickler(sink, protocol=pickle.HIGHEST_PROTOCOL).dump(obj)
def load_pickle(filename: Union[str, Path]) -> Any:
    """Deserialize a Python object from a pickle file.

    Unlike :func:`pickle_object`, no ``.pkl`` suffix is appended here:
    ``filename`` must be the full path of the file to read.

    :param filename: Path to the pickle file
    :type filename: Union[str, Path]
    :return: Deserialized Python object
    :rtype: Any
    :raises FileNotFoundError: If the specified file does not exist
    :raises pickle.UnpicklingError: If the file cannot be unpickled
    :raises EOFError: If the file is empty or truncated
    :raises ModuleNotFoundError: If required module is not available

    .. rubric:: Example

    .. code-block:: python

        >>> test_data = {'Surname': 'Michael ', 'Name': 'Palin'}
        >>> pickle_object(test_data, "test_dict")
        >>> data = load_pickle("test_dict.pkl")
        >>> isinstance(data, dict)
        True
        >>> data
        {'Surname': 'Michael ', 'Name': 'Palin'}
        >>> os.remove("test_dict.pkl")

    .. code-block:: python

        >>> config = load_pickle("not_existing.pkl")
        Traceback (most recent call last):
        ...
        FileNotFoundError: [Errno 2] No such file or directory: 'not_existing.pkl'

    .. seealso::

        :func:`pickle.load`
            The underlying deserialization function
        :func:`pickle_object`
            The corresponding serialization function
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(str(filename), "rb") as source:
        return pickle.Unpickler(source).load()
def getenv(key: str, default: T = 0) -> T:
    """Get an environment variable, converted to the type of ``default``.

    Behaviour by case:

    * Variable not set: ``default`` is returned as is.
    * ``default`` is ``None``: the raw string from the environment is
      returned, since there is no type to convert to.
    * ``default`` is a ``bool``: the value is parsed as a flag. The strings
      ``""``, ``"0"``, ``"false"``, ``"no"``, ``"off"``, ``"n"`` and ``"f"``
      (case-insensitive, surrounding whitespace ignored) mean ``False``;
      any other value means ``True``. A plain ``bool("0")`` would be
      ``True``, which is why booleans are handled separately.
    * Otherwise: ``type(default)`` is called on the raw string.

    :param key: Name of the environment variable
    :type key: str
    :param default: Default value if key doesn't exist (determines return type)
    :type default: T
    :return: Environment variable value converted to default's type
    :rtype: T
    :raises ValueError: If the value cannot be converted to the type of
        ``default`` (e.g. ``"abc"`` with an ``int`` default).

    .. rubric:: Examples

    .. code-block:: python

        >>> value = getenv("EXAMPLE", default=-1)
        >>> value, type(value)
        (-1, <class 'int'>)

    .. code-block:: python

        >>> os.environ["EXAMPLE"] = "1"
        >>> value = getenv("EXAMPLE", default="-1")
        >>> value, type(value)
        ('1', <class 'str'>)

    .. code-block:: python

        >>> os.environ["EXAMPLE"] = "false"
        >>> getenv("EXAMPLE", default=True)
        False
        >>> del os.environ["EXAMPLE"]

    .. seealso::

        :func:`os.getenv`
            The underlying environment variable access
    """
    raw = os.getenv(key)
    if raw is None:
        # Nothing to convert: hand the caller's default back untouched.
        return default
    if default is None:
        # type(None) cannot be called with an argument; return the raw string.
        return raw
    if isinstance(default, bool):
        # bool() of any non-empty string is True, so parse the flag explicitly.
        falsy_values = ("", "0", "false", "no", "off", "n", "f")
        return raw.strip().lower() not in falsy_values
    return type(default)(raw)