from .download import download_from_url, zenodo_download
from zipfile import ZipFile
from pathlib import Path
import pandas as pd
# [docs]
def kelmarsh_raw_data(download_path: str | Path, include: list[str]) -> None:
    """
    Fetch the Kelmarsh SCADA archives and build the consolidated raw CSV.

    ``download_path`` is created if it does not exist. The static turbine
    file ``Kelmarsh_WT_static.csv`` and every archive named in ``include``
    are then requested from Zenodo record ``8252025`` (with
    ``force_download=False``). If ``kelmarsh_raw.csv`` is not already present
    in ``download_path``, the members of each archive whose name contains
    ``Turbine_Data`` are read (skipping the first 9 lines of each file),
    concatenated, and written to ``kelmarsh_raw.csv`` without the index.
    An existing ``kelmarsh_raw.csv`` is left untouched.

    :param download_path: Directory where files are downloaded and stored.
    :type download_path: str or pathlib.Path
    :param include: Names of the ZIP archives to download and process.
    :type include: list[str]
    :return: None
    :rtype: None

    .. rubric:: Example

    .. code-block:: python

        >>> from pathlib import Path
        >>> import os
        >>> download_path = Path("./test_folder")
        >>> file_name = "Kelmarsh_SCADA_2022_4457.zip"
        >>> kelmarsh_raw_data(download_path, [file_name])
        >>> zip_file = download_path / "Kelmarsh_SCADA_2022_4457.zip"
        >>> zip_file.exists()
        True
        >>> raw_csv = download_path / "kelmarsh_raw.csv"
        >>> raw_csv.exists()
        True
        >>> static_csv = download_path / "Kelmarsh_WT_static.csv"
        >>> static_csv.exists()
        True
        >>> os.remove(raw_csv)
        >>> os.remove(static_csv)
        >>> os.remove(zip_file)
        >>> download_path.rmdir()
    """
    target_dir = Path(download_path)
    target_dir.mkdir(parents=True, exist_ok=True)

    # The static turbine file is always requested alongside the archives.
    zenodo_download(
        8252025,
        target_dir,
        force_download=False,
        include=["Kelmarsh_WT_static.csv"] + include,
    )

    combined_csv = target_dir / "kelmarsh_raw.csv"
    if combined_csv.exists():
        # A consolidated file is already there; do not rebuild it.
        return

    per_archive = [
        _scada_zip_to_dataframe(
            target_dir / archive_name,
            filter_exp="Turbine_Data",
            skiprows=9,
        )
        for archive_name in include
    ]
    pd.concat(per_archive).to_csv(combined_csv, index=False)
def _scada_zip_to_dataframe(
filename: str | Path, filter_exp: str = "", skiprows: int = 0
) -> pd.DataFrame:
"""Accepts a zip file that contains csv SCADA files."""
# print("-------")
# print(f"Trying to open: {filename.normcase()}")
with ZipFile(filename) as myzip:
data_files = [f for f in myzip.namelist() if filter_exp in f]
frames = []
for f in data_files:
wind_turbine = int(Path(f).stem.split("_")[-1])
with myzip.open(f, "r") as wt:
df_tmp = pd.read_csv(wt, skiprows=skiprows)
df_tmp["Wind turbine ID"] = wind_turbine
frames.append(df_tmp)
return pd.concat(frames)