Source code for farmnet.data.data_store

from .download import download_from_url, zenodo_download
from zipfile import ZipFile
from pathlib import Path
import pandas as pd


def kelmarsh_raw_data(download_path: str | Path, include: list[str]) -> None:
    """
    Fetch the Kelmarsh SCADA archives and build the consolidated raw CSV.

    The target directory is created if needed, then ``zenodo_download`` is
    called for Zenodo record ``8252025`` requesting ``Kelmarsh_WT_static.csv``
    plus every archive named in `include`. If ``kelmarsh_raw.csv`` is not yet
    present in `download_path`, the ``Turbine_Data`` CSV members of each
    archive in `include` are read, concatenated and written to that file
    (without the index column). When the file already exists it is left
    untouched.

    :param download_path: Directory in which the downloaded files and the
        consolidated CSV are stored. Created (with parents) when missing.
    :type download_path: str or pathlib.Path
    :param include: Names of the ZIP archives to download and merge.
    :type include: list[str]
    :return: None
    :rtype: None

    .. rubric:: Example

    .. code-block:: python

        >>> import os
        >>> from pathlib import Path
        >>> download_path = Path("./test_folder")
        >>> file_name = "Kelmarsh_SCADA_2022_4457.zip"
        >>> kelmarsh_raw_data(download_path, [file_name])
        >>> zip_file = download_path / file_name
        >>> zip_file.exists()
        True
        >>> raw_csv = download_path / "kelmarsh_raw.csv"
        >>> raw_csv.exists()
        True
        >>> static_csv = download_path / "Kelmarsh_WT_static.csv"
        >>> static_csv.exists()
        True
        >>> os.remove(raw_csv)
        >>> os.remove(static_csv)
        >>> os.remove(zip_file)
        >>> download_path.rmdir()
    """
    target_dir = Path(download_path)
    target_dir.mkdir(exist_ok=True, parents=True)

    zenodo_download(
        8252025,
        target_dir,
        force_download=False,
        include=["Kelmarsh_WT_static.csv"] + include,
    )

    consolidated_csv = target_dir / "kelmarsh_raw.csv"
    if consolidated_csv.exists():
        # A previous run already produced the merged file; nothing to rebuild.
        return

    per_archive = [
        _scada_zip_to_dataframe(
            target_dir / archive_name,
            filter_exp="Turbine_Data",
            # Skips the first 9 lines of every CSV member -- presumably a
            # metadata preamble; confirm against the dataset files.
            skiprows=9,
        )
        for archive_name in include
    ]
    pd.concat(per_archive).to_csv(consolidated_csv, index=False)
def _scada_zip_to_dataframe( filename: str | Path, filter_exp: str = "", skiprows: int = 0 ) -> pd.DataFrame: """Accepts a zip file that contains csv SCADA files.""" # print("-------") # print(f"Trying to open: {filename.normcase()}") with ZipFile(filename) as myzip: data_files = [f for f in myzip.namelist() if filter_exp in f] frames = [] for f in data_files: wind_turbine = int(Path(f).stem.split("_")[-1]) with myzip.open(f, "r") as wt: df_tmp = pd.read_csv(wt, skiprows=skiprows) df_tmp["Wind turbine ID"] = wind_turbine frames.append(df_tmp) return pd.concat(frames)