# Source code for farmnet.data.data_store

from .download import download_from_url, zenodo_download
from zipfile import ZipFile
from pathlib import Path
import pandas as pd



# [docs]
def kelmarsh_raw_data(download_path: str | Path, include: list[str]) -> None:
    """
    Fetch the Kelmarsh SCADA archives and build the consolidated raw CSV.

    The target directory is created if needed, then ``zenodo_download`` is
    asked (without forcing a re-download) for ``Kelmarsh_WT_static.csv`` plus
    every archive named in `include`. If ``kelmarsh_raw.csv`` is not yet
    present in `download_path`, each archive in `include` is read, the members
    whose name contains ``"Turbine_Data"`` are loaded (skipping the first 9
    lines of every CSV), and the concatenated result is written to
    ``kelmarsh_raw.csv`` without the index column. An existing
    ``kelmarsh_raw.csv`` is left untouched.

    :param download_path: Directory in which files are downloaded and the
        consolidated CSV is stored. Created (with parents) when missing.
    :type download_path: str or pathlib.Path
    :param include: Names of the ZIP archives to download and combine.
    :type include: list[str]
    :return: None
    :rtype: None

    .. rubric:: Example

    .. code-block:: python

        >>> from pathlib import Path
        >>> import os

    .. code-block:: python

        >>> download_path = Path("./test_folder")
        >>> file_name = "Kelmarsh_SCADA_2022_4457.zip"
        >>> kelmarsh_raw_data(download_path, [file_name])
        >>> zip_file = download_path / "Kelmarsh_SCADA_2022_4457.zip"
        >>> zip_file.exists()
        True
        >>> raw_csv = download_path / "kelmarsh_raw.csv"
        >>> raw_csv.exists()
        True
        >>> static_csv = download_path / "Kelmarsh_WT_static.csv"
        >>> static_csv.exists()
        True
        >>> os.remove(raw_csv)
        >>> os.remove(static_csv)
        >>> os.remove(zip_file)
        >>> download_path.rmdir()
    """
    target_dir = Path(download_path)
    target_dir.mkdir(parents=True, exist_ok=True)

    # The static turbine description is always requested alongside the
    # caller-selected archives.
    wanted_files = ["Kelmarsh_WT_static.csv"] + include
    zenodo_download(
        8252025,
        target_dir,
        force_download=False,
        include=wanted_files,
    )

    combined_csv = target_dir / "kelmarsh_raw.csv"
    if combined_csv.exists():
        # A previous run already produced the consolidated file; keep it.
        return

    per_archive = [
        _scada_zip_to_dataframe(
            target_dir / archive_name,
            filter_exp="Turbine_Data",
            skiprows=9,
        )
        for archive_name in include
    ]
    pd.concat(per_archive).to_csv(combined_csv, index=False)



def _scada_zip_to_dataframe(
    filename: str | Path, filter_exp: str = "", skiprows: int = 0
) -> pd.DataFrame:
    """Accepts a zip file that contains csv SCADA files."""
    # print("-------")
    # print(f"Trying to open: {filename.normcase()}")
    with ZipFile(filename) as myzip:
        data_files = [f for f in myzip.namelist() if filter_exp in f]

        frames = []
        for f in data_files:
            wind_turbine = int(Path(f).stem.split("_")[-1])
            with myzip.open(f, "r") as wt:
                df_tmp = pd.read_csv(wt, skiprows=skiprows)
                df_tmp["Wind turbine ID"] = wind_turbine
                frames.append(df_tmp)

    return pd.concat(frames)