# Source code for renewenergy.impute_split

import pandas as pd
from sklearn.model_selection import train_test_split
import numpy



# [docs]
def impute_split(data, impute_val, train_size, rand_seed=0):
    """
    Replace NA values in a data frame, then split it into training and testing sets.

    Parameters:
    - data: The initial dataset (DataFrame) to be tidied; the NA replacement is
      done on the new object returned by `fillna`, not in place
    - impute_val: The value to replace all NA values with within the dataset
    - train_size: A value from 0-1 that represents the proportion of data to be
      used in the training split (remainder will be used in testing split);
      forwarded unchanged to `train_test_split`
    - rand_seed: A seed value to be used for randomness, allowing for
      reproducible results; default value is 0

    Returns:
    train: Imputed, training dataset split
    test: Imputed, testing dataset split
    """
    # Use a private generator seeded with rand_seed rather than reseeding the
    # process-wide NumPy RNG: the shuffle stays reproducible for a given seed
    # without altering random state that unrelated code may depend on.
    rng = numpy.random.RandomState(rand_seed)

    imputed = data.fillna(impute_val)
    train, test = train_test_split(
        imputed, train_size=train_size, random_state=rng
    )
    return train, test