Source code for renewenergy.impute_split

import pandas as pd
from sklearn.model_selection import train_test_split
import numpy


def impute_split(data, impute_val, train_size, rand_seed=0):
    """
    Replace NA values in a data frame and split it into training and
    testing sets.

    Parameters:
    - data: The initial dataset (DataFrame) to be tidied
    - impute_val: The value to replace all NA values with within the dataset
    - train_size: A value from 0-1 that represents the fraction of data to be
      used in the training split (remainder will be used in testing split)
    - rand_seed: A seed value to be used for randomness, allowing for
      reproducible results; default value is 0

    Returns:
    train: Imputed, training dataset split
    test: Imputed, testing dataset split
    """
    # fillna returns a new object here, so the caller's frame is not modified.
    imputed = data.fillna(impute_val)

    # Hand the seed to the splitter directly rather than calling
    # numpy.random.seed(): that keeps the split reproducible without
    # reseeding NumPy's process-wide generator for every other caller.
    train, test = train_test_split(
        imputed,
        train_size=train_size,
        random_state=rand_seed,
    )
    return train, test