# Source code for renewenergy.plot_rmse

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn import set_config
from sklearn.metrics import mean_squared_error


# def split_xy_columns(dataset):
#     """
#     Split a dataset into its feature columns (x) and its target column (y).
#
#     Parameters
#     ----------
#     dataset : pandas.DataFrame
#         Data containing a 'Country Name' column and the
#         'Renewable electricity output (% of total electricity output)' column.
#
#     Returns
#     -------
#     dataset_x : pandas.DataFrame
#         The dataset with the 'Country Name' column and the target column removed.
#     dataset_y : pandas.DataFrame
#         Single-column DataFrame holding only the target column.
#
#     Examples
#     --------
#     >>> energy_x, energy_y = split_xy_columns(energy_df)
#
#     """

#     #splitting the x and y columns of the data
#     dataset_x = dataset
#     dataset_x = dataset_x.drop('Renewable electricity output (% of total electricity output)', axis=1)
#     dataset_x = dataset_x.drop('Country Name', axis=1)
#     dataset_y = dataset[["Renewable electricity output (% of total electricity output)"]]

#     return dataset_x, dataset_y

#@click.command()
#@click.option('--training_data_path', help='path of training set data (csv) to read', type=str)
#@click.option('--test_data_path', help='path of test set data (csv) to read', type=str)
#@click.option('--output_path', help='path to save the results to; needs to end with .png', type=str)

def _split_xy_columns(dataset):
    """Split *dataset* into feature columns and the single target column.

    Returns a ``(dataset_x, dataset_y)`` pair: ``dataset_x`` is *dataset*
    without the 'Country Name' column and the target column, ``dataset_y`` is
    a one-column DataFrame holding only the target column.
    """
    target = "Renewable electricity output (% of total electricity output)"
    dataset_x = dataset.drop(columns=[target, "Country Name"])
    dataset_y = dataset[[target]]
    return dataset_x, dataset_y


def plot_rmse(training_data_path, test_data_path, output_path):
    """
    Fit a linear regression and plot predicted vs. true target values.

    The model is fitted on the training data, evaluated on the test data, and
    a scatter plot of predicted against true values is written to
    ``output_path``.

    Parameters
    ----------
    training_data_path : str
        Path to the training data .csv file.
    test_data_path : str
        Path to the test data .csv file.
    output_path : str
        File path the figure is saved to. It is passed unchanged to
        ``matplotlib.pyplot.savefig``, so it should name a file
        (e.g. one ending in ``.png``), not a directory.

    Returns
    -------
    energy_RMSE : float
        Root mean squared error of the predictions on the test data.
    fig : matplotlib.figure.Figure
        The figure containing the scatter plot.

    Examples
    --------
    >>> rmse, fig = plot_rmse("data/energy_train.csv", "data/energy_test.csv", "results/rmse.png")
    """
    target = "Renewable electricity output (% of total electricity output)"

    # Read the cleaned train and test datasets.
    energy_train = pd.read_csv(training_data_path)
    energy_test = pd.read_csv(test_data_path)

    # Split each dataset into features (x) and target (y). The original body
    # called a module-level split_xy_columns that is not defined in this
    # module, so the split is done by the private helper above.
    energy_train_x, energy_train_y = _split_xy_columns(energy_train)
    energy_test_x, energy_test_y = _split_xy_columns(energy_test)

    # Fit the linear model on the training data.
    lm = LinearRegression()
    lm.fit(energy_train_x, energy_train_y)

    y_true = energy_test_y[target]
    # The model was fitted on a one-column DataFrame, so predict() returns an
    # (n, 1) array; flatten it so it lines up with the 1-D y_true.
    y_pred = np.ravel(lm.predict(energy_test_x))

    energy_RMSE = mean_squared_error(y_true=y_true, y_pred=y_pred) ** (1 / 2)

    fig = plt.figure()
    plt.scatter(x=y_pred, y=y_true)
    plt.title(f"Predicted vs. Ground Truth Target Value (RMSE={energy_RMSE})")
    plt.xlabel("Predicted Values")
    plt.ylabel("True Values")
    plt.savefig(output_path)

    return energy_RMSE, fig
#if __name__ == '__main__': # plot_rmse()