Source code for renewenergy.reading_data

import requests
from io import BytesIO
# import urllib.request as urllib2
import pandas as pd
import os
from zipfile import ZipFile
from urllib.request import urlopen
import pathlib


[docs]
def reading_data(url, data_file, data_path, file_name):
    """
    Download a ZIP archive from a URL and save one of its CSV members to disk.

    The archive is fetched once and held in memory. The member named
    ``data_file`` is parsed with pandas and written back out as a CSV file
    at ``data_path/file_name``.

    Parameters
    ----------
    url : str
        URL of the ZIP archive. It must end with ``.zip``.

    data_file : str
        Name of the member inside the archive to read, exactly as it
        appears in the archive's listing.

    data_path : str
        Directory the CSV is saved to. It is created if it does not exist.

    file_name : str
        Name of the CSV file to create inside ``data_path``.

    Returns
    -------
    None
        The data is written to ``data_path/file_name``; nothing is returned.

    Raises
    ------
    ValueError
        If the URL does not answer with HTTP status 200, if the URL does
        not end with ``.zip``, if ``data_file`` is not a member of the
        archive, or if the output file already exists.

    Examples
    --------
    >>> reading_data(  # doctest: +SKIP
    ...     "https://example.com/archive.zip", "WDICSV.csv",
    ...     "data/raw", "downloaded_data.csv")
    """
    # A single request serves both the status check and the download: the
    # body it already fetched is reused below instead of requesting the
    # URL a second time.
    response = requests.get(url)

    if response.status_code != 200:
        raise ValueError('The inputed URL does not exist.')

    if not url.endswith(".zip"):
        raise ValueError('The inputed URL is not a ZIP file, please input a ZIP file')

    # Context managers close the archive and the member handle even when
    # validation or CSV parsing raises.
    with ZipFile(BytesIO(response.content)) as archive:
        if data_file not in archive.namelist():
            raise ValueError("The specified file is not present within the inputed ZIP file")
        with archive.open(data_file) as member:
            data = pd.read_csv(member)

    os.makedirs(data_path, exist_ok=True)

    # Refuse to overwrite a file that is already there.
    out_path = pathlib.Path(data_path) / file_name
    if out_path.exists():
        raise ValueError("The filename already exists.")

    data.to_csv(out_path)

       

if __name__ == '__main__':
    # Script entry point: take the four arguments of reading_data from the
    # command line. (The previous code called an undefined name with no
    # arguments, so running the module always failed with NameError.)
    import argparse

    parser = argparse.ArgumentParser(
        description="Read a data file from a ZIP archive at a URL and save it as CSV."
    )
    parser.add_argument("url", help="URL of the ZIP archive (must end with .zip)")
    parser.add_argument("data_file", help="name of the file inside the archive to extract")
    parser.add_argument("data_path", help="directory the CSV is saved to")
    parser.add_argument("file_name", help="name of the CSV file to create")
    args = parser.parse_args()

    reading_data(args.url, args.data_file, args.data_path, args.file_name)