# -*- coding: utf-8 -*-
"""
Created on Fri May 29 09:37:56 2020

@author: RILJAM
"""

import sys
import calendar
import glob
from datetime import datetime, timedelta, date
import pandas as pd
import numpy as np
import holidays

sys.path.append('../..')
import data.loadshape_data as lsd


def batch_condense(ls_in, peak_no=3, wknd_peak=True,
                   factor='M', pdaf_max=2.0, pdaf_fix=1.1,
                   wkndwkd_fix=1.0, scaled=True, year=2015):
    """
    Use ``batch_condense`` to convert loadshapes to the Lopeshape format.
    ``batch_condense`` returns two pandas DataFrames. The first
    DataFrame is the condensed loadshape: Peak, Weekday, and Weekend
    averages for each hour and month. The second DataFrame holds the
    ratios associated with the condensed loadshape for each month.

    Parameters
    ----------
    ls_in : pandas DataFrame
        DataFrame of the loadshape to convert to the Lopeshape format.
        It must contain an ``ID`` column, hour columns named ``'1'``
        through ``'24'`` and either a ``Date`` column or a ``daynum``
        column (1-based day of ``year``).
    peak_no : int, default 3
        The number of peak days per month to include in the Peak Day
        average. Peak days are the days with the largest daily sum
        within each ID and month.
    wknd_peak : bool, default True
        Allow peak days to occur on weekends.
    factor : {'M','A','S','F'}, default 'M'
        Determines how to calculate the ratios.
        * M: Monthly
        * A: Annually (``Mo Brk`` weighted sum over all rows)
        * S: Seasonally (``Mo Brk`` weighted average over the months
          1, 2, 3, 11, 12 / 4, 5, 10 / 6, 7, 8, 9)
        * F: Fixed ratios
        The annual and seasonal options pool every row of the ratios
        table, so they are only meaningful for a single ID.
    pdaf_max : float, default 2.0
        Maximum value for the PDAF ratio.
    pdaf_fix : float, default 1.1
        Fixed value for the PDAF ratio.
    wkndwkd_fix : float, 1.0
        Fixed value for the Wknd/Wkd ratio.
    scaled : bool, default True
        Scale the condensed loadshape to a maximum of 1.
    year: int, default 2015
        The year of the 8760 profile. Only used when ``ls_in`` has no
        ``Date`` column.

    Returns
    -------
    cdls_scale : pandas DataFrame
        DataFrame containing the condensed loadshape: Peak, Weekday, and
        Weekend averages for each hour and month, with the max of each
        ID and month scaled to 1. This DataFrame is returned for
        scaled = True.
    cdls : pandas DataFrame
        DataFrame containing the condensed loadshape: Peak, Weekday, and
        Weekend averages for each hour and month. This DataFrame is
        returned for scaled = False.
    ratios : pandas DataFrame
        DataFrame containing the ratios associated with the condensed
        loadshape for each month (columns ``Month``, ``ID``, ``Mo Brk``,
        ``PDAF`` and ``Wknd/Wkd``).
    """

    # set up variables to find averages
    hours = [str(x) for x in range(1, 25)]
    group_keys = ['Month', 'ID']
    us_holidays = holidays.UnitedStates()

    def _is_weekday(day):
        # Monday-Friday that is not a US holiday; Nov 10-12 are always
        # kept as weekdays even when flagged as a holiday.
        if day.weekday() >= 5:
            return False
        if any(day == datetime(day.year, 11, d) for d in (10, 11, 12)):
            return True
        return day not in us_holidays

    def _hourly_mean(rows, value_name):
        # Average only the hour columns so that helper columns (Date,
        # Day Sum, ...) never leak into the melted 'Hour' labels.
        return (rows.groupby(group_keys)[hours].mean().reset_index()
                .melt(id_vars=group_keys, var_name='Hour',
                      value_name=value_name))

    ls_in_copy = ls_in.copy()
    if 'Date' not in list(ls_in_copy.columns):
        ls_in_copy['Date'] = ((datetime(year, 1, 1)) +
                              ls_in_copy['daynum'].apply(
            lambda x: timedelta(x - 1)))
        ls_in_copy.drop(columns='daynum', inplace=True)

    ls_in_copy['Month'] = ls_in_copy['Date'].apply(lambda x: x.month)
    ls_in_copy['Weekday'] = ls_in_copy['Date'].apply(
        _is_weekday).astype(bool)
    ls_in_copy['Day Sum'] = ls_in_copy[hours].sum(axis=1)

    # flag the peak_no largest days of every ID and month
    ls_in_copy['Peak Day'] = False
    if wknd_peak:
        candidates = ls_in_copy
    else:
        candidates = ls_in_copy[ls_in_copy['Weekday']]
    peak_index = candidates.sort_values(by='Day Sum', ascending=False)\
        .groupby(['ID', 'Month']).head(peak_no).index
    ls_in_copy.loc[peak_index, 'Peak Day'] = True

    # get averages and prepare for merge
    lspeak = _hourly_mean(ls_in_copy[ls_in_copy['Peak Day']], 'Peak')
    lsavg = _hourly_mean(ls_in_copy[ls_in_copy['Weekday']], 'Avg')
    lsend = _hourly_mean(ls_in_copy[~ls_in_copy['Weekday']], 'End')

    # merge averages to create condensed load shape
    cdls = lspeak.merge(lsavg, on=['Month', 'Hour', 'ID'])\
        .merge(lsend, on=['Month', 'Hour', 'ID'])
    cdls['Hour'] = pd.to_numeric(cdls['Hour'])
    cdls.sort_values(by=['ID', 'Month', 'Hour'],
                     ignore_index=True, inplace=True)

    # scale the condensed load shapes by the max of each ID and month
    cdls_scale = cdls.merge(cdls.groupby(group_keys)
                            .max().drop(columns='Hour').reset_index(),
                            on=group_keys)

    # a zero maximum would divide by zero; scale those shapes by 1
    cdls_scale[['Peak_y', 'Avg_y', 'End_y']] = (
        cdls_scale[['Peak_y', 'Avg_y', 'End_y']].replace(0, 1))

    cdls_scale['Peak'] = cdls_scale['Peak_x']/cdls_scale['Peak_y']
    cdls_scale['Avg'] = cdls_scale['Avg_x']/cdls_scale['Avg_y']
    cdls_scale['End'] = cdls_scale['End_x']/cdls_scale['End_y']

    cdls_scale.drop(columns=['Peak_x', 'Avg_x', 'End_x',
                             'Peak_y', 'Avg_y', 'End_y'], inplace=True)

    # ratios calculations
    ratios = cdls.groupby(group_keys, sort=False).sum()\
        .reset_index().drop(columns=['Hour'])  # Sums
    # number of days per ID and month, joined on the keys so the result
    # does not depend on the row order of ls_in
    month_len = ls_in_copy.groupby(group_keys, sort=False)['Date']\
        .count().rename('Month L').reset_index()
    ratios = ratios.merge(month_len, on=group_keys, how='left')
    ratios['Avg r'] = 5/7*ratios['Month L'] - 1
    ratios['End r'] = 2/7*ratios['Month L'] + 1
    ratios['MO Tot'] = (ratios['Avg r']*ratios['Avg'] +
                        ratios['End r']*ratios['End'])
    ratios['MO Tot Sum'] = ratios.groupby('ID', sort=False)['MO Tot']\
        .transform('sum')
    ratios['Mo Brk'] = ratios['MO Tot']/ratios['MO Tot Sum']
    ratios['PDAF'] = ratios['Peak']/ratios['Avg']
    ratios['Wknd/Wkd'] = ratios['End']/ratios['Avg']

    if factor == 'F':
        ratios['PDAF'] = pdaf_fix
        ratios['Wknd/Wkd'] = wkndwkd_fix

    elif factor == 'A':
        ratios['PDAF'] = sum(ratios['PDAF']*ratios['Mo Brk'])
        ratios['Wknd/Wkd'] = sum(ratios['Wknd/Wkd']*ratios['Mo Brk'])

    elif factor == 'S':
        # winter, spring/fall and summer month groups
        for months in ([1, 2, 3, 11, 12], [4, 5, 10], [6, 7, 8, 9]):
            in_season = ratios['Month'].isin(months)
            weights = ratios.loc[in_season, 'Mo Brk']
            weight_total = sum(weights)
            if weight_total == 0:
                # no usage in this season: fall back to neutral ratios
                season_pdaf = 1
                season_wkwd = 1
            else:
                season_pdaf = (sum(ratios.loc[in_season, 'PDAF'] * weights)
                               / weight_total)
                season_wkwd = (sum(ratios.loc[in_season, 'Wknd/Wkd'] *
                                   weights) / weight_total)
            # write back by month rather than by row position
            ratios.loc[in_season, 'PDAF'] = season_pdaf
            ratios.loc[in_season, 'Wknd/Wkd'] = season_wkwd

    # cap the PDAF, then replace undefined ratios (e.g. 0/0) with 1
    ratios['PDAF'] = ratios['PDAF'].apply(
        lambda x: min(x, pdaf_max)).fillna(1)
    ratios['Wknd/Wkd'] = ratios['Wknd/Wkd'].fillna(1)
    ratios.drop(columns=['Peak', 'Avg', 'End', 'Month L',
                         'Avg r', 'End r', 'MO Tot', 'MO Tot Sum'],
                inplace=True)

    if scaled:
        return cdls_scale, ratios
    else:
        return cdls, ratios


def format_cpuc(df_input, year=2015, bldg_vint='all', from_baseline=False,
                base_tech_id=None, meas_tech_id=None):
    """
    Use ``format_cpuc`` function to format 365x24 CPUC loadshapes before
    conversion to Lopeshape format.

    Parameters
    ----------
    df_input : pandas DataFrame
        DataFrame of the CPUC loadshape. The input is not modified.
    year : int, default 2015
        Year of the loadshape; combined with ``daynum`` to build ``Date``.
    bldg_vint : 'all', scalar or list-like, default 'all'
        Building vintage(s) to keep. Rows are kept when ``BldgVint``
        equals one of the values or ``'MH'`` followed by the last two
        characters of one of the values. ``'all'`` keeps every row.
    from_baseline : bool, default False
        Subtract the baseline load from the measure load to create a
        savings/impacts loadshape
    base_tech_id : str, default None
        Use when ``from_baseline = True``. TechID of the base case load shape.
        When omitted, every TechID containing neither ``'StdForMsr'`` nor
        ``'Pre'`` is used as the base case.
    meas_tech_id : str, default None
        Use when ``from_baseline = True``. TechID of the measure case load
        shape. When omitted, TechIDs containing ``'StdForMsr'`` are used.

    Returns
    -------
    ls_df : pandas DataFrame
        Loadshape with hour columns ``'1'`` to ``'24'``, ``ID`` and
        ``Date``.
    md_df : pandas DataFrame
        De-duplicated metadata of the loadshapes, including ``ID``,
        ``LastMod`` and ``Source Annual`` (the sum of all hourly values
        of the ID).
    """

    df_cpuc = df_input.copy()
    hr_li = [('hr' + "{:02d}".format(x)) for x in range(1, 25)]
    hr_x = [h + '_x' for h in hr_li]
    hr_y = [h + '_y' for h in hr_li]
    hours = [str(x) for x in range(1, 25)]

    select_all = isinstance(bldg_vint, str) and bldg_vint == 'all'
    if not select_all:
        # accept a single vintage as well as a list of vintages
        if isinstance(bldg_vint, (str, int)):
            bldg_vint = [bldg_vint]
        vints = [str(vint) for vint in bldg_vint]
        mh_vint = ['MH' + vint[-2:] for vint in vints]

        df_cpuc = df_cpuc[df_cpuc['BldgVint'].astype(str).isin(
            vints + mh_vint)].reset_index(drop=True)

    df_cpuc['ID'] = (df_cpuc['TechID'] + '-' + df_cpuc['BldgHVAC'] +
                     '-' + df_cpuc['BldgType'] +
                     df_cpuc['BldgVint'].astype(str) +
                     '-' + df_cpuc['BldgLoc'])
    df_cpuc['Year'] = year
    df_cpuc['Date'] = (df_cpuc['Year'].apply(lambda x: datetime(x, 1, 1)) +
                       df_cpuc['daynum'].apply(lambda x: timedelta(x - 1)))

    if from_baseline:
        base_drop = ['TechID', 'SizingID', 'enduse', 'lastmod']
        if base_tech_id:
            base_rows = df_cpuc.TechID == base_tech_id
        else:
            base_rows = ((~df_cpuc['TechID'].str.contains('StdForMsr')) &
                         (~df_cpuc['TechID'].str.contains('Pre')))
        df_base = df_cpuc[base_rows].drop(columns=base_drop)\
            .reset_index(drop=True)

        if meas_tech_id:
            meas_rows = df_cpuc.TechID == meas_tech_id
        else:
            meas_rows = df_cpuc['TechID'].str.contains('StdForMsr')
        df_measure = df_cpuc[meas_rows].reset_index(drop=True)

        # pair every measure day with its baseline day; overlapping hour
        # columns get the '_x' (measure) and '_y' (baseline) suffixes
        df_merge = df_measure.merge(df_base,
                                    on=['BldgType', 'BldgVint',
                                        'BldgLoc', 'BldgHVAC',
                                        'tstat', 'daynum'], how='left')
        df_cpuc = pd.concat([df_measure.drop(columns=hr_li),
                             pd.DataFrame(data=df_merge[hr_x].values -
                                          df_merge[hr_y].values,
                                          columns=hr_li)], axis=1)

    # 'hr01' ... 'hr24' -> '1' ... '24'
    df_cpuc.rename(columns=dict(zip(hr_li, hours)), inplace=True)
    ls_drop = ['enduse', 'TechID', 'SizingID', 'BldgType',
               'BldgVint', 'BldgLoc', 'BldgHVAC', 'tstat',
               'daynum', 'lastmod', 'Year']
    md_drop = hours + ['Date', 'Year', 'daynum']

    ls_df = df_cpuc.drop(columns=ls_drop).reset_index(drop=True)
    md_df = df_cpuc.drop(
        columns=md_drop).drop_duplicates().reset_index(drop=True)

    # Look the annual total up by ID: the groupby result is sorted by ID
    # while md_df keeps the order of appearance, so a positional
    # assignment could attach totals to the wrong loadshape.
    annual = ls_df.groupby('ID')[hours].sum().sum(axis=1)
    md_df['Source Annual'] = md_df['ID'].map(annual)

    md_df.rename(columns={'lastmod': 'LastMod'}, inplace=True)

    return ls_df, md_df


def get_files(input_dir, tstat=0):
    """
    Use ``get_files`` to load the load shape, measure impact and measure
    material CSV files found directly inside ``input_dir``.

    Parameters
    ----------
    input_dir : str
        Directory searched (non-recursively) for ``*.csv`` files.
    tstat : int or str, default 0
        Passed to ``format_sfm`` when both a ``sim`` and a ``sfm`` hourly
        file are present.

    Returns
    -------
    ls : pandas DataFrame
        Hourly load shapes; the ``sim`` file stacked on the formatted
        ``sfm`` file when more than one hourly file is found, otherwise
        the single hourly file.
    meas : pandas DataFrame
        De-duplicated measure impact columns with ``BldgVint``
        capitalized.
    msr : pandas DataFrame
        De-duplicated measure material columns; the pre-reset index is
        kept as an ``index`` column.
    """
    csv_paths = glob.glob(input_dir+'/*.csv')

    def _matching(*fragments):
        # paths containing at least one of the given fragments
        return [path for path in csv_paths
                if any(fragment in path for fragment in fragments)]

    meas_path = _matching('meas_impacts_2020')[0]
    ls_path = _matching('sim_hourly_wb.csv', 'sfm_hourly_wb.csv')
    msr_path = _matching('current_msr_mat.csv')[0]

    meas_cols = ['BldgType', 'BldgLoc', 'BldgHVAC', 'BldgVint',
                 'EnergyImpactID', 'Version', 'VersionSource',
                 'ElecImpactProfileID', 'GasImpactProfileID', 'SourceDesc']
    meas = pd.read_csv(meas_path)[meas_cols]
    meas = meas.drop_duplicates().reset_index(drop=True)
    meas.BldgVint = meas.BldgVint.apply(str.capitalize)

    if len(ls_path) > 1:
        sim_path = [path for path in ls_path if 'sim' in path][0]
        sim = pd.read_csv(sim_path, low_memory=False)
        sfm_path = [path for path in ls_path if 'sfm' in path][0]
        sfm = pd.read_csv(sfm_path, low_memory=False)

        sfm = format_sfm(sfm, tstat=tstat)
        ls = pd.concat([sim, sfm], axis=0, ignore_index=True)
    else:
        ls = pd.read_csv(ls_path[0], low_memory=False)

    msr_cols = ['BldgType', 'BldgLoc', 'BldgHVAC', 'BldgVint',
                'MeasureID', 'MsrTechID', 'StdTechID', 'PreTechID']
    msr = pd.read_csv(msr_path)[msr_cols].drop_duplicates().reset_index()

    return ls, meas, msr


def _sum_keep_lastmod(df_in, keys):
    """Sum ``df_in`` within ``keys`` and restore each group's first lastmod."""
    grouped = df_in.groupby(keys).sum()
    # summing is meaningless for lastmod; keep the first value per group
    grouped['lastmod'] = df_in.groupby(keys).first().lastmod
    return grouped.reset_index()


def wgt(df_cpuc, sector='com', loadshape=True, vint=True, tstat=False):
    '''
    Use ``wgt`` function to create a weighted average of building vintages,
    and thermostat types.

    Parameters
    ----------
    df_cpuc : pandas DataFrame
        DataFrame of the CPUC load shape or the msr file. The input is not
        modified.
    sector : str, default 'com'
        Sector of the load shape, 'com' or 'res'. Selects which vintage
        weight file is read.
    loadshape : bool, default True
        Enter True if the DataFrame to weight is a load shape, False if it is
        the msr file
    vint : bool, default True
        Enter True to weight the DataFrame based on building vintage
    tstat : bool, default False
        Enter True to weight the DataFrame based on the tstat value

    Returns
    -------
    df_wgt : pandas DataFrame
        DataFrame containing the weighted average of building types. An
        ``era`` column, when present, is returned as ``BldgVint``.

    Raises
    ------
    ValueError
        If ``sector`` is neither 'com' nor 'res'.
    '''
    hr_li = [('hr' + "{:02d}".format(x)) for x in range(1, 25)]

    weight_files = {'com': '../../data/wts_com_vintage_statewide.csv',
                    'res': '../../data/wts_res_vintage_statewide.csv'}
    if sector not in weight_files:
        # previously surfaced later as an unbound-variable error
        raise ValueError("sector must be 'com' or 'res', got {!r}"
                         .format(sector))
    df_wgts = pd.read_csv(weight_files[sector])

    df_object = df_cpuc.copy()
    df_object.BldgVint = df_object.BldgVint.astype(str)
    # keep the original building type; every type containing 'SFm' is
    # collapsed to 'SFm' so it matches the weight tables
    df_object['BldgType_temp'] = df_object['BldgType'].copy()
    df_object['BldgType'] = df_object.BldgType.apply(
        lambda x: 'SFm' if 'SFm' in x else x)

    if loadshape:
        if tstat:
            # the floor and thermostat weights are only needed here
            floor_wts = pd.read_csv('../../data/NumStor.csv')
            tstat_wts = pd.read_csv('../../data/DEER_tstat_weights.csv')
            floor_wts.BldgVint = floor_wts.BldgVint.astype(str)

            tstat_keys = ['TechID', 'SizingID', 'BldgType',
                          'BldgLoc', 'BldgHVAC', 'BldgVint',
                          'enduse', 'daynum']

            df_object = df_object.merge(tstat_wts[['BldgType', 'BldgVint',
                                                   'BldgLoc', 'tstat',
                                                   'tstatwt']],
                                        on=['BldgType', 'BldgVint',
                                            'BldgLoc', 'tstat'],
                                        how='left')

            df_object = df_object.merge(floor_wts, on=['BldgType', 'BldgVint',
                                                       'BldgLoc'], how='left')

            df_mf = df_object.loc[~(df_object.BldgType == 'SFm')].copy()
            df_sf = df_object.loc[(df_object.BldgType == 'SFm')].copy()

            # non-SFm rows: weight by thermostat only
            df_mf[hr_li] = df_mf[hr_li].multiply(df_mf.tstatwt, axis='index')
            df_mf_grp = _sum_keep_lastmod(df_mf, tstat_keys)

            # SFm rows: types containing 'a' get (2 - numstor), types
            # containing 'b' get (numstor - 1), times the thermostat weight
            # NOTE(review): presumably 'a'/'b' are the one- and two-story
            # prototypes -- confirm against NumStor.csv
            df_sf['f_wt'] = (df_sf.BldgType_temp.apply(
                lambda x: 1 if 'a' in x else 0) * (2 - df_sf.numstor) +
                df_sf.BldgType_temp.apply(
                lambda x: 1 if 'b' in x else 0) *
                (df_sf.numstor-1)) * df_sf.tstatwt

            df_sf[hr_li] = df_sf[hr_li].multiply(df_sf.f_wt, axis='index')
            df_sf_grp = _sum_keep_lastmod(df_sf, tstat_keys)

            df_object = pd.concat([df_mf_grp, df_sf_grp],
                                  axis=0, ignore_index=True)

            df_object.drop(columns=['tstatwt', 'numstor', 'f_wt'],
                           inplace=True)

            # thermostat types are now blended into one shape
            df_object['tstat'] = 'Any'

        if vint:
            vint_keys = ['TechID', 'SizingID', 'BldgType',
                         'BldgLoc', 'BldgHVAC', 'tstat',
                         'enduse', 'era', 'daynum']

            df_object = df_object.merge(df_wgts[['BldgType', 'BldgLoc',
                                                 'BldgVint', 'wt_vint',
                                                 'era']],
                                        on=['BldgType', 'BldgLoc', 'BldgVint'],
                                        how='left')

            df_object[hr_li] = df_object[hr_li].multiply(df_object['wt_vint'],
                                                         axis='index')

            df_grp = df_object.groupby(vint_keys).sum().reset_index()

            df_grp['lastmod'] = df_object.groupby(
                vint_keys).first().lastmod.values

            # divide by the summed weights to turn the weighted sum into a
            # weighted average
            df_grp[hr_li] = df_grp[hr_li].divide(
                df_grp['wt_vint'], axis='index')

            df_object = df_grp.drop(columns='wt_vint').copy()
    else:
        # msr file: keep the first row of every era, no averaging
        df_object = df_object.merge(df_wgts[['BldgType', 'BldgLoc',
                                             'BldgVint', 'wt_vint',
                                             'era']],
                                    on=['BldgType', 'BldgLoc', 'BldgVint'],
                                    how='left')

        df_grp = df_object.groupby(
            ['BldgType', 'BldgLoc', 'BldgHVAC', 'era']).first().reset_index()

        df_object = df_grp.copy()
        df_object.drop(columns=['BldgVint', 'wt_vint'], inplace=True)

    df_object.rename(columns={'era': 'BldgVint'}, inplace=True)

    return df_object


def format_sql(cdls, ratios, md, impact_type='kW', IsProposed=True,
               StartDate='1/1/2022', ExpiryDate=np.nan, ClaimSpec=True,
               FilingSpec=True, LastModComment=np.nan, LastModBy=np.nan,
               Created=np.nan, CreatedComment=np.nan, CreatedBy=np.nan,
               impact_profile_id=np.nan, MeasureID=np.nan, LastMod=np.nan,
               BldgVint=np.nan, BldgType=np.nan, BldgHVAC=np.nan,
               BldgLoc=np.nan, Version='DEER2022', Sector=np.nan,
               VersionSource=np.nan, wh_calc=False, deer2011=False,
               PDAF_mfactor='M', WKWE_mfactor='M'):
    """
    Use ``format_sql`` to prepare the condensed loadshape and
    corresponding ratios for export into the CPUC SQL database.

    The condensed loadshape and the ratios are pivoted to one row per
    ``ID``, joined with columns taken from ``md`` and completed with the
    constant values passed as keyword arguments.

    Parameters
    ----------
    cdls : pandas DataFrame
        Condensed loadshape, output of ``batch_condense``
    ratios : pandas DataFrame
        Ratios, output of ``batch_condense``
    md : pandas DataFrame
        Metadata joined on ``ID``. The columns read from it depend on
        ``wh_calc`` and ``deer2011``.
    impact_type : str, default 'kW'
        Impact type of the load shape, 'kW' or 'therm'. It prefixes every
        generated column name and selects the output column list.
    wh_calc : bool, default False
        Enter True if the loadshape is from the water heater calculator
    deer2011 : bool, default False
        Enter True if the loadshape is from the DEER2011 shapes
    PDAF_mfactor, WKWE_mfactor : str, default 'M'
        Written unchanged to the columns of the same name.

    Other inputs are constants written to the column of the same name
    when that column is not taken from ``md`` for the selected source:
    IsProposed, StartDate, ExpiryDate, ClaimSpec, FilingSpec,
    LastModComment, LastModBy, Created, CreatedComment, CreatedBy,
    impact_profile_id, MeasureID, LastMod, BldgVint, BldgType, BldgHVAC,
    BldgLoc, Version, Sector, VersionSource.

    Returns
    -------
    sql_tot : pandas DataFrame
        DataFrame containing the condensed loadshape, formatted for
        export into the CPUC SQL database.
    """
    cdls_copy = cdls.copy()
    ratios_copy = ratios.copy()

    if impact_type == 'kW':
        impact_profile = 'ElecImpactProfileID'
    elif impact_type == 'therm':
        impact_profile = 'GasImpactProfileID'

    def _month_tag(months):
        # first three letters of the month name, e.g. 1 -> 'Jan'
        return months.apply(
            lambda x: datetime(2020, x, 1).strftime("%B")[0:3])

    def _pivot_join(frame, specs):
        # one wide table per (label column, value column), joined on ID
        pivots = [frame.pivot(index='ID', columns=label, values=value)
                  for label, value in specs]
        joined = pivots[0]
        for extra in pivots[1:]:
            joined = joined.merge(extra, how='left', on='ID')
        return joined

    # column labels such as 'kWMaxDayJanHr01'
    hour_tag = _month_tag(cdls_copy['Month']) + 'Hr' + \
        cdls_copy['Hour'].apply("{:02d}".format)
    cdls_copy['m-hr Peak'] = impact_type + 'MaxDay' + hour_tag
    cdls_copy['m-hr Avg'] = impact_type + 'AvgWkday' + hour_tag
    cdls_copy['m-hr End'] = impact_type + 'AvgWkend' + hour_tag

    # column labels such as 'kWpctJan'
    ratio_tag = _month_tag(ratios_copy['Month'])
    ratios_copy['m mo brk'] = impact_type + 'pct' + ratio_tag
    ratios_copy['m pdaf'] = impact_type + 'PDAF' + ratio_tag
    ratios_copy['m wkwkd'] = impact_type + 'WkendWkdayRatio' + ratio_tag

    cdls_sql = _pivot_join(cdls_copy, [('m-hr Peak', 'Peak'),
                                       ('m-hr Avg', 'Avg'),
                                       ('m-hr End', 'End')])
    ratios_sql = _pivot_join(ratios_copy, [('m mo brk', 'Mo Brk'),
                                           ('m pdaf', 'PDAF'),
                                           ('m wkwkd', 'Wknd/Wkd')])

    sql_tot = cdls_sql.merge(ratios_sql, on='ID')

    # constants shared by every source
    shared = {'IsProposed': IsProposed, 'StartDate': StartDate,
              'ExpiryDate': ExpiryDate, 'ClaimSpec': ClaimSpec,
              'FilingSpec': FilingSpec, 'LastModComment': LastModComment,
              'LastModBy': LastModBy, 'Created': Created,
              'CreatedComment': CreatedComment, 'CreatedBy': CreatedBy}

    if wh_calc:
        md_cols = ['ID', 'TechID', 'BldgType', 'BldgLoc', 'Sector']
        fill = dict(shared)
        fill[impact_profile] = impact_profile_id
        fill.update({'MeasureID': MeasureID, 'LastMod': LastMod,
                     'BldgVint': BldgVint, 'BldgHVAC': BldgHVAC,
                     'Version': Version, 'VersionSource': VersionSource})
    elif deer2011:
        md_cols = ['ID', 'TechID', 'Version', 'Sector']
        fill = dict(shared)
        fill.update({'MeasureID': MeasureID, 'BldgVint': BldgVint,
                     'LastMod': LastMod, 'BldgType': BldgType,
                     'BldgLoc': BldgLoc, 'BldgHVAC': BldgHVAC,
                     'VersionSource': VersionSource})
    else:
        md_cols = ['ID', 'MeasureID', 'TechID', 'LastMod',
                   'BldgType', 'BldgLoc', 'BldgVint', 'BldgHVAC']
        fill = dict(shared)
        fill['Version'] = Version

    sql_tot = sql_tot.merge(md[md_cols], on='ID', how='left')
    sql_tot[list(fill)] = list(fill.values())

    # the database ID column is emitted empty
    if impact_type == 'kW':
        sql_tot.rename(columns={'ID': 'LoadShapeElecID'}, inplace=True)
        sql_tot['LoadShapeElecID'] = np.nan
    elif impact_type == 'therm':
        sql_tot.rename(columns={'ID': 'LoadShapeGasID'}, inplace=True)
        sql_tot['LoadShapeGasID'] = np.nan

    # look up Sector / VersionSource only when every row has a MeasureID
    if not sql_tot.MeasureID.isnull().values.any():
        categorical_data = pd.read_csv(
            '../../../data/measure_id_with_categorical_data.csv')

        sql_tot = sql_tot.merge(categorical_data[[
            'MeasureID', 'Sector', 'VersionSource']],
            on='MeasureID', how='left')

    sql_tot['PDAF_mfactor'] = PDAF_mfactor
    sql_tot['WKWE_mfactor'] = WKWE_mfactor

    cols = lsd.sql_cols()[impact_type]

    return sql_tot[cols]


def format_8760(df_8760, id_num='None', load='load',
                hour='Hour', daynum='daynum'):
    """
    Use ``format_8760`` to format an 8760 loadshape before conversion to
    the Lopeshape format.

    Parameters
    ----------
    df_8760 : pandas DataFrame
        DataFrame of the 8760 load shape in long form (one row per day
        and hour)
    id_num : str, default 'None'
        Identifier for the load shape. The literal string ``'None'``
        means that no ``ID`` column is added.
    load : str, default 'load'
        Name of the column with the load values
    hour : str, default 'Hour'
        Name of the column with the hour values
    daynum : str, default 'daynum'
        Name of the column with the daynum values

    Returns
    -------
    ls_fmt : pandas DataFrame
        DataFrame with one row per day and one string-named column per
        hour value, for conversion to the Lopeshape format
    """

    # long -> wide: days down the rows, hours across the columns
    wide = df_8760.pivot(index=daynum, columns=hour, values=load)
    ls_fmt = wide.reset_index()

    # plain string labels; assigning a list also clears the axis name
    ls_fmt.columns = [str(label) for label in ls_fmt.columns]

    if id_num != 'None':
        ls_fmt['ID'] = id_num

    return ls_fmt


def format_sfm(df_cpuc, tstat=1):
    """
    Use ``format_sfm`` to format CPUC single family loadshapes before
    conversion to the Lopeshape format.

    Every input row carries two sets of hourly columns, ``hr01a`` ...
    ``hr24a`` and ``hr01b`` ... ``hr24b``. They are stacked into two rows
    with columns ``hr01`` ... ``hr24`` and the suffix is appended to
    ``BldgType``.

    Parameters
    ----------
    df_cpuc : pandas DataFrame
        DataFrame of the CPUC single family load shape
    tstat : int or str, default 1
        Specify tstat value to filter by. To include all tstat values
        enter 0 or 'all'

    Returns
    -------
    ls_fmt : pandas DataFrame
        DataFrame of formatted load shape, ready for conversion to the
        Lopeshape format: all 'a' rows followed by all 'b' rows.
    """

    id_cols = ['TechID', 'SizingID', 'BldgType',
               'BldgVint', 'BldgLoc', 'BldgHVAC',
               'tstat', 'enduse', 'daynum', 'lastmod']
    hr_li = ['hr{:02d}'.format(x) for x in range(1, 25)]

    keep_all = tstat == 0 or tstat == 'all'
    rows = df_cpuc if keep_all else df_cpuc[df_cpuc['tstat'] == tstat]

    halves = []
    for suffix in ('a', 'b'):
        suffixed = [name + suffix for name in hr_li]
        # rename returns a new frame, so the input is left untouched
        half = rows[id_cols + suffixed].rename(
            columns=dict(zip(suffixed, hr_li)))
        half['BldgType'] = half['BldgType'] + suffix
        halves.append(half)

    return pd.concat(halves).reset_index(drop=True)


def unlopeshape(cdls, ratios, peak_hr=16, off_hr=9, year=2015,
                source_annual=1):
    """
    Use ``unlopeshape`` to convert a condensed load shape to the
    365 x 24 format (366 x 24 when ``year`` is a leap year).

    Parameters
    ----------
    cdls : pandas DataFrame
        Condensed loadshape, output of ``batch_condense``. The columns
        ``ID``, ``Month`` and ``Hour`` are used as keys and the
        ``Peak``, ``Avg`` and ``End`` columns supply the hourly shapes.
    ratios : pandas DataFrame
        Ratios, output of ``batch_condense``. The columns ``ID``,
        ``Month``, ``Mo Brk``, ``PDAF`` and ``Wknd/Wkd`` are used. The
        expansion assumes twelve rows per ID with the rows of each ID
        stored together.
    peak_hr : int, default 16
        Accepted for backward compatibility. It only ever fed
        coincident-peak figures that were never part of the returned
        DataFrame, so it has no effect on the result.
    off_hr : int, default 9
        Accepted for backward compatibility; no effect on the result
        (see ``peak_hr``).
    year : int, default 2015
        Year of the expanded load shape
    source_annual : float, default 1
        Annual total that the monthly breakdown (``Mo Brk``) is
        multiplied by.

    Returns
    -------
    df : pandas DataFrame
        One row per ID and day with the columns ``ID``, ``Month``,
        ``Daynum`` and ``DayType`` followed by one load column per hour.
    """

    # Work on a positional index so the ratio columns line up row by row
    # with the merged month table, whatever index the caller passed in.
    ratios = ratios.reset_index(drop=True)

    # NOTE(review): NumDays keeps February at 28 days even when ``year``
    # is a leap year - confirm whether the monthly factor should use 29.
    df_hol = pd.DataFrame({'Month': list(range(1, 13)),
                           'Holidays': [2, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1],
                           'NumDays': [31, 28, 31, 30, 31, 30, 31, 31, 30, 31,
                                       30, 31]})
    df_ls = ratios[['ID', 'Month']].merge(df_hol, on='Month', how='left')

    # Weekdays are estimated as 5/7 of the month less its holidays; the
    # remaining days are weighted by the weekend/weekday ratio.
    weekdays = 5/7 * df_ls['NumDays'] - df_ls['Holidays']
    mo_factor = ((df_ls['NumDays'] - weekdays) * ratios['Wknd/Wkd'] +
                 weekdays)
    avg_weekday = ratios['Mo Brk'] * source_annual / mo_factor

    # Daily totals per day type. 1 = peak day, 2 = average weekday,
    # 3 = average weekend day. 4 mirrors the peak day about the weekday
    # average; 5-8 sit one and two thirds of the way from the weekday
    # average towards type 4 (5, 6) and towards the peak day (7, 8).
    df_usage = df_ls[['ID', 'Month']].copy()
    df_usage[1] = avg_weekday * ratios['PDAF']
    df_usage[2] = avg_weekday
    df_usage[3] = avg_weekday * ratios['Wknd/Wkd']
    df_usage[4] = df_usage[2] * 2 - df_usage[1]
    df_usage[5] = (df_usage[4] - df_usage[2]) * 2 / 3 + df_usage[2]
    df_usage[6] = (df_usage[4] - df_usage[2]) / 3 + df_usage[2]
    df_usage[7] = (df_usage[1] - df_usage[2]) * 2 / 3 + df_usage[2]
    df_usage[8] = (df_usage[1] - df_usage[2]) / 3 + df_usage[2]

    cal, calnum = perpetual_calendar(year)
    n_days = 366 if calendar.isleap(year) else 365
    n_ids = df_usage['ID'].nunique()

    new_ls = pd.DataFrame([], columns=['ID', 'Month', 'DayType',
                                       'LS Temp', 'Hour', 'Daynum',
                                       'Load'])
    # Twelve monthly rows per ID repeated 2 * n_days times give the
    # 24 * n_days hourly rows each ID needs.
    new_ls['ID'] = (np.repeat(df_usage['ID'], 2 * n_days)
                    .reset_index(drop=True))
    new_ls['Hour'] = list(range(1, 25)) * n_days * n_ids
    new_ls['DayType'] = list(np.repeat(cal[str(calnum)], 24)) * n_ids
    new_ls['Month'] = list(np.repeat(cal['Mon'], 24)) * n_ids
    # Name of the condensed shape column each day type is drawn from.
    shape_for_type = {1: 'Peak', 3: 'End'}
    new_ls['LS Temp'] = new_ls['DayType'].apply(
        lambda day_type: shape_for_type.get(day_type, 'Avg'))
    new_ls['Daynum'] = (list(np.repeat(list(range(1, n_days + 1)), 24)) *
                        n_ids)

    # Daily total for the row's ID / month / day type.
    new_ls = new_ls.merge(df_usage.melt(id_vars=['ID', 'Month'])
                          .rename(columns={'variable': 'DayType',
                                           'value': 'Daysum'}),
                          on=['ID', 'Month', 'DayType'], how='left')
    # Sum over the hours of the matching condensed shape (denominator).
    new_ls = new_ls.merge(cdls.groupby(['Month', 'ID']).sum().reset_index()
                          .drop(columns='Hour')
                          .melt(id_vars=['ID', 'Month'])
                          .rename(columns={'variable': 'LS Temp',
                                           'value': 'Den Temp'}),
                          on=['ID', 'Month', 'LS Temp'], how='left')
    # Value of the matching condensed shape at this hour (numerator).
    new_ls = new_ls.merge(cdls.melt(id_vars=['ID', 'Month', 'Hour'])
                          .rename(columns={'variable': 'LS Temp',
                                           'value': 'Num Temp'}),
                          on=['ID', 'Month', 'Hour', 'LS Temp'],
                          how='left')

    # Hourly load = share of the shape in this hour times the daily
    # total. Infinite results are zeroed for every year; previously only
    # the common-year path applied this guard.
    new_ls['Load'] = (new_ls['Num Temp'].astype(np.float64) *
                      new_ls['Daysum'].astype(np.float64) /
                      new_ls['Den Temp']).replace([np.inf, -np.inf], 0)\
        .astype(float)

    new_ls.drop(columns=['LS Temp', 'Daysum', 'Num Temp', 'Den Temp'],
                inplace=True)

    out_df = pd.pivot_table(new_ls, index=['ID', 'Month', 'Daynum', 'DayType'],
                            columns='Hour', values='Load').reset_index()

    return out_df


def perpetual_calendar(year):
    """
    Use ``perpetual_calendar`` to generate the calendar and day types
    for any year.

    The calendar is read from ``../../../data/Perpetual_Calendar.csv``,
    resolved against the current working directory.

    Parameters
    ----------
    year : int
        Specify the year to grab the calendar

    Returns
    -------
    cal : pandas DataFrame
        Calendar with day types for the associated year. Holds the
        columns ``Mon`` and ``Daynum`` plus the seven day-type columns
        of the matching half of the file: ``'1'`` .. ``'7'`` for common
        years (row label 365 removed) or ``'8'`` .. ``'14'`` for leap
        years.
    calnum: int
        Numerical identifier for the type of calendar, 1-14. Values
        1-7 are common years whose 1 January falls on Sunday through
        Saturday; 8-14 are the leap years in the same weekday order.
    """

    pcal = pd.read_csv('../../../data/Perpetual_Calendar.csv')

    # datetime.weekday() counts Monday as 0 and Sunday as 6, while the
    # calendar numbering starts at 1 on Sunday.
    calnum = (datetime(year, 1, 1).weekday() + 1) % 7 + 1

    if calendar.isleap(year):
        calnum += 7
        day_types = [str(num) for num in range(8, 15)]
        cal = pcal[['Mon 2', 'Daynum 2'] + day_types]\
            .rename(columns={'Mon 2': 'Mon', 'Daynum 2': 'Daynum'})
    else:
        day_types = [str(num) for num in range(1, 8)]
        # Row label 365 is the 366th row of the file, which a common
        # year does not use.
        cal = pcal[['Mon 1', 'Daynum 1'] + day_types]\
            .rename(columns={'Mon 1': 'Mon', 'Daynum 1': 'Daynum'})\
            .drop(365)

    return cal, calnum


def sql_to_cdls(sql_df, field_id='LoadShapeElecID'):
    """
    Use ``sql_to_cdls`` to convert the sql tables to the cdls format.

    Columns are classified by name: anything containing ``hr`` (case
    insensitive) is an hourly shape value, anything containing ``pct``,
    ``WkendWkday`` or ``PDAF`` (and not ``mfactor``) is a ratio, and
    the rest is metadata. The month comes from the three-letter
    abbreviation in the column name (a name with none of ``Jan`` ..
    ``Nov`` counts as month 12) and the hour from its last two
    characters.

    Parameters
    ----------
    sql_df : pandas DataFrame
        DataFrame with the sql formatted shapes
    field_id : str, Default 'LoadShapeElecID'
        Name of the ID field

    Returns
    -------
    cdls : pandas DataFrame
        Condensed load shape with the columns ``Month``, ``ID``,
        ``Hour``, ``Peak``, ``Avg`` and ``End``, sorted by ID, month
        and hour.
    ratios : pandas DataFrame
        Ratios with the columns ``Month``, ``ID``, ``Mo Brk``, ``PDAF``
        and ``Wknd/Wkd``, sorted by ID and month.
    md : pandas DataFrame
        Remaining (metadata) columns with the ID field renamed to
        ``ID``, sorted by ID.
    """
    month_abbrs = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov')

    def month_of(col_name):
        # First matching abbreviation wins; no match means December.
        for number, abbr in enumerate(month_abbrs, start=1):
            if abbr in col_name:
                return number
        return 12

    def unpivot(frame, names):
        return pd.melt(frame, id_vars=field_id, value_vars=names)

    all_cols = list(sql_df.columns)
    cdls_cols = [col for col in all_cols if 'hr' in col.lower()]
    ratios_cols = [col for col in all_cols
                   if ('pct' in col or 'WkendWkday' in col or 'PDAF' in col)
                   and 'mfactor' not in col]
    md_cols = [col for col in all_cols
               if col not in ratios_cols and col not in cdls_cols]

    shapes_wide = sql_df.loc[:, [field_id] + cdls_cols]
    ratios_wide = sql_df.loc[:, [field_id] + ratios_cols]
    metadata = sql_df.loc[:, md_cols]

    # Hourly shapes. The weekday and weekend values are attached by
    # position, so their columns must appear in the same month/hour
    # order as the MaxDay columns.
    peak_long = unpivot(shapes_wide,
                        [col for col in cdls_cols if 'MaxDay' in col])
    weekday_long = unpivot(shapes_wide,
                           [col for col in cdls_cols if 'AvgWkday' in col])
    weekend_long = unpivot(shapes_wide,
                           [col for col in cdls_cols if 'AvgWkend' in col])

    peak_long['Month'] = peak_long['variable'].apply(month_of)
    peak_long['Hour'] = peak_long['variable'].apply(
        lambda col_name: int(col_name[-2:]))
    cdls_long = peak_long.drop(columns='variable').rename(
        columns={field_id: 'ID', 'value': 'Peak'}).copy()
    cdls_long['Avg'] = weekday_long['value'].values
    cdls_long['End'] = weekend_long['value'].values

    # Monthly ratios, attached by position in the same way.
    mobrk_long = unpivot(ratios_wide,
                         [col for col in ratios_cols if 'pct' in col])
    pdaf_long = unpivot(ratios_wide,
                        [col for col in ratios_cols if 'PDAF' in col])
    wkndwkd_long = unpivot(ratios_wide,
                           [col for col in ratios_cols
                            if 'WkendWkday' in col])

    mobrk_long['Month'] = mobrk_long['variable'].apply(month_of)
    ratios_long = mobrk_long.drop(columns='variable').rename(
        columns={field_id: 'ID', 'value': 'Mo Brk'}).copy()
    ratios_long['PDAF'] = pdaf_long['value'].values
    ratios_long['Wknd/Wkd'] = wkndwkd_long['value'].values

    cdls_out = cdls_long[['Month', 'ID', 'Hour', 'Peak', 'Avg', 'End']]\
        .sort_values(by=['ID', 'Month', 'Hour']).reset_index(drop=True)
    ratios_out = ratios_long[['Month', 'ID', 'Mo Brk', 'PDAF', 'Wknd/Wkd']]\
        .sort_values(by=['ID', 'Month']).reset_index(drop=True)
    md_out = metadata.rename(columns={field_id: 'ID'})\
        .sort_values(by='ID').reset_index(drop=True)

    return (cdls_out, ratios_out, md_out)


def format_wh_calc(df_ls, df_info):
    """
    Use ``format_wh_calc`` to format shapes from the WH calculator.

    Parameters
    ----------
    df_ls : pandas DataFrame
        DataFrame with WH shapes. Must hold 8760 hourly rows (365 days
        of 24 hours) and an ``Hour`` column. Columns whose name
        contains ``tot`` or ``therm`` are treated as load columns;
        columns that are zero in every row are ignored.
    df_info : pandas DataFrame
        DataFrame with WH info. Its first three rows are joined with
        ``__`` to build one identifier per column (the first remaining
        column is skipped).

    Returns
    -------
    df_out : pandas DataFrame
        DataFrame with formatted load shapes, one ``format_8760`` result
        per load column stacked on a fresh index.
    md : pandas DataFrame
        DataFrame with metadata on the shapes: the ``TechID``,
        ``BldgType``, ``BldgLoc`` and ``Impact`` parts of each
        identifier, the identifier itself (``ID``) and the ``Sector``
        ('Res' for building types DMo, SFm and MFm, otherwise 'Com').
    """
    unnamed_cols = [col for col in df_info.columns if 'Unnamed' in col]
    df_info_cleaned = df_info.drop(columns=unnamed_cols + ['896'])

    id_arr = (df_info_cleaned.iloc[0] + '__' +
              df_info_cleaned.iloc[1] + '__' +
              df_info_cleaned.iloc[2]).values[1:]

    cz_cols = [col for col in df_ls.columns if 'CZ' in col]
    df_shapes = df_ls.drop(columns=cz_cols + ['Index', '8760s ---->'])

    # Copy the slice so adding a column never writes through to a view.
    df_shapes = df_shapes.loc[:, (df_shapes != 0).any(axis=0)].copy()
    df_shapes['daynum'] = np.repeat(range(1, 366), 24)

    load_cols = [col for col in df_shapes.columns
                 if 'tot' in col or 'therm' in col]

    # Collect the pieces and concatenate once rather than regrowing the
    # output frame on every iteration.
    frames = []
    for load_col in load_cols:
        parts = load_col.split('.')
        # A numeric ".N" suffix selects the N-th identifier; presumably
        # this is the suffix pandas appends to repeated column names.
        i = int(parts[-1]) if len(parts) > 1 else 0
        impact = parts[0].split('_')[0]

        frames.append(format_8760(
            df_shapes.loc[:, ['daynum', 'Hour', load_col]],
            load=load_col,
            id_num=(id_arr[i] + '__' + impact)))

    if frames:
        df_out = pd.concat(frames, axis=0, ignore_index=True)
    else:
        df_out = pd.DataFrame([])

    md = pd.DataFrame([ident.split('__') for ident in df_out.ID],
                      columns=['TechID', 'BldgType', 'BldgLoc', 'Impact'])

    md['ID'] = (
        md.TechID + '__' + md.BldgType + '__' + md.BldgLoc + '__' + md.Impact)

    # Every day of a shape repeats its identifier; keep one row per ID.
    md = md.drop_duplicates(ignore_index=True)

    residential = ('DMo', 'SFm', 'MFm')
    md['Sector'] = md.BldgType.apply(
        lambda bldg: 'Res' if bldg in residential else 'Com')

    return df_out, md
