import etdmap
import etdtransform
import etdanalyze

# Set the standard etdmap options
etdmap.options.mapped_folder_path = "../../demodata/mapped_analyze" # change to path to folder where you would like to store mapped data
etdmap.options.bsv_metadata_file = "../../demodata/bsv_metadata_after_mapping.xlsx" # change to path to the Excel file with the data processor metadata (BSV is the data processor in this case)
etdmap.options.aggregate_folder_path = "../../demodata/aggregate"

etdtransform.options.mapped_folder_path = "../../demodata/mapped_analyze" # change to path to folder where you would like to store mapped data
etdtransform.options.aggregate_folder_path = "../../demodata/aggregate"

etdtransform.options.weather_data_folder_path = "../../demodata/weather_data/yearly"
etdtransform.options.weather_stations_summary_file = "../../demodata/weather_data/SummaryPerYear.xlsx"

import ibis
from ibis import _
import ibis.selectors as s

household_tables = etdtransform.load_data.get_household_tables()

# get 5 minute data for households
household_table_5min = household_tables['5min']

project = 1

# select a single project for analysis
household_table_5min = household_table_5min.filter(_.ProjectIdBSV == project)

household_table_5min.head().to_pandas()

# Choose variables of interest for our analysis
vars = [
    "Zelfgebruik",
    "ZonopwekBruto",
    "ElektriciteitsgebruikTotaalBruto",
    "TerugleveringTotaalNetto"
]

household_table_5min = household_table_5min.select(
    ['ReadingDate', 'ProjectIdBSV', 'HuisIdBSV'] + 
    vars + 
    ['Oppervlakte', 'Warmtebehoefte']
)


# Divide by surface area to normalize the data to 100m2 and convert to KWh
hh_table_normalized = etdanalyze.analysis_helpers.normalize_100m2(
    hh_table=household_table_5min, 
    interval="5min",
    col_names = vars
)

project_normalized = hh_table_normalized.group_by("ReadingDate", "ProjectIdBSV").aggregate([
    hh_table_normalized.ZelfgebruikPer100M2KW.mean().name("ZelfgebruikPer100M2KWH"),
    hh_table_normalized.ZonopwekBrutoPer100M2KW.mean().name("ZonopwekBrutoPer100M2KWH"),
    hh_table_normalized.ElektriciteitsgebruikTotaalBrutoPer100M2KW.mean().name("ElektriciteitsgebruikTotaalBrutoPer100M2KWH"),
    hh_table_normalized.TerugleveringTotaalNettoPer100M2KW.mean().name("TerugleveringTotaalNettoPer100M2KWH")
])

print(project_normalized.schema())

ibis.Schema {
  ReadingDate                                  timestamp(9)
  ProjectIdBSV                                 int64
  ZelfgebruikPer100M2KWH                       float64
  ZonopwekBrutoPer100M2KWH                     float64
  ElektriciteitsgebruikTotaalBrutoPer100M2KWH  float64
  TerugleveringTotaalNettoPer100M2KWH          float64
}

project_normalized.head().to_pandas()

summer_table = project_normalized.filter(
    (_["ReadingDate"].month() == 6)
    | (_["ReadingDate"].month() == 7)
    | (_["ReadingDate"].month() == 8)
)

winter_table = project_normalized.filter(
    (_["ReadingDate"].month() == 12)
    | (_["ReadingDate"].month() == 1)
    | (_["ReadingDate"].month() == 2)
)

summer_table.head().to_pandas()

# plot_var = "ElektriciteitsgebruikTotaalBrutoPer100M2KW"
# plot_var_name = "Elektriciteitsgebruik Totaal Bruto"

plot_var = "ZonopwekBrutoPer100M2KWH"
plot_var_name = "ZonopwekBruto (KWh) Per 100m2 "


fig = etdanalyze.plot_functions.plot_daily_profile(
    df = summer_table.to_pandas(),
    title = f"Summer - Project {project} - {plot_var_name}",
    plot_var=plot_var,
    plot_var_name=plot_var_name
)

plot_vars = {
    "ZelfgebruikPer100M2KWH": "Zelfgebruik woning per 100m2 (kWh)",
    "ZonopwekBrutoPer100M2KWH": "Zonopwek Bruto per 100m2 (kWh)",
    "ElektriciteitsgebruikTotaalBrutoPer100M2KWH": "Elektriciteitsgebruik Bruto per 100m2 (kWh)",
    "TerugleveringTotaalNettoPer100M2KWH": "Teruglevering Netto per 100m2 (kWh)",
}

fig = etdanalyze.plot_functions.plot_daily_profile_mean_combined(
df = winter_table.to_pandas(),
title = f"Winter - Project {project} - {plot_var_name}",
plot_vars=plot_vars.keys(),
# plot_interval=plot_interval,
plot_var_names=plot_vars.values()
)

# It is also possible to load project data directly
project_tables = etdtransform.load_data.get_project_tables()

project_5min_table = project_tables["5min"]
print(project_5min_table.schema())
print(project)

ibis.Schema {
  index                                      int64
  ReadingDate                                timestamp(9)
  ProjectIdBSV                               int64
  ElektriciteitNetgebruikHoogDiff            float64
  ElektriciteitNetgebruikLaagDiff            float64
  ElektriciteitTerugleveringHoogDiff         float64
  ElektriciteitTerugleveringLaagDiff         float64
  ElektriciteitsgebruikWTWDiff               float64
  ElektriciteitsgebruikWarmtepompDiff        float64
  ElektriciteitsgebruikBoosterDiff           float64
  ElektriciteitsgebruikBoilervatDiff         float64
  ElektriciteitsgebruikRadiatorDiff          float64
  ZonopwekBruto                              float64
  TerugleveringTotaalNetto                   float64
  ElektriciteitsgebruikTotaalNetto           float64
  Netuitwisseling                            float64
  ElektriciteitsgebruikTotaalWarmtepomp      float64
  ElektriciteitsgebruikTotaalGebouwgebonden  float64
  ElektriciteitsgebruikTotaalHuishoudelijk   float64
  Zelfgebruik                                float64
  ElektriciteitsgebruikTotaalBruto           float64
  n                                          int64
  Weerstation                                string
  STN                                        int64
  HH                                         int64
  YYYYMMDD                                   int64
  STN_right                                  int64
  YYYYMMDD_right                             int64
  HH_right                                   int64
  DD                                         int64
  FH                                         int64
  FF                                         int64
  FX                                         int64
  T                                          int64
  T10N                                       string
  TD                                         int64
  SQ                                         int64
  Q                                          int64
  DR                                         int64
  RH                                         int64
  P                                          int64
  VV                                         string
  N                                          string
  U                                          int64
  WW                                         string
  IX                                         int64
  M                                          string
  R                                          string
  S                                          string
  O                                          string
  Y                                          string
  Temperatuur                                float64
  Windsnelheid                               float64
  Vochtigheid                                int64
  Dampdruk                                   float64
  Gevoelstemperatuur                         float64
  TemperatuurRA                              float64
  GevoelstemperatuurRA                       float64
  Koudste2WkTemperatuur                      boolean
  Koudste2WkGevoelstemperatuur               boolean
  temp_date_string                           string
  date_column                                date
  datetime_column                            timestamp
  year                                       int32
  week_of_year                               int32
  day_of_week                                int16
  TemperatuurISOWk                           float64
  GevoelstemperatuurISOWk                    float64
  days_in_week                               int64
  Koudste2ISOWkTemperatuur                   boolean
  Koudste2ISOWkGevoelstemperatuur            boolean
}
1

plot_var = "ElektriciteitsgebruikTotaalNetto"

filtered_table = project_5min_table.filter(
    _.ProjectIdBSV == project
).select(["ReadingDate", "ProjectIdBSV"] + [plot_var])

fig = etdanalyze.plot_functions.plot_load_duration_curve(
    df=filtered_table.to_pandas(),
    diff_column=plot_var,
    interval="5min"
)

ElektriciteitsgebruikTotaalNetto stats for 5min - Project None: 0.30% above 1.5 kW

coldest_table = project_5min_table.filter(
    _.ProjectIdBSV == project,
    _.Koudste2WkTemperatuur == True
).select(["ReadingDate", "ProjectIdBSV", "Temperatuur"] + [plot_var])


coldest_table.schema()

ibis.Schema {
  ReadingDate                       timestamp(9)
  ProjectIdBSV                      int64
  Temperatuur                       float64
  ElektriciteitsgebruikTotaalNetto  float64
}

fig = etdanalyze.plot_functions.plot_var_vs_temp(
    coldest_table.to_pandas(),
    var = plot_var,
    title = "Coldest two weeks"
)

	ReadingDate	ProjectIdBSV	HuisIdBSV	ElektriciteitNetgebruikLaagDiff	ElektriciteitsgebruikWTWDiff	ElektriciteitsgebruikWarmtepompDiff	...	date_column	datetime_column	year	week_of_year	day_of_week	TemperatuurISOWk	GevoelstemperatuurISOWk	days_in_week	Koudste2ISOWkTemperatuur	Koudste2ISOWkGevoelstemperatuur
0	2019-01-01 00:05:00	1	1	0.000	0.00	0.00	...	2019-01-01	2019-01-01 01:00:00	2019	1	1	5.83125	1.242552	8	False	False
1	2019-01-01 00:10:00	1	1	0.135	0.00	0.05	...	2019-01-01	2019-01-01 01:00:00	2019	1	1	5.83125	1.242552	8	False	False
2	2019-01-01 00:15:00	1	1	0.116	0.00	0.01	...	2019-01-01	2019-01-01 01:00:00	2019	1	1	5.83125	1.242552	8	False	False
3	2019-01-01 00:20:00	1	1	0.106	0.00	0.01	...	2019-01-01	2019-01-01 01:00:00	2019	1	1	5.83125	1.242552	8	False	False
4	2019-01-01 00:25:00	1	1	0.106	0.01	0.01	...	2019-01-01	2019-01-01 01:00:00	2019	1	1	5.83125	1.242552	8	False	False

	ReadingDate	ProjectIdBSV	ZelfgebruikPer100M2KWH	ZonopwekBrutoPer100M2KWH	ElektriciteitsgebruikTotaalBrutoPer100M2KWH	TerugleveringTotaalNettoPer100M2KWH
0	2019-10-11 07:30:00	1	0.002132	0.002132	0.543175	0.000000
1	2019-10-11 07:50:00	1	0.067145	0.067145	0.525649	0.000000
2	2019-10-11 08:30:00	1	0.144552	0.160935	0.461456	0.016383
3	2019-10-11 10:40:00	1	0.277472	0.593646	0.425542	0.316174
4	2019-10-11 11:40:00	1	0.341063	0.810002	0.491637	0.468939

	ReadingDate	ProjectIdBSV	ElektriciteitsgebruikTotaalBrutoPer100M2KWH
0	2019-06-01 00:25:00	1	0.312193
1	2019-06-01 00:35:00	1	0.347553
2	2019-06-01 00:50:00	1	0.311570
3	2019-06-01 01:15:00	1	0.338389
4	2019-06-01 03:15:00	1	0.393386

Analysis of aggregated ETD data¶

Configuration¶

Load data¶

Set variables of interest and normalize by surface area¶

Produce a seasonal day profile¶

Produce a load duration curve¶

Produce a graph of the coldest week¶