In [1]:
import etdmap
import etdtransform
import etdanalyze
# Set the standard etdmap options
etdmap.options.mapped_folder_path = "../../demodata/mapped_analyze" # change to path to folder where you would like to store mapped data
etdmap.options.bsv_metadata_file = "../../demodata/bsv_metadata_after_mapping.xlsx" # change to path to the Excel file with the data processor metadata (BSV is the data processor in this case)
etdmap.options.aggregate_folder_path = "../../demodata/aggregate"
etdtransform.options.mapped_folder_path = "../../demodata/mapped_analyze" # change to path to folder where you would like to store mapped data
etdtransform.options.aggregate_folder_path = "../../demodata/aggregate"
etdtransform.options.weather_data_folder_path = "../../demodata/weather_data/yearly"
etdtransform.options.weather_stations_summary_file = "../../demodata/weather_data/SummaryPerYear.xlsx"
Load data¶
To start we load all the Ibis tables that hold the data. These are organized such that we can choose to pull out the data resampled in different intervals, e.g. 5 minutes vs. 24 hours.
We will retain one project for this example.
In [2]:
import ibis
from ibis import _
import ibis.selectors as s
household_tables = etdtransform.load_data.get_household_tables()
# get 5 minute data for households
household_table_5min = household_tables['5min']
project = 1
# select a single project for analysis
household_table_5min = household_table_5min.filter(_.ProjectIdBSV == project)
In [3]:
household_table_5min.head().to_pandas()
Out[3]:
| ReadingDate | ProjectIdBSV | HuisIdBSV | ElektriciteitNetgebruikHoogDiff | ElektriciteitNetgebruikLaagDiff | ElektriciteitTerugleveringHoogDiff | ElektriciteitTerugleveringLaagDiff | ElektriciteitsgebruikWTWDiff | ElektriciteitsgebruikWarmtepompDiff | ElektriciteitsgebruikBoosterDiff | ... | date_column | datetime_column | year | week_of_year | day_of_week | TemperatuurISOWk | GevoelstemperatuurISOWk | days_in_week | Koudste2ISOWkTemperatuur | Koudste2ISOWkGevoelstemperatuur | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2019-01-01 00:05:00 | 1 | 1 | 0.0 | 0.000 | 0.0 | 0.0 | 0.00 | 0.00 | 0.0 | ... | 2019-01-01 | 2019-01-01 01:00:00 | 2019 | 1 | 1 | 5.83125 | 1.242552 | 8 | False | False |
| 1 | 2019-01-01 00:10:00 | 1 | 1 | 0.0 | 0.135 | 0.0 | 0.0 | 0.00 | 0.05 | 0.0 | ... | 2019-01-01 | 2019-01-01 01:00:00 | 2019 | 1 | 1 | 5.83125 | 1.242552 | 8 | False | False |
| 2 | 2019-01-01 00:15:00 | 1 | 1 | 0.0 | 0.116 | 0.0 | 0.0 | 0.00 | 0.01 | 0.0 | ... | 2019-01-01 | 2019-01-01 01:00:00 | 2019 | 1 | 1 | 5.83125 | 1.242552 | 8 | False | False |
| 3 | 2019-01-01 00:20:00 | 1 | 1 | 0.0 | 0.106 | 0.0 | 0.0 | 0.00 | 0.01 | 0.0 | ... | 2019-01-01 | 2019-01-01 01:00:00 | 2019 | 1 | 1 | 5.83125 | 1.242552 | 8 | False | False |
| 4 | 2019-01-01 00:25:00 | 1 | 1 | 0.0 | 0.106 | 0.0 | 0.0 | 0.01 | 0.01 | 0.0 | ... | 2019-01-01 | 2019-01-01 01:00:00 | 2019 | 1 | 1 | 5.83125 | 1.242552 | 8 | False | False |
5 rows × 149 columns
Set variables of interest and normalize by surface area¶
In order to make our comparison like for like, we normalize the variables by the household surface area and set it to 100 meters squared.
In [4]:
# Choose variables of interest for our analysis
vars = [
"Zelfgebruik",
"ZonopwekBruto",
"ElektriciteitsgebruikTotaalBruto",
"TerugleveringTotaalNetto"
]
household_table_5min = household_table_5min.select(
['ReadingDate', 'ProjectIdBSV', 'HuisIdBSV'] +
vars +
['Oppervlakte', 'Warmtebehoefte']
)
# Divide by surface area to normalize the data to 100m2 and convert to KWh
hh_table_normalized = etdanalyze.analysis_helpers.normalize_100m2(
hh_table=household_table_5min,
interval="5min",
col_names = vars
)
project_normalized = hh_table_normalized.group_by("ReadingDate", "ProjectIdBSV").aggregate([
hh_table_normalized.ZelfgebruikPer100M2KW.mean().name("ZelfgebruikPer100M2KWH"),
hh_table_normalized.ZonopwekBrutoPer100M2KW.mean().name("ZonopwekBrutoPer100M2KWH"),
hh_table_normalized.ElektriciteitsgebruikTotaalBrutoPer100M2KW.mean().name("ElektriciteitsgebruikTotaalBrutoPer100M2KWH"),
hh_table_normalized.TerugleveringTotaalNettoPer100M2KW.mean().name("TerugleveringTotaalNettoPer100M2KWH")
])
In [5]:
print(project_normalized.schema())
ibis.Schema {
ReadingDate timestamp(9)
ProjectIdBSV int64
ZelfgebruikPer100M2KWH float64
ZonopwekBrutoPer100M2KWH float64
ElektriciteitsgebruikTotaalBrutoPer100M2KWH float64
TerugleveringTotaalNettoPer100M2KWH float64
}
In [6]:
project_normalized.head().to_pandas()
Out[6]:
| ReadingDate | ProjectIdBSV | ZelfgebruikPer100M2KWH | ZonopwekBrutoPer100M2KWH | ElektriciteitsgebruikTotaalBrutoPer100M2KWH | TerugleveringTotaalNettoPer100M2KWH | |
|---|---|---|---|---|---|---|
| 0 | 2019-10-11 07:30:00 | 1 | 0.002132 | 0.002132 | 0.543175 | 0.000000 |
| 1 | 2019-10-11 07:50:00 | 1 | 0.067145 | 0.067145 | 0.525649 | 0.000000 |
| 2 | 2019-10-11 08:30:00 | 1 | 0.144552 | 0.160935 | 0.461456 | 0.016383 |
| 3 | 2019-10-11 10:40:00 | 1 | 0.277472 | 0.593646 | 0.425542 | 0.316174 |
| 4 | 2019-10-11 11:40:00 | 1 | 0.341063 | 0.810002 | 0.491637 | 0.468939 |
Produce a seasonal day profile¶
In [7]:
summer_table = project_normalized.filter(
(_["ReadingDate"].month() == 6)
| (_["ReadingDate"].month() == 7)
| (_["ReadingDate"].month() == 8)
)
winter_table = project_normalized.filter(
(_["ReadingDate"].month() == 12)
| (_["ReadingDate"].month() == 1)
| (_["ReadingDate"].month() == 2)
)
In [8]:
summer_table.head().to_pandas()
Out[8]:
| ReadingDate | ProjectIdBSV | ZelfgebruikPer100M2KWH | ZonopwekBrutoPer100M2KWH | ElektriciteitsgebruikTotaalBrutoPer100M2KWH | TerugleveringTotaalNettoPer100M2KWH | |
|---|---|---|---|---|---|---|
| 0 | 2019-06-01 00:25:00 | 1 | 0.0 | 0.0 | 0.312193 | 0.0 |
| 1 | 2019-06-01 00:35:00 | 1 | 0.0 | 0.0 | 0.347553 | 0.0 |
| 2 | 2019-06-01 00:50:00 | 1 | 0.0 | 0.0 | 0.311570 | 0.0 |
| 3 | 2019-06-01 01:15:00 | 1 | 0.0 | 0.0 | 0.338389 | 0.0 |
| 4 | 2019-06-01 03:15:00 | 1 | 0.0 | 0.0 | 0.393386 | 0.0 |
In [9]:
# plot_var = "ElektriciteitsgebruikTotaalBrutoPer100M2KW"
# plot_var_name = "Elektriciteitsgebruik Totaal Bruto"
plot_var = "ZonopwekBrutoPer100M2KWH"
plot_var_name = "ZonopwekBruto (KWh) Per 100m2 "
fig = etdanalyze.plot_functions.plot_daily_profile(
df = summer_table.to_pandas(),
title = f"Summer - Project {project} - {plot_var_name}",
plot_var=plot_var,
plot_var_name=plot_var_name
)
In [10]:
plot_vars = {
"ZelfgebruikPer100M2KWH": "Zelfgebruik woning per 100m2 (kWh)",
"ZonopwekBrutoPer100M2KWH": "Zonopwek Bruto per 100m2 (kWh)",
"ElektriciteitsgebruikTotaalBrutoPer100M2KWH": "Elektriciteitsgebruik Bruto per 100m2 (kWh)",
"TerugleveringTotaalNettoPer100M2KWH": "Teruglevering Netto per 100m2 (kWh)",
}
fig = etdanalyze.plot_functions.plot_daily_profile_mean_combined(
df = winter_table.to_pandas(),
title = f"Winter - Project {project} - {plot_var_name}",
plot_vars=plot_vars.keys(),
# plot_interval=plot_interval,
plot_var_names=plot_vars.values()
)
Produce a load duration curve¶
In [11]:
# It is also possible to load project data directly
project_tables = etdtransform.load_data.get_project_tables()
In [12]:
project_5min_table = project_tables["5min"]
print(project_5min_table.schema())
print(project)
ibis.Schema {
index int64
ReadingDate timestamp(9)
ProjectIdBSV int64
ElektriciteitNetgebruikHoogDiff float64
ElektriciteitNetgebruikLaagDiff float64
ElektriciteitTerugleveringHoogDiff float64
ElektriciteitTerugleveringLaagDiff float64
ElektriciteitsgebruikWTWDiff float64
ElektriciteitsgebruikWarmtepompDiff float64
ElektriciteitsgebruikBoosterDiff float64
ElektriciteitsgebruikBoilervatDiff float64
ElektriciteitsgebruikRadiatorDiff float64
ZonopwekBruto float64
TerugleveringTotaalNetto float64
ElektriciteitsgebruikTotaalNetto float64
Netuitwisseling float64
ElektriciteitsgebruikTotaalWarmtepomp float64
ElektriciteitsgebruikTotaalGebouwgebonden float64
ElektriciteitsgebruikTotaalHuishoudelijk float64
Zelfgebruik float64
ElektriciteitsgebruikTotaalBruto float64
n int64
Weerstation string
STN int64
HH int64
YYYYMMDD int64
STN_right int64
YYYYMMDD_right int64
HH_right int64
DD int64
FH int64
FF int64
FX int64
T int64
T10N string
TD int64
SQ int64
Q int64
DR int64
RH int64
P int64
VV string
N string
U int64
WW string
IX int64
M string
R string
S string
O string
Y string
Temperatuur float64
Windsnelheid float64
Vochtigheid int64
Dampdruk float64
Gevoelstemperatuur float64
TemperatuurRA float64
GevoelstemperatuurRA float64
Koudste2WkTemperatuur boolean
Koudste2WkGevoelstemperatuur boolean
temp_date_string string
date_column date
datetime_column timestamp
year int32
week_of_year int32
day_of_week int16
TemperatuurISOWk float64
GevoelstemperatuurISOWk float64
days_in_week int64
Koudste2ISOWkTemperatuur boolean
Koudste2ISOWkGevoelstemperatuur boolean
}
1
In [13]:
plot_var = "ElektriciteitsgebruikTotaalNetto"
filtered_table = project_5min_table.filter(
_.ProjectIdBSV == project
).select(["ReadingDate", "ProjectIdBSV"] + [plot_var])
In [14]:
fig = etdanalyze.plot_functions.plot_load_duration_curve(
df=filtered_table.to_pandas(),
diff_column=plot_var,
interval="5min"
)
ElektriciteitsgebruikTotaalNetto stats for 5min - Project None: 0.30% above 1.5 kW
Produce a graph of the coldest week¶
In [17]:
coldest_table = project_5min_table.filter(
_.ProjectIdBSV == project,
_.Koudste2WkTemperatuur == True
).select(["ReadingDate", "ProjectIdBSV", "Temperatuur"] + [plot_var])
coldest_table.schema()
Out[17]:
ibis.Schema {
ReadingDate timestamp(9)
ProjectIdBSV int64
Temperatuur float64
ElektriciteitsgebruikTotaalNetto float64
}
In [18]:
fig = etdanalyze.plot_functions.plot_var_vs_temp(
coldest_table.to_pandas(),
var = plot_var,
title = "Coldest two weeks"
)