// if user has specified buildings
if periods_to_load:
buildings_to_load = periods_to_load.keys()
else:
// get buildings present in all tables
sql_query = ""
for table in database_tables:
sql_query = (sql_query + "(SELECT DISTINCT dataid" +
" FROM "" + database_schema + ""." + table +
") UNION ")
sql_query = sql_query[:-7]
sql_query = (sql_query + " ORDER BY dataid")
buildings_to_load = pd.read_sql(sql_query, conn)["dataid"].tolist()
// for each user specified building or all buildings in database
for building_id in buildings_to_load:
print("Loading building {:d} @ {}"
.format(building_id, datetime.datetime.now()))
sys.stdout.flush()
// create new list of chunks for concatenating later
dataframe_list = []
// for each table of 1 month data
for database_table in database_tables:
print(" Loading table {:s}".format(database_table))
sys.stdout.flush()
// get buildings present in electricity_egauge_minutes table
sql_query = ("SELECT DISTINCT dataid" +
" FROM university.metadata" +
" WHERE egauge_min_time IS NOT NULL" +
" ORDER BY dataid")
buildings_in_table = pd.read_sql(sql_query, conn)["dataid"].tolist()
if building_id in buildings_in_table:
// get first and last timestamps for this house in electricity_egauge_minutes table
sql_query = ("SELECT MIN(egauge_min_time) AS minlocalminute," +
" MAX(egauge_max_time) AS maxlocalminute" +
" FROM university.metadata" +
" WHERE dataid=" + str(building_id))
range = pd.read_sql(sql_query, conn)
first_timestamp_in_table = range["minlocalminute"][0]
last_timestamp_in_table = range["maxlocalminute"][0]
// get requested start and end and localize them
requested_start = None
requested_end = None
database_timezone = "US/Central"
if periods_to_load:
if periods_to_load[building_id][0]:
requested_start = pd.Timestamp(periods_to_load[building_id][0])
requested_start = requested_start.tz_localize(database_timezone)
if periods_to_load[building_id][1]:
requested_end = pd.Timestamp(periods_to_load[building_id][1])
requested_end = requested_end.tz_localize(database_timezone)
// check user start is not after end
if requested_start > requested_end:
print("requested end is before requested start")
sys.stdout.flush()
else:
// clip data to smallest range
if requested_start:
start = max(requested_start, first_timestamp_in_table)
else:
start = first_timestamp_in_table
if requested_end:
end = min(requested_end, last_timestamp_in_table)
else:
end = last_timestamp_in_table
// download data in chunks
chunk_start = start
chunk_size = datetime.timedelta(10) // 1 day
while chunk_start < end:
chunk_end = chunk_start + chunk_size
if chunk_end > end:
chunk_end = end
// subtract 1 second so end is exclusive
chunk_end = chunk_end - datetime.timedelta(0, 1)
// query power data for all channels
format = "%Y-%m-%d %H:%M:%S"
After Change
// if user has specified buildings
if periods_to_load:
buildings_to_load = list(periods_to_load.keys())
else:
// get buildings present in all tables
sql_query = ""
for table in database_tables:
sql_query = (sql_query + "(SELECT DISTINCT dataid" +
" FROM "" + database_schema + ""." + table +
") UNION ")
sql_query = sql_query[:-7]
sql_query = (sql_query + " ORDER BY dataid")
buildings_to_load = pd.read_sql(sql_query, conn)["dataid"].tolist()
// for each user specified building or all buildings in database
for building_id in buildings_to_load:
print("Loading building {:d} @ {}"
.format(building_id, datetime.datetime.now()))
sys.stdout.flush()
// create new list of chunks for concatenating later
dataframe_list = []
// for each table of 1 month data
for database_table in database_tables:
print(" Loading table {:s}".format(database_table))
sys.stdout.flush()
// get buildings present in electricity_egauge_minutes table
sql_query = ("SELECT DISTINCT dataid" +
" FROM university.metadata" +
" WHERE egauge_min_time IS NOT NULL" +
" ORDER BY dataid")
buildings_in_table = pd.read_sql(sql_query, conn)["dataid"].tolist()
if building_id in buildings_in_table:
// get first and last timestamps for this house in electricity_egauge_minutes table
sql_query = ("SELECT MIN(egauge_min_time) AS minlocalminute," +
" MAX(egauge_max_time) AS maxlocalminute" +
" FROM university.metadata" +
" WHERE dataid=" + str(building_id))
range = pd.read_sql(sql_query, conn)
first_timestamp_in_table = range["minlocalminute"][0]
last_timestamp_in_table = range["maxlocalminute"][0]
// get requested start and end and localize them
requested_start = None
requested_end = None
database_timezone = "US/Central"
if periods_to_load:
if periods_to_load[building_id][0]:
requested_start = pd.Timestamp(periods_to_load[building_id][0])
requested_start = requested_start.tz_localize(database_timezone)
if periods_to_load[building_id][1]:
requested_end = pd.Timestamp(periods_to_load[building_id][1])
requested_end = requested_end.tz_localize(database_timezone)
// check user start is not after end
if requested_start > requested_end:
print("requested end is before requested start")
sys.stdout.flush()
else:
// clip data to smallest range
if requested_start:
start = max(requested_start, first_timestamp_in_table)
else:
start = first_timestamp_in_table
if requested_end:
end = min(requested_end, last_timestamp_in_table)
else:
end = last_timestamp_in_table
// download data in chunks
chunk_start = start
chunk_size = datetime.timedelta(10) // 1 day
while chunk_start < end:
chunk_end = chunk_start + chunk_size
if chunk_end > end:
chunk_end = end
// subtract 1 second so end is exclusive
chunk_end = chunk_end - datetime.timedelta(0, 1)
// query power data for all channels
format = "%Y-%m-%d %H:%M:%S"