Commit: 934970c18af492b3874f4d9b4ab341ecd582603c | File: nilmtk/dataset_converters/dataport/download_dataport.py | Method: download_dataport

Before Change



    # if user has specified buildings
    if periods_to_load:
        buildings_to_load = periods_to_load.keys()
    else:
        # get buildings present in all tables
        sql_query = ""
        for table in database_tables:
            sql_query = (sql_query + "(SELECT DISTINCT dataid" +
                         " FROM \"" + database_schema + "\"." + table +
                         ") UNION ")
        sql_query = sql_query[:-7]  # strip the trailing " UNION " (7 chars)
        sql_query = sql_query + " ORDER BY dataid"
        buildings_to_load = pd.read_sql(sql_query, conn)["dataid"].tolist()

    # for each user specified building or all buildings in database
    for building_id in buildings_to_load:
        print("Loading building {:d} @ {}"
              .format(building_id, datetime.datetime.now()))
        sys.stdout.flush()

        # create new list of chunks for concatenating later
        dataframe_list = []

        # for each table holding one month of data
        for database_table in database_tables:
            print("  Loading table {:s}".format(database_table))
            sys.stdout.flush()

            # get buildings present in the electricity_egauge_minutes table (via university.metadata)
            sql_query = ("SELECT DISTINCT dataid" +
                         " FROM university.metadata" +
                         " WHERE egauge_min_time IS NOT NULL" +
                         " ORDER BY dataid")
            buildings_in_table = pd.read_sql(sql_query, conn)["dataid"].tolist()

            if building_id in buildings_in_table:
                # get first and last timestamps for this house in electricity_egauge_minutes table
                sql_query = ("SELECT MIN(egauge_min_time) AS minlocalminute," +
                             " MAX(egauge_max_time) AS maxlocalminute" +
                             " FROM university.metadata" +
                             " WHERE dataid=" + str(building_id))
                time_range = pd.read_sql(sql_query, conn)  # avoid shadowing builtin range
                first_timestamp_in_table = time_range["minlocalminute"][0]
                last_timestamp_in_table = time_range["maxlocalminute"][0]

                # get requested start and end and localize them
                requested_start = None
                requested_end = None
                database_timezone = "US/Central"
                if periods_to_load:
                    if periods_to_load[building_id][0]:
                        requested_start = pd.Timestamp(periods_to_load[building_id][0])
                        requested_start = requested_start.tz_localize(database_timezone)
                    if periods_to_load[building_id][1]:
                        requested_end = pd.Timestamp(periods_to_load[building_id][1])
                        requested_end = requested_end.tz_localize(database_timezone)

                # check user start is not after end; comparing None with a
                # Timestamp raises TypeError on Python 3, so only compare
                # when both bounds were supplied
                if (requested_start is not None and requested_end is not None
                        and requested_start > requested_end):
                    print("requested end is before requested start")
                    sys.stdout.flush()
                else:                        
                    # clip data to smallest range
                    if requested_start:
                        start = max(requested_start, first_timestamp_in_table)
                    else:
                        start = first_timestamp_in_table
                    if requested_end:
                        end = min(requested_end, last_timestamp_in_table)
                    else:
                        end = last_timestamp_in_table

                    # download data in chunks
                    chunk_start = start
                    chunk_size = datetime.timedelta(10)  # 10 days
                    while chunk_start < end:
                        chunk_end = chunk_start + chunk_size 
                        if chunk_end > end:
                            chunk_end = end
                        # subtract 1 second so end is exclusive
                        chunk_end = chunk_end - datetime.timedelta(0, 1)

                        # query power data for all channels
                        format = "%Y-%m-%d %H:%M:%S"

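The query-building loop above concatenates one UNION subquery per table and then slices off the trailing " UNION " with sql_query[:-7] (the string " UNION " is exactly 7 characters). As a hedged sketch, not the repo's code, the same SQL can be assembled with str.join, which removes the off-by-one risk of the manual slice; the schema and table names below are invented example values:

    # Sketch only: database_schema and database_tables are made-up examples,
    # not values taken from the dataport database.
    database_schema = "university"
    database_tables = ["eg_realpower_2015", "eg_realpower_2016"]

    subqueries = ['(SELECT DISTINCT dataid FROM "{}".{})'.format(database_schema, table)
                  for table in database_tables]
    sql_query = " UNION ".join(subqueries) + " ORDER BY dataid"
    # -> (SELECT DISTINCT dataid FROM "university".eg_realpower_2015) UNION
    #    (SELECT DISTINCT dataid FROM "university".eg_realpower_2016) ORDER BY dataid
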
After Change



    # if user has specified buildings
    if periods_to_load:
        buildings_to_load = list(periods_to_load.keys())
    else:
        # get buildings present in all tables
        sql_query = ""
        for table in database_tables:
            sql_query = (sql_query + "(SELECT DISTINCT dataid" +
                         " FROM \"" + database_schema + "\"." + table +
                         ") UNION ")
        sql_query = sql_query[:-7]  # strip the trailing " UNION " (7 chars)
        sql_query = sql_query + " ORDER BY dataid"
        buildings_to_load = pd.read_sql(sql_query, conn)["dataid"].tolist()

    # for each user specified building or all buildings in database
    for building_id in buildings_to_load:
        print("Loading building {:d} @ {}"
              .format(building_id, datetime.datetime.now()))
        sys.stdout.flush()

        # create new list of chunks for concatenating later
        dataframe_list = []

        # for each table holding one month of data
        for database_table in database_tables:
            print("  Loading table {:s}".format(database_table))
            sys.stdout.flush()

            # get buildings present in the electricity_egauge_minutes table (via university.metadata)
            sql_query = ("SELECT DISTINCT dataid" +
                         " FROM university.metadata" +
                         " WHERE egauge_min_time IS NOT NULL" +
                         " ORDER BY dataid")
            buildings_in_table = pd.read_sql(sql_query, conn)["dataid"].tolist()

            if building_id in buildings_in_table:
                # get first and last timestamps for this house in electricity_egauge_minutes table
                sql_query = ("SELECT MIN(egauge_min_time) AS minlocalminute," +
                             " MAX(egauge_max_time) AS maxlocalminute" +
                             " FROM university.metadata" +
                             " WHERE dataid=" + str(building_id))
                time_range = pd.read_sql(sql_query, conn)  # avoid shadowing builtin range
                first_timestamp_in_table = time_range["minlocalminute"][0]
                last_timestamp_in_table = time_range["maxlocalminute"][0]

                # get requested start and end and localize them
                requested_start = None
                requested_end = None
                database_timezone = "US/Central"
                if periods_to_load:
                    if periods_to_load[building_id][0]:
                        requested_start = pd.Timestamp(periods_to_load[building_id][0])
                        requested_start = requested_start.tz_localize(database_timezone)
                    if periods_to_load[building_id][1]:
                        requested_end = pd.Timestamp(periods_to_load[building_id][1])
                        requested_end = requested_end.tz_localize(database_timezone)

                # check user start is not after end; comparing None with a
                # Timestamp raises TypeError on Python 3, so only compare
                # when both bounds were supplied
                if (requested_start is not None and requested_end is not None
                        and requested_start > requested_end):
                    print("requested end is before requested start")
                    sys.stdout.flush()
                else:                        
                    # clip data to smallest range
                    if requested_start:
                        start = max(requested_start, first_timestamp_in_table)
                    else:
                        start = first_timestamp_in_table
                    if requested_end:
                        end = min(requested_end, last_timestamp_in_table)
                    else:
                        end = last_timestamp_in_table

                    # download data in chunks
                    chunk_start = start
                    chunk_size = datetime.timedelta(10)  # 10 days
                    while chunk_start < end:
                        chunk_end = chunk_start + chunk_size 
                        if chunk_end > end:
                            chunk_end = end
                        # subtract 1 second so end is exclusive
                        chunk_end = chunk_end - datetime.timedelta(0, 1)

                        # query power data for all channels
                        format = "%Y-%m-%d %H:%M:%S"
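
The only functional change between the two versions is buildings_to_load = list(periods_to_load.keys()). In Python 2, dict.keys() returns a list; in Python 3 it returns a dynamic view that cannot be indexed and that raises RuntimeError if the dict changes size during iteration, so code that needs list behaviour must materialise the view explicitly. A minimal, self-contained sketch with invented building ids and periods (illustrative only, not dataport data):

    # Hedged illustration with made-up values.
    periods_to_load = {26: ("2014-01-01", "2014-02-01"),
                       54: (None, None)}

    keys_view = periods_to_load.keys()     # Python 3: a dict_keys view, not a list
    buildings_to_load = list(keys_view)    # materialise so indexing/sorting is safe
    print(buildings_to_load[0])            # 26; keys_view[0] would raise TypeError

    # The view tracks the dict, so mutating while iterating over it fails;
    # iterating over a list() copy makes deletion safe:
    for building_id in list(periods_to_load.keys()):
        if periods_to_load[building_id] == (None, None):
            del periods_to_load[building_id]
    print(periods_to_load)                 # {26: ('2014-01-01', '2014-02-01')}

Wrapping the view in list() is the standard Python 2-to-3 fix that this pattern captures across the instances below.
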
In pattern: SUPERPATTERN

Frequency: 6

Non-data size: 3

Instances


Project Name: nilmtk/nilmtk
Commit Name: 934970c18af492b3874f4d9b4ab341ecd582603c
Time: 2018-04-22
Author: 10246101+PMeira@users.noreply.github.com
File Name: nilmtk/dataset_converters/dataport/download_dataport.py
Class Name:
Method Name: download_dataport


Project Name: A2Zadeh/CMU-MultimodalSDK
Commit Name: ea8d6d88196690aa43483d698b8230bdc5c9ac82
Time: 2019-10-08
Author: abagherz@cs.cmu.edu
File Name: mmsdk/mmdatasdk/dataset/dataset.py
Class Name: mmdataset
Method Name: revert


Project Name: scipy-lectures/scipy-lecture-notes
Commit Name: 890244b04eafb8313be6d49d8a34a134801f7a0c
Time: 2017-09-28
Author: gael.varoquaux@normalesup.org
File Name: advanced/mathematical_optimization/examples/plot_compare_optimizers.py
Class Name:
Method Name:


Project Name: dit/dit
Commit Name: 1f63c86c4ed82316d87d17977987c6323bf3fe41
Time: 2017-02-22
Author: ryangregoryjames@gmail.com
File Name: dit/profiles/information_partitions.py
Class Name: DependencyDecomposition
Method Name: to_string


Project Name: scipy-lectures/scipy-lecture-notes
Commit Name: 54478492154195895518b5624f1a081ca9d572f4
Time: 2017-09-06
Author: gael.varoquaux@normalesup.org
File Name: advanced/mathematical_optimization/examples/plot_compare_optimizers.py
Class Name:
Method Name:


Project Name: D2KLab/entity2rec
Commit Name: ef5fa3833043874c08cf6904359e3ba7f3b94ac8
Time: 2018-01-10
Author: enricopalumbo0@gmail.com
File Name: entity2rec/entity2rec.py
Class Name: Entity2Rec
Method Name: _compute_features