862f99942ce1eefe93f0cfd1bcf3ade031679cd4,pmlb/dataset_lists.py,,,#,26

Before Change


    "yeast"
]

# Hard-coded list of 120 dataset-name strings, bound at module level as
# `regression_dataset_names`. Entries are kept in ascending string
# (lexicographic) order, which is why e.g. "1595_poker" precedes
# "192_vineyard".
regression_dataset_names = [
    "1027_ESL",
    "1028_SWD",
    "1029_LEV",
    "1030_ERA",
    "1089_USCrime",
    "1096_FacultySalaries",
    "1191_BNG_pbc",
    "1193_BNG_lowbwt",
    "1196_BNG_pharynx",
    "1199_BNG_echoMonths",
    "1201_BNG_breastTumor",
    "1203_BNG_pwLinear",
    "1595_poker",
    "192_vineyard",
    "195_auto_price",
    "197_cpu_act",
    "201_pol",
    "207_autoPrice",
    "210_cloud",
    "215_2dplanes",
    "218_house_8L",
    "225_puma8NH",
    "227_cpu_small",
    "228_elusage",
    "229_pwLinear",
    "230_machine_cpu",
    "294_satellite_image",
    "344_mv",
    "4544_GeographicalOriginalofMusic",
    "485_analcatdata_vehicle",
    "503_wind",
    "505_tecator",
    "519_vinnie",
    "522_pm10",
    "523_analcatdata_neavote",
    "527_analcatdata_election2000",
    "529_pollen",
    "537_houses",
    "542_pollution",
    "547_no2",
    "556_analcatdata_apnea2",
    "557_analcatdata_apnea1",
    "560_bodyfat",
    "561_cpu",
    "562_cpu_small",
    "564_fried",
    "573_cpu_act",
    "574_house_16H",
    "579_fri_c0_250_5",
    "581_fri_c3_500_25",
    "582_fri_c1_500_25",
    "583_fri_c1_1000_50",
    "584_fri_c4_500_25",
    "586_fri_c3_1000_25",
    "588_fri_c4_1000_100",
    "589_fri_c2_1000_25",
    "590_fri_c0_1000_50",
    "591_fri_c1_100_10",
    "592_fri_c4_1000_25",
    "593_fri_c1_1000_10",
    "594_fri_c2_100_5",
    "595_fri_c0_1000_10",
    "596_fri_c2_250_5",
    "597_fri_c2_500_5",
    "598_fri_c0_1000_25",
    "599_fri_c2_1000_5",
    "601_fri_c1_250_5",
    "602_fri_c3_250_10",
    "603_fri_c0_250_50",
    "604_fri_c4_500_10",
    "605_fri_c2_250_25",
    "606_fri_c2_1000_10",
    "607_fri_c4_1000_50",
    "608_fri_c3_1000_10",
    "609_fri_c0_1000_5",
    "611_fri_c3_100_5",
    "612_fri_c1_1000_5",
    "613_fri_c3_250_5",
    "615_fri_c4_250_10",
    "616_fri_c4_500_50",
    "617_fri_c3_500_5",
    "618_fri_c3_1000_50",
    "620_fri_c1_1000_25",
    "621_fri_c0_100_10",
    "622_fri_c2_1000_50",
    "623_fri_c4_1000_10",
    "624_fri_c0_100_5",
    "626_fri_c2_500_50",
    "627_fri_c2_500_10",
    "628_fri_c3_1000_5",
    "631_fri_c1_500_5",
    "633_fri_c0_500_25",
    "634_fri_c2_100_10",
    "635_fri_c0_250_10",
    "637_fri_c1_500_50",
    "641_fri_c1_500_10",
    "643_fri_c2_500_25",
    "644_fri_c4_250_25",
    "645_fri_c3_500_50",
    "646_fri_c3_500_10",
    "647_fri_c1_250_10",
    "648_fri_c1_250_50",
    "649_fri_c0_500_5",
    "650_fri_c0_500_50",
    "651_fri_c0_100_25",
    "653_fri_c0_250_25",
    "654_fri_c0_500_10",
    "656_fri_c1_100_5",
    "657_fri_c2_250_10",
    "658_fri_c3_250_25",
    "659_sleuth_ex1714",
    "663_rabe_266",
    "665_sleuth_case2002",
    "666_rmftsa_ladata",
    "678_visualizing_environmental",
    "687_sleuth_ex1605",
    "690_visualizing_galaxy",
    "695_chatfield_4",
    "706_sleuth_case1202",
    "712_chscase_geyser1"
]

datasets_with_metadata = [
    "molecular_biology_promoters",
    "car",

After Change



# Load the tab-separated summary table and derive the dataset-name lists from
# its "task" and "dataset" columns.
# NOTE(review): the path is relative, so it resolves against the current
# working directory at import time — confirm that is intended.
df_summary = pandas.read_csv("pmlb/all_summary_stats.tsv", sep="\t")
# The inner string literal uses single quotes so it does not terminate the
# surrounding double-quoted query expression.
regression_dataset_names = df_summary.query("task == 'regression'")["dataset"].tolist()
classification_dataset_names = df_summary.query("task == 'classification'")["dataset"].tolist()
# Combined list: regression names first, followed by classification names.
dataset_names = regression_dataset_names + classification_dataset_names

def get_datasets_with_metadata(dataset_names, local_cache_dir = "datasets/"):
    assert (local_cache_dir != None)
    datasets_with_metadata = []
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 5

Instances


Project Name: EpistasisLab/penn-ml-benchmarks
Commit Name: 862f99942ce1eefe93f0cfd1bcf3ade031679cd4
Time: 2020-09-03
Author: grixor@gmail.com
File Name: pmlb/dataset_lists.py
Class Name:
Method Name:


Project Name: oddt/oddt
Commit Name: e626254b74ecb6dc71396c1b35237b53a5e35163
Time: 2017-08-23
Author: maciek@wojcikowski.pl
File Name: oddt/datasets.py
Class Name: pdbbind
Method Name: __init__


Project Name: pgmpy/pgmpy
Commit Name: f58745ab284f48b7ef4ce813f5f8cd26bdb3c0a8
Time: 2015-06-16
Author: ankurankan@gmail.com
File Name: pgmpy/inference/Sampling.py
Class Name: BayesianModelSampling
Method Name: forward_sample