862f99942ce1eefe93f0cfd1bcf3ade031679cd4,pmlb/dataset_lists.py,,,#,26
Before Change
"yeast"
]
# Names of the regression datasets, kept in the order the rest of the
# module was written against.
regression_dataset_names = [
    "1027_ESL",
    "1028_SWD",
    "1029_LEV",
    "1030_ERA",
    "1089_USCrime",
    "1096_FacultySalaries",
    "1191_BNG_pbc",
    "1193_BNG_lowbwt",
    "1196_BNG_pharynx",
    "1199_BNG_echoMonths",
    "1201_BNG_breastTumor",
    "1203_BNG_pwLinear",
    "1595_poker",
    "192_vineyard",
    "195_auto_price",
    "197_cpu_act",
    "201_pol",
    "207_autoPrice",
    "210_cloud",
    "215_2dplanes",
    "218_house_8L",
    "225_puma8NH",
    "227_cpu_small",
    "228_elusage",
    "229_pwLinear",
    "230_machine_cpu",
    "294_satellite_image",
    "344_mv",
    "4544_GeographicalOriginalofMusic",
    "485_analcatdata_vehicle",
    "503_wind",
    "505_tecator",
    "519_vinnie",
    "522_pm10",
    "523_analcatdata_neavote",
    "527_analcatdata_election2000",
    "529_pollen",
    "537_houses",
    "542_pollution",
    "547_no2",
    "556_analcatdata_apnea2",
    "557_analcatdata_apnea1",
    "560_bodyfat",
    "561_cpu",
    "562_cpu_small",
    "564_fried",
    "573_cpu_act",
    "574_house_16H",
    "579_fri_c0_250_5",
    "581_fri_c3_500_25",
    "582_fri_c1_500_25",
    "583_fri_c1_1000_50",
    "584_fri_c4_500_25",
    "586_fri_c3_1000_25",
    "588_fri_c4_1000_100",
    "589_fri_c2_1000_25",
    "590_fri_c0_1000_50",
    "591_fri_c1_100_10",
    "592_fri_c4_1000_25",
    "593_fri_c1_1000_10",
    "594_fri_c2_100_5",
    "595_fri_c0_1000_10",
    "596_fri_c2_250_5",
    "597_fri_c2_500_5",
    "598_fri_c0_1000_25",
    "599_fri_c2_1000_5",
    "601_fri_c1_250_5",
    "602_fri_c3_250_10",
    "603_fri_c0_250_50",
    "604_fri_c4_500_10",
    "605_fri_c2_250_25",
    "606_fri_c2_1000_10",
    "607_fri_c4_1000_50",
    "608_fri_c3_1000_10",
    "609_fri_c0_1000_5",
    "611_fri_c3_100_5",
    "612_fri_c1_1000_5",
    "613_fri_c3_250_5",
    "615_fri_c4_250_10",
    "616_fri_c4_500_50",
    "617_fri_c3_500_5",
    "618_fri_c3_1000_50",
    "620_fri_c1_1000_25",
    "621_fri_c0_100_10",
    "622_fri_c2_1000_50",
    "623_fri_c4_1000_10",
    "624_fri_c0_100_5",
    "626_fri_c2_500_50",
    "627_fri_c2_500_10",
    "628_fri_c3_1000_5",
    "631_fri_c1_500_5",
    "633_fri_c0_500_25",
    "634_fri_c2_100_10",
    "635_fri_c0_250_10",
    "637_fri_c1_500_50",
    "641_fri_c1_500_10",
    "643_fri_c2_500_25",
    "644_fri_c4_250_25",
    "645_fri_c3_500_50",
    "646_fri_c3_500_10",
    "647_fri_c1_250_10",
    "648_fri_c1_250_50",
    "649_fri_c0_500_5",
    "650_fri_c0_500_50",
    "651_fri_c0_100_25",
    "653_fri_c0_250_25",
    "654_fri_c0_500_10",
    "656_fri_c1_100_5",
    "657_fri_c2_250_10",
    "658_fri_c3_250_25",
    "659_sleuth_ex1714",
    "663_rabe_266",
    "665_sleuth_case2002",
    "666_rmftsa_ladata",
    "678_visualizing_environmental",
    "687_sleuth_ex1605",
    "690_visualizing_galaxy",
    "695_chatfield_4",
    "706_sleuth_case1202",
    "712_chscase_geyser1",
]
datasets_with_metadata = [
"molecular_biology_promoters",
"car",
After Change
# Build the dataset-name lists from the summary-statistics table instead of
# hard-coding them. The path is relative, so it is resolved against the
# current working directory when this module is imported.
df_summary = pandas.read_csv("pmlb/all_summary_stats.tsv", sep="\t")
# The query expression is itself a string, so the literal being compared
# against uses single quotes; nesting unescaped double quotes here is a
# syntax error.
regression_dataset_names = df_summary.query("task=='regression'")["dataset"].tolist()
classification_dataset_names = df_summary.query("task=='classification'")["dataset"].tolist()
# Combined list: regression names first, then classification names.
dataset_names = regression_dataset_names + classification_dataset_names
def get_datasets_with_metadata(dataset_names, local_cache_dir = "datasets/"):
assert (local_cache_dir != None)
datasets_with_metadata = []
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 5
Instances
Project Name: EpistasisLab/penn-ml-benchmarks
Commit Name: 862f99942ce1eefe93f0cfd1bcf3ade031679cd4
Time: 2020-09-03
Author: grixor@gmail.com
File Name: pmlb/dataset_lists.py
Class Name:
Method Name:
Project Name: oddt/oddt
Commit Name: e626254b74ecb6dc71396c1b35237b53a5e35163
Time: 2017-08-23
Author: maciek@wojcikowski.pl
File Name: oddt/datasets.py
Class Name: pdbbind
Method Name: __init__
Project Name: pgmpy/pgmpy
Commit Name: f58745ab284f48b7ef4ce813f5f8cd26bdb3c0a8
Time: 2015-06-16
Author: ankurankan@gmail.com
File Name: pgmpy/inference/Sampling.py
Class Name: BayesianModelSampling
Method Name: forward_sample