# Split the rows in `lines` into train and validation partitions by patient,
# so that every row of a given patient lands in exactly one partition.
header = lines[0]  # first row is kept aside as the header
lines = lines[1:]

# The patient id is the text before the first underscore of a row.
# partition() keeps the whole row when there is no underscore, whereas the
# slice x[:x.find("_")] would silently drop the row's last character
# (find() returns -1 in that case).
row_patients = [row.partition("_")[0] for row in lines]

patients = list(set(row_patients))
random.shuffle(patients)

train_cnt = int(0.82 * len(patients))  # this will become 70% of all data
train_patients = set(patients[:train_cnt])
val_patients = set(patients[train_cnt:])
# Sanity check: the two slices of the shuffled list must not share a patient.
assert train_patients.isdisjoint(val_patients)

train_lines = [row for row, pid in zip(lines, row_patients)
               if pid in train_patients]
val_lines = [row for row, pid in zip(lines, row_patients)
             if pid in val_patients]
After Change
# Only these task names are accepted.
assert args.task in ["decompensation", "in-hospital-mortality", "length-of-stay",
                     "phenotyping", "multitask"]

# Collect the first column of every row in valset.csv whose second column
# parses to the integer 1.
# NOTE(review): presumably the first column is a patient id and the second a
# 0/1 "belongs to validation" flag -- confirm against the file's producer.
val_patients = set()
with open("mimic3models/valset.csv", "r") as valset_file:
    for line in valset_file:
        # Each row must have exactly two comma-separated fields; anything
        # else raises ValueError on unpacking.
        x, y = line.split(",")
        if int(y) == 1:  # int() tolerates the trailing newline
            val_patients.add(x)

# True only for the "phenotyping" and "multitask" tasks.
has_header = args.task in ["phenotyping", "multitask"]