Christina Theodoris
committed on
Commit
•
45b9d69
1
Parent(s):
b36d210
Change the list of individual IDs to a set to ensure uniqueness before subsetting into train/valid/test sets
Browse files
examples/hyperparam_optimiz_for_disease_classifier.py
CHANGED
@@ -67,10 +67,10 @@ def classes_to_ids(example):
|
|
67 |
trainset_v4 = trainset_v3.map(classes_to_ids, num_proc=num_proc)
|
68 |
|
69 |
# separate into train, validation, test sets
|
70 |
-
|
71 |
random.seed(42)
|
72 |
-
train_indiv = random.sample(
|
73 |
-
eval_indiv = [indiv for indiv in
|
74 |
valid_indiv = random.sample(eval_indiv,round(0.5*len(eval_indiv)))
|
75 |
test_indiv = [indiv for indiv in eval_indiv if indiv not in valid_indiv]
|
76 |
|
|
|
67 |
trainset_v4 = trainset_v3.map(classes_to_ids, num_proc=num_proc)
|
68 |
|
69 |
# separate into train, validation, test sets
|
70 |
+
indiv_set = set(trainset_v4["individual"])
|
71 |
random.seed(42)
|
72 |
+
train_indiv = random.sample(indiv_set,round(0.7*len(indiv_set)))
|
73 |
+
eval_indiv = [indiv for indiv in indiv_set if indiv not in train_indiv]
|
74 |
valid_indiv = random.sample(eval_indiv,round(0.5*len(eval_indiv)))
|
75 |
test_indiv = [indiv for indiv in eval_indiv if indiv not in valid_indiv]
|
76 |
|