Update README.md
Browse files
README.md
CHANGED
@@ -139,8 +139,8 @@ model_id = "gbyuvd/drugtargetpred-chemselfies"
|
|
139 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
140 |
model = AutoModelForSequenceClassification.from_pretrained(model_id)
|
141 |
|
142 |
-
# Load the
|
143 |
-
with open("
|
144 |
label_dict = json.load(f)
|
145 |
|
146 |
# Create a mapping from label number to CHEMBL ID and target name
|
@@ -235,7 +235,7 @@ Data Selection Criteria:
|
|
235 |
- SMILES: Canonical representations only
|
236 |
|
237 |
Data Preprocessing
|
238 |
-
-
|
239 |
- Balancing the data distribution by setting up min max (1000)
|
240 |
|
241 |
### Training Procedure
|
|
|
139 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
140 |
model = AutoModelForSequenceClassification.from_pretrained(model_id)
|
141 |
|
142 |
+
# Load the label_dict.json file
|
143 |
+
with open("label_dict.json", "r") as f:
|
144 |
label_dict = json.load(f)
|
145 |
|
146 |
# Create a mapping from label number to CHEMBL ID and target name
|
|
|
235 |
- SMILES: Canonical representations only
|
236 |
|
237 |
Data Preprocessing
|
238 |
+
- Conversion from canonical SMILES into SELFIES readable by the base model's tokenizer; any entries that fail conversion are removed
|
239 |
- Balancing the data distribution by setting up min max (1000)
|
240 |
|
241 |
### Training Procedure
|