thrumbel committed on
Commit
e4ff7ef
•
1 Parent(s): e5567bd

First commit of app

Browse files
Files changed (4) hide show
  1. README.md +16 -3
  2. app.py +120 -0
  3. pre-requirements.txt +2 -0
  4. requirements.txt +6 -0
README.md CHANGED
@@ -1,14 +1,27 @@
1
  ---
2
  title: Biomed.sm.mv Te 84m
3
- emoji: 🐒
4
- colorFrom: yellow
5
- colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 5.4.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
  short_description: Prediction task tests for biomed-multi-view models
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Biomed.sm.mv Te 84m
3
+ emoji: 👁‍🗨
4
+ colorFrom: blue
5
+ colorTo: blue
6
  sdk: gradio
7
  sdk_version: 5.4.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
  short_description: Prediction task tests for biomed-multi-view models
12
+ preload_from_hub:
13
+ - ibm/biomed.sm.mv-te-84m
14
+ - ibm/biomed.sm.mv-te-84m-MoleculeNet-ligand_scaffold-BACE-101
15
+ - ibm/biomed.sm.mv-te-84m-MoleculeNet-ligand_scaffold-BBBP-101
16
+ - ibm/biomed.sm.mv-te-84m-MoleculeNet-ligand_scaffold-CLINTOX-101
17
+ - ibm/biomed.sm.mv-te-84m-MoleculeNet-ligand_scaffold-ESOL-101
18
+ - ibm/biomed.sm.mv-te-84m-MoleculeNet-ligand_scaffold-FREESOLV-101
19
+ - ibm/biomed.sm.mv-te-84m-MoleculeNet-ligand_scaffold-HIV-101
20
+ - ibm/biomed.sm.mv-te-84m-MoleculeNet-ligand_scaffold-LIPOPHILICITY-101
21
+ - ibm/biomed.sm.mv-te-84m-MoleculeNet-ligand_scaffold-MUV-101
22
+ - ibm/biomed.sm.mv-te-84m-MoleculeNet-ligand_scaffold-QM7-101
23
+ - ibm/biomed.sm.mv-te-84m-MoleculeNet-ligand_scaffold-SIDER-101
24
+ - ibm/biomed.sm.mv-te-84m-MoleculeNet-ligand_scaffold-TOX21-101
25
  ---
26
 
27
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bmfm_sm.api.smmv_api import SmallMoleculeMultiViewModel
2
+ from bmfm_sm.core.data_modules.namespace import LateFusionStrategy
3
+ from bmfm_sm.api.dataset_registry import DatasetRegistry
4
+
5
+ import gradio as gr
6
+
7
+
8
# Example rows offered in the UI: [SMILES string, checkpoint name].
# The checkpoint name is either a key of `available_datasets` or the
# literal "Pretrained" (the base model, registered by deploy()).
examples = [
    ["CC(C)CC1=CC=C(C=C1)C(C)C(=O)O", "BACE"],
    ["CC(C)CC1=CC=C(C=C1)C(C)C(=O)O", "BBBP"],
    ["[N+](=O)([O-])[O-]", "CLINTOX"],
    ["OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)C(O)C3O", "ESOL"],
    ["CN(C)C(=O)c1ccc(cc1)OC", "FREESOLV"],
    ["CC(C)CC1=CC=C(C=C1)C(C)C(=O)O", "HIV"],
    ["Cn1c(CN2CCN(CC2)c3ccc(Cl)cc3)nc4ccccc14", "LIPOPHILICITY"],
    ["Cc1cccc(N2CCN(C(=O)C34CC5CC(CC(C5)C3)C4)CC2)c1C", "MUV"],
    ["C([H])([H])([H])[H]", "QM7"],
    ["C(CNCCNCCNCCN)N", "SIDER"],
    ["CCOc1ccc2nc(S(N)(=O)=O)sc2c1", "TOX21"],
    ["CSc1nc(N)nc(-c2cccc(-c3ccc4[nH]ccc4c3)c2)n1", "Pretrained"],
]

# Hugging Face repo id of the base (pretrained) model.
base_huggingface_path = 'ibm/biomed.sm.mv-te-84m'
# Infix that joins the base repo id to a dataset name in each finetuned
# checkpoint's repo id.
finetuned_huggingface_path = "-MoleculeNet-ligand_scaffold-"

# Dataset name -> repo id of the checkpoint finetuned on that dataset.
# Every id has the form <base><infix><DATASET>-101, so the mapping is derived
# from the two constants above instead of repeating the prefix eleven times.
available_datasets = {
    dataset: f"{base_huggingface_path}{finetuned_huggingface_path}{dataset}-101"
    for dataset in (
        "BACE",
        "BBBP",
        "CLINTOX",
        "ESOL",
        "FREESOLV",
        "HIV",
        "LIPOPHILICITY",
        "MUV",
        "QM7",
        "SIDER",
        "TOX21",
    )
}
39
+
40
+
41
class PretrainedSMMVPipeline:
    """Callable wrapper around the pretrained multi-view model.

    Calling an instance with a SMILES string returns the model's embedding
    rendered as text.
    """

    def __init__(self, pretrained_model_name_or_path: str):
        """Load the pretrained model.

        Args:
            pretrained_model_name_or_path: Forwarded to
                ``SmallMoleculeMultiViewModel.from_pretrained`` as
                ``model_path``, together with ``huggingface=True``.
        """
        self.model = SmallMoleculeMultiViewModel.from_pretrained(
            LateFusionStrategy.ATTENTIONAL,
            model_path=pretrained_model_name_or_path,
            huggingface=True,
        )

    def __call__(self, smiles: str) -> str:
        """Return the embedding of ``smiles`` as a string.

        Args:
            smiles: SMILES string handed to ``get_embeddings``.

        Returns:
            ``str()`` of the embedding converted with ``.tolist()``. The
            result is text, not a float, which is why the annotation is
            ``str``.
        """
        emb = SmallMoleculeMultiViewModel.get_embeddings(
            smiles=smiles,
            pretrained_model=self.model,
        )
        return str(emb.tolist())
55
+
56
+
57
class FinetunedSMMVPipeline:
    """Callable wrapper around a model finetuned on one dataset.

    Calling an instance with a SMILES string returns the model's prediction
    rendered as text.
    """

    def __init__(self, dataset: str, pretrained_model_name_or_path: str):
        """Look up the dataset info and load the matching finetuned model.

        Args:
            dataset: Dataset name passed to
                ``DatasetRegistry().get_dataset_info``.
            pretrained_model_name_or_path: Forwarded to
                ``SmallMoleculeMultiViewModel.from_finetuned`` as
                ``model_path``, together with ``inference_mode=True`` and
                ``huggingface=True``.
        """
        dataset_registry = DatasetRegistry()
        # Kept on the instance because get_predictions needs it again.
        self.ds = dataset_registry.get_dataset_info(dataset)
        self.model = SmallMoleculeMultiViewModel.from_finetuned(
            self.ds,
            model_path=pretrained_model_name_or_path,
            inference_mode=True,
            huggingface=True,
        )

    def __call__(self, smiles: str) -> str:
        """Return the prediction for ``smiles`` as a string.

        Args:
            smiles: SMILES string handed to ``get_predictions``.

        Returns:
            ``str()`` of the prediction converted with ``.tolist()``. The
            result is text, not a float, which is why the annotation is
            ``str``.
        """
        prediction = SmallMoleculeMultiViewModel.get_predictions(
            smiles,
            self.ds,
            finetuned_model=self.model,
        )
        return str(prediction.tolist())
75
+
76
+
77
def deploy():
    """Load all checkpoints, build the Gradio interface and launch it.

    The base model is registered under the key "Pretrained"; each entry of
    ``available_datasets`` is registered under its dataset name. The UI takes
    a SMILES string and a checkpoint name and shows the selected pipeline's
    output as text.
    """
    print(f"Loading checkpoint: Pretrained from {base_huggingface_path}")
    # One registry for every selectable checkpoint, base model included.
    pipelines_finetuned = {
        "Pretrained": PretrainedSMMVPipeline(base_huggingface_path),
    }

    for name, repo_id in available_datasets.items():
        print(f"Loading checkpoint: {name} from {repo_id}")
        pipelines_finetuned[name] = FinetunedSMMVPipeline(
            dataset=name,
            pretrained_model_name_or_path=repo_id,
        )

    def pipeline(smiles: str, dataset: str):
        """Run the pipeline registered under ``dataset`` on ``smiles``."""
        selected = pipelines_finetuned[dataset]
        return selected(smiles)

    gradio_app = gr.Interface(
        pipeline,
        inputs=[
            gr.Textbox(placeholder="SMILES", label="SMILES"),
            gr.Dropdown(
                choices=list(pipelines_finetuned),
                label="Checkpoint",
            ),
        ],
        outputs=gr.Textbox(
            max_lines=10,
            label="Prediction",
        ),
        examples=examples,
        examples_per_page=20,
        title="ibm/biomed.sm.mv-te-84m property prediction tasks",
        description="Predictions for Pretrained show embedding vector of base model. Predictions for datasets show output of model finetuned on that task",
    )

    gradio_app.launch()


if __name__ == "__main__":
    deploy()
pre-requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio
2
+ git+https://github.com/BiomedSciAI/biomed-multi-view@main
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ pyg_lib
2
+ torch_scatter
3
+ torch_cluster
4
+ torch_spline_conv
5
+ -f https://data.pyg.org/whl/torch-2.1.0+cu121.html
6
+ pytorch-fast-transformers==0.4.0