fuxialexander committed on
Commit
764b22c
1 Parent(s): 558f756
Files changed (2) hide show
  1. app/main.py +45 -32
  2. modules/proscope +1 -1
app/main.py CHANGED
@@ -127,7 +127,7 @@ def plot_motif_corr(cell):
127
  link_method="ward",
128
  display_ratio=0.1,
129
  width=600,
130
- height=400,
131
  color_map="rdbu_r",
132
  )
133
  fig["layout"].update(coloraxis_showscale=False)
@@ -144,7 +144,7 @@ if __name__ == "__main__":
144
  """
145
  # GET: A Foundation Model of Transcription Across Human Cell Types
146
 
147
- _Transcriptional regulation, involving the complex interplay between regulatory sequences and proteins,
148
  directs all biological processes. Computational models of transcriptions lack generalizability
149
  to accurately extrapolate in unseen cell types and conditions. Here, we introduce GET,
150
  an interpretable foundation model, designed to uncover deep regulatory patterns across 235 human fetal and adult cell types.
@@ -158,7 +158,7 @@ if __name__ == "__main__":
158
  In particular, we show GET outperforms current models in predicting lentivirus-based massive parallel reporter assay readout with reduced input data.
159
  In fetal erythroblast, we are able to identify distant (>1Mbps) regulatory regions that were missed by previous models.
160
  In sum, we provide a generalizable and predictive cell type specific model for transcription together with catalogs of gene regulation and transcription factor interactions.
161
- Benefit from this catalog, we are able to provide mechanistic understanding of previously unknown significance germline coding variants in disordered regions of PAX5, a lymphoma associated transcription factor._
162
  """
163
  )
164
 
@@ -168,56 +168,56 @@ if __name__ == "__main__":
168
  gr.Markdown(
169
  """
170
  ## Prediction performance
171
- This section allows the selection of cell types and provides a plot depicting the observed versus predicted gene expression levels.
172
  """
173
  )
174
- with gr.Row() as row:
175
- celltype_name = gr.Dropdown(
176
- label="Cell Type", choices=avaliable_celltypes
177
- )
178
- celltype_btn = gr.Button(value="Load & Plot Gene Expression")
179
- gene_exp_plot = gr.Plot(label="Gene Expression Pred vs Obs")
180
 
181
  # Right column: Plot gene motifs
182
  with gr.Column():
183
  gr.Markdown(
184
  """
185
  ## Cell-type specific regulatory inference
186
- This section allows the selection of a gene and provides plots of its cell-type specific regulatory regions and motifs.
187
  """
188
  )
189
  gene_name_for_region = gr.Textbox(
190
- label="Get important regions or motifs for gene:"
191
  )
192
  with gr.Row() as row:
193
  region_plot_btn = gr.Button(value="Regions")
194
  motif_plot_btn = gr.Button(value="Motifs")
195
 
196
- region_plot = gr.Plot(label="Gene Regions")
197
- motif_plot = gr.Plot(label="Gene Motifs")
198
 
199
  gr.Markdown(
200
  """
201
  ## Motif correlation and causal subnetworks
202
 
203
- Here, you can generate a heatmap to visualize motif correlations. Alternatively, you can explore the causal subnetworks related to specific motifs by selecting the motif and the type of subnetwork you are interested in, along with a effect size threshold.
 
204
  """
205
  )
206
  with gr.Row() as row:
207
  with gr.Column():
208
- clustergram_btn = gr.Button(value="Plot Motif Correlation Heatmap")
209
- clustergram_plot = gr.Plot(label="Motif Correlation")
210
 
211
  # Right column: Motif subnet plot
212
  with gr.Column():
213
  with gr.Row() as row:
214
  motif_for_subnet = gr.Dropdown(
215
- label="Motif Causal Subnetwork", choices=motif.cluster_names
216
  )
217
  subnet_type = gr.Dropdown(
218
- label="Type",
219
  choices=["neighbors", "parents", "children"],
220
- default="neighbors",
221
  )
222
  # slider for threshold 0.01-0.2
223
  subnet_threshold = gr.Slider(
@@ -234,27 +234,40 @@ Here, you can generate a heatmap to visualize motif correlations. Alternatively,
234
  """
235
  ## Structural atlas of TF-TF and TF-EP300 interactions
236
 
237
- This section allows you to explore transcription factor pairs. You can visualize various metrics such as Heatmaps and pLDDT (predicted Local Distance Difference Test) for both proteins in the interacting pair. You can also download the PDB file for specific segment pairs.
 
 
 
 
 
 
 
 
 
 
238
  """
239
  )
 
240
  with gr.Row() as row:
241
  with gr.Column():
242
- with gr.Row() as row:
243
- tf_pairs = gr.Dropdown(label="TF pair", choices=gene_pairs)
244
- tf_pairs_btn = gr.Button(value="Load & Plot")
245
- heatmap = gr.Plot(label="Heatmap")
246
- interact_plddt1 = gr.Plot(label="Interact pLDDT 1")
247
- interact_plddt2 = gr.Plot(label="Interact pLDDT 2")
248
  protein1_plddt = gr.Plot(label="Protein 1 pLDDT")
 
 
249
  protein2_plddt = gr.Plot(label="Protein 2 pLDDT")
250
-
 
 
251
  with gr.Column():
252
- with gr.Row() as row:
253
- segpair = gr.Dropdown(label="Seg pair", choices=seg_pairs.value)
254
- segpair_btn = gr.Button(value="Get PDB")
 
 
 
 
255
  pdb_html = gr.HTML(label="PDB HTML")
256
  pdb_file = gr.File(label="Download PDB")
257
-
258
  tf_pairs_btn.click(
259
  visualize_AF2,
260
  inputs=[tf_pairs, af],
 
127
  link_method="ward",
128
  display_ratio=0.1,
129
  width=600,
130
+ height=350,
131
  color_map="rdbu_r",
132
  )
133
  fig["layout"].update(coloraxis_showscale=False)
 
144
  """
145
  # GET: A Foundation Model of Transcription Across Human Cell Types
146
 
147
+ Transcriptional regulation, involving the complex interplay between regulatory sequences and proteins,
148
  directs all biological processes. Computational models of transcriptions lack generalizability
149
  to accurately extrapolate in unseen cell types and conditions. Here, we introduce GET,
150
  an interpretable foundation model, designed to uncover deep regulatory patterns across 235 human fetal and adult cell types.
 
158
  In particular, we show GET outperforms current models in predicting lentivirus-based massive parallel reporter assay readout with reduced input data.
159
  In fetal erythroblast, we are able to identify distant (>1Mbps) regulatory regions that were missed by previous models.
160
  In sum, we provide a generalizable and predictive cell type specific model for transcription together with catalogs of gene regulation and transcription factor interactions.
161
+ Benefiting from this catalog, we are able to provide a mechanistic understanding of a germline coding variant of previously unknown significance in disordered regions of PAX5, a lymphoma-associated transcription factor.
162
  """
163
  )
164
 
 
168
  gr.Markdown(
169
  """
170
  ## Prediction performance
171
+ This section allows the selection of cell types and provides a plot depicting the observed versus predicted gene expression levels. Note that cell types without observed gene expression data will show a vertical line at 0.
172
  """
173
  )
174
+ celltype_name = gr.Dropdown(
175
+ label="Cell Type", choices=avaliable_celltypes, value='Fetal Astrocyte 1'
176
+ )
177
+ celltype_btn = gr.Button(value="Load & plot gene expression")
178
+ gene_exp_plot = gr.Plot(label="Gene expression prediction vs observation")
 
179
 
180
  # Right column: Plot gene motifs
181
  with gr.Column():
182
  gr.Markdown(
183
  """
184
  ## Cell-type specific regulatory inference
185
+ This section allows the selection of a gene and provides plots of its cell-type specific regulatory regions and expression-promoting motifs. Hovering over the highlighted (top 10%) regions will show the regional motifs and their score.
186
  """
187
  )
188
  gene_name_for_region = gr.Textbox(
189
+ label="Get important regions or motifs for gene:", value="BCL11A"
190
  )
191
  with gr.Row() as row:
192
  region_plot_btn = gr.Button(value="Regions")
193
  motif_plot_btn = gr.Button(value="Motifs")
194
 
195
+ region_plot = gr.Plot(label="Important regions")
196
+ motif_plot = gr.Plot(label="Important motifs")
197
 
198
  gr.Markdown(
199
  """
200
  ## Motif correlation and causal subnetworks
201
 
202
+ Here, you can generate a heatmap to visualize motif correlations. You can also explore the causal subnetworks related to specific motifs by selecting the motif and the type of subnetwork you are interested in, along with an effect size threshold.
203
+ Node size represents the mean expression value of TFs associated with the motif. Edge width represents the effect size of the interaction. Red edges represent positive effect, while blue edges represent negative effect.
204
  """
205
  )
206
  with gr.Row() as row:
207
  with gr.Column():
208
+ clustergram_btn = gr.Button(value="Plot motif correlation heatmap")
209
+ clustergram_plot = gr.Plot(label="Motif correlation")
210
 
211
  # Right column: Motif subnet plot
212
  with gr.Column():
213
  with gr.Row() as row:
214
  motif_for_subnet = gr.Dropdown(
215
+ label="Motif causal subnetwork", choices=motif.cluster_names, value='KLF/SP/2'
216
  )
217
  subnet_type = gr.Dropdown(
218
+ label="Interaction type",
219
  choices=["neighbors", "parents", "children"],
220
+ value="neighbors",
221
  )
222
  # slider for threshold 0.01-0.2
223
  subnet_threshold = gr.Slider(
 
234
  """
235
  ## Structural atlas of TF-TF and TF-EP300 interactions
236
 
237
+ This section allows you to explore transcription factor pairs identified in the causal network. You can visualize various metrics such as Heatmaps and pLDDT (predicted Local Distance Difference Test) for both proteins in the interacting pair.
238
+ The top row is the pLDDT segmentation plot for the two TFs. pLDDT is a good measure of protein disorder. We use it to identify the disordered regions of the protein.
239
+ Each TF is split into disordered segments and ordered segments, which are named numerically as ZFX_0, ZFX_1, etc. The disordered segments are labeled in red. Annotation from UniProt is also provided when available.
240
+ The second row is the interaction pLDDT plot. In this plot, we perform all-against-all AlphaFold2 predictions for the segments of the two TFs and plot the pLDDT score for each segment pair in comparison to the pLDDT score of the monomer structure of the two TFs.
241
+ If we find a region that has a higher pLDDT score than the monomer structure, we can infer that this region is stabilized by the interaction between the two TFs.
242
+ The third row is the heatmap plot. In this plot, we plot the interaction score for each segment pair, which includes:
243
+ - interchain min pAE: smaller is better. This is the minimum predicted AlphaFold2 pAE score between the two segments. Well-bound protein-protein interactions usually have a low interchain pAE score.
244
+ - mean pLDDT: larger is better. This is the mean predicted AlphaFold2 pLDDT score of the two segments, a measure of prediction confidence or (inverse-)disorderness.
245
+ - ipTM: larger is better. This is the interaction interface TM score of the two segments, a measure of the quality of the predicted interactions produced by AlphaFold2.
246
+ - pDockQ: larger is better. This is the pDockQ score of the two segments, which is a measure of the quality of the predicted interactions.
247
+ You can download the PDB file for specific segment pairs by clicking the 'Get PDB' button.
248
  """
249
  )
250
+
251
  with gr.Row() as row:
252
  with gr.Column():
 
 
 
 
 
 
253
  protein1_plddt = gr.Plot(label="Protein 1 pLDDT")
254
+ interact_plddt1 = gr.Plot(label="Interact pLDDT 1")
255
+ with gr.Column():
256
  protein2_plddt = gr.Plot(label="Protein 2 pLDDT")
257
+ interact_plddt2 = gr.Plot(label="Interact pLDDT 2")
258
+
259
+ with gr.Row() as row:
260
  with gr.Column():
261
+ tf_pairs = gr.Dropdown(label="TF pair", choices=gene_pairs)
262
+ tf_pairs_btn = gr.Button(value="Load & Plot")
263
+ heatmap = gr.Plot(label="Heatmap")
264
+
265
+ with gr.Column():
266
+ segpair = gr.Dropdown(label="Seg pair", choices=seg_pairs.value)
267
+ segpair_btn = gr.Button(value="Get PDB")
268
  pdb_html = gr.HTML(label="PDB HTML")
269
  pdb_file = gr.File(label="Download PDB")
270
+
271
  tf_pairs_btn.click(
272
  visualize_AF2,
273
  inputs=[tf_pairs, af],
modules/proscope CHANGED
@@ -1 +1 @@
1
- Subproject commit 55ad8eb1b962471f4e1f9b3b451a20d31713bb8e
 
1
+ Subproject commit 4d3e34581207257cce31758bda8f3ea2b15e260b