Spaces:

PascalNotin
/

Tranception_design

Build error

App Files Files Community

PascalNotin committed on Aug 26, 2022

Commit

7275554

•

1 Parent(s): b66f09d

Adjusted heatmap size and device printing

Browse files

Files changed (1) hide show

app.py +10 -7

app.py CHANGED Viewed

@@ -39,12 +39,12 @@ def create_all_single_mutants(sequence,AA_vocab=AA_vocab,mutation_range_start=No
   return all_single_mutants
 def create_scoring_matrix_visual(scores,sequence,AA_vocab=AA_vocab,mutation_range_start=None,mutation_range_end=None,annotate=True,fontsize=20):
   piv=scores.pivot(index='position',columns='target_AA',values='avg_score').round(4)
-  fig, ax = plt.subplots(figsize=(50,len(sequence)*0.6))
   scores_dict = {}
   valid_mutant_set=set(scores.mutant)
-  if mutation_range_start is None: mutation_range_start=1
-  if mutation_range_end is None: mutation_range_end=len(sequence)
   ax.tick_params(bottom=True, top=True, left=True, right=True)
   ax.tick_params(labelbottom=True, labeltop=True, labelleft=True, labelright=True)
   if annotate:
@@ -63,7 +63,6 @@ def create_scoring_matrix_visual(scores,sequence,AA_vocab=AA_vocab,mutation_rang
                 cbar_kws={'label': 'Log likelihood ratio (mutant / starting sequence)'},annot_kws={"size": fontsize})
   heat.figure.axes[-1].yaxis.label.set_size(fontsize=int(fontsize*1.5))
   heat.figure.axes[-1].yaxis.set_ticklabels(heat.figure.axes[-1].yaxis.get_ticklabels(), fontsize=fontsize)
-  #heat.figure.axes[-1].yaxis.set_ticklabels(fontsize=fontsize)
   heat.set_title("Higher predicted scores (green) imply higher protein fitness",fontsize=fontsize*2, pad=40)
   heat.set_ylabel("Sequence position", fontsize = fontsize*2)
   heat.set_xlabel("Amino Acid mutation", fontsize = fontsize*2)
@@ -87,7 +86,6 @@ def suggest_mutations(scores):
   positive_scores = scores[scores.avg_score > 0]
   positive_scores_position_avg = positive_scores.groupby(['position']).mean()
   top_positions=list(positive_scores_position_avg.sort_values(by=['avg_score'],ascending=False).head(5).index.astype(str))
-  print(top_positions)
   position_recos = "The positions with the highest average fitness increase are (only positions with at least one fitness increase are considered):\n {}".format(", ".join(top_positions))
   return intro_message+mutant_recos+position_recos
@@ -115,6 +113,11 @@ def score_and_create_matrix_all_singles(sequence,mutation_range_start=None,mutat
     model = tranception.model_pytorch.TranceptionLMHeadModel.from_pretrained(pretrained_model_name_or_path="PascalNotin/Tranception_Medium")
   elif model_type=="Large":
     model = tranception.model_pytorch.TranceptionLMHeadModel.from_pretrained(pretrained_model_name_or_path="PascalNotin/Tranception_Large")
   model.config.tokenizer = tokenizer
   all_single_mutants = create_all_single_mutants(sequence,AA_vocab,mutation_range_start,mutation_range_end)
   scores = model.score_mutants(DMS_data=all_single_mutants,
@@ -205,10 +208,10 @@ with tranception_design:
     )
     gr.Markdown("<br>")
     gr.Markdown("# Fitness predictions for all single amino acid substitutions in mutation range")
     #output_plot = gr.Plot(label="Fitness predictions for all single amino acid substitutions in mutation range")
     #output_image = gr.Image(label="Fitness predictions for all single amino acid substitutions in mutation range",type="filepath")
-    output_image = gr.Gallery(label="Fitness predictions (inference may take a few seconds for short proteins & mutation ranges to several minutes for longer ones)",type="filepath") #Using Gallery to be able to scroll large matrix images
     output_recommendations = gr.Textbox(label="Mutation recommendations")

   return all_single_mutants
 def create_scoring_matrix_visual(scores,sequence,AA_vocab=AA_vocab,mutation_range_start=None,mutation_range_end=None,annotate=True,fontsize=20):
+  if mutation_range_start is None: mutation_range_start=1
+  if mutation_range_end is None: mutation_range_end=len(sequence)
   piv=scores.pivot(index='position',columns='target_AA',values='avg_score').round(4)
+  fig, ax = plt.subplots(figsize=(min(len(sequence),50),len(sequence)))
   scores_dict = {}
   valid_mutant_set=set(scores.mutant)
   ax.tick_params(bottom=True, top=True, left=True, right=True)
   ax.tick_params(labelbottom=True, labeltop=True, labelleft=True, labelright=True)
   if annotate:
                 cbar_kws={'label': 'Log likelihood ratio (mutant / starting sequence)'},annot_kws={"size": fontsize})
   heat.figure.axes[-1].yaxis.label.set_size(fontsize=int(fontsize*1.5))
   heat.figure.axes[-1].yaxis.set_ticklabels(heat.figure.axes[-1].yaxis.get_ticklabels(), fontsize=fontsize)
   heat.set_title("Higher predicted scores (green) imply higher protein fitness",fontsize=fontsize*2, pad=40)
   heat.set_ylabel("Sequence position", fontsize = fontsize*2)
   heat.set_xlabel("Amino Acid mutation", fontsize = fontsize*2)
   positive_scores = scores[scores.avg_score > 0]
   positive_scores_position_avg = positive_scores.groupby(['position']).mean()
   top_positions=list(positive_scores_position_avg.sort_values(by=['avg_score'],ascending=False).head(5).index.astype(str))
   position_recos = "The positions with the highest average fitness increase are (only positions with at least one fitness increase are considered):\n {}".format(", ".join(top_positions))
   return intro_message+mutant_recos+position_recos
     model = tranception.model_pytorch.TranceptionLMHeadModel.from_pretrained(pretrained_model_name_or_path="PascalNotin/Tranception_Medium")
   elif model_type=="Large":
     model = tranception.model_pytorch.TranceptionLMHeadModel.from_pretrained(pretrained_model_name_or_path="PascalNotin/Tranception_Large")
+  if torch.cuda.is_available():
+    model.cuda()
+    print("Inference will take place on GPU")
+  else:
+    print("Inference will take place on CPU")
   model.config.tokenizer = tokenizer
   all_single_mutants = create_all_single_mutants(sequence,AA_vocab,mutation_range_start,mutation_range_end)
   scores = model.score_mutants(DMS_data=all_single_mutants,
     )
     gr.Markdown("<br>")
     gr.Markdown("# Fitness predictions for all single amino acid substitutions in mutation range")
+    gr.Markdown("Inference may take a few seconds for short proteins & mutation ranges to several minutes for longer ones")
     #output_plot = gr.Plot(label="Fitness predictions for all single amino acid substitutions in mutation range")
     #output_image = gr.Image(label="Fitness predictions for all single amino acid substitutions in mutation range",type="filepath")
+    output_image = gr.Gallery(label="Fitness predictions for all single amino acid substitutions in mutation range",type="filepath") #Using Gallery to be able to scroll large matrix images
     output_recommendations = gr.Textbox(label="Mutation recommendations")