abuendia committed on
Commit
d02ce4c
1 Parent(s): f3ac683

Pass URI through GET config

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -1
  2. app/main.py +40 -32
Dockerfile CHANGED
@@ -54,4 +54,4 @@ EXPOSE 7681
54
  # Set the working directory where your app resides
55
 
56
  # Command to run the Gradio app automatically
57
- CMD ["python", "app/main.py", "-p", "7681", "-s", "-d", "/data", "-u", "s3://2023-get-xf2217/get_demo/"]
 
54
  # Set the working directory where your app resides
55
 
56
  # Command to run the Gradio app automatically
57
+ CMD ["python", "app/main.py", "-p", "7681", "-s", "-u", "s3://2023-get-xf2217/get_demo", "-d", "/data"]
app/main.py CHANGED
@@ -6,34 +6,31 @@ import matplotlib.pyplot as plt
6
  import pandas as pd
7
  import pkg_resources
8
  from dash_bio import Clustergram
9
- from proscope.data import get_genename_to_uniprot, get_lddt, get_seq
10
-
11
- seq = get_seq()
12
- genename_to_uniprot = get_genename_to_uniprot()
13
- lddt = get_lddt()
14
  import sys
15
  import s3fs
16
  from glob import glob
17
-
18
  import numpy as np
 
19
  from atac_rna_data_processing.config.load_config import load_config
20
  from atac_rna_data_processing.io.celltype import GETCellType
21
  from atac_rna_data_processing.io.nr_motif_v1 import NrMotifV1
22
  from proscope.af2 import AFPairseg
 
23
  from proscope.protein import Protein
24
  from proscope.viewer import view_pdb_html
25
 
 
 
 
 
 
26
  args = argparse.ArgumentParser()
27
  args.add_argument("-p", "--port", type=int, default=7860, help="Port number")
28
  args.add_argument("-s", "--share", action="store_true", help="Share on network")
29
- args.add_argument("-d", "--data", type=str, default="/data", help="Data directory")
30
  args.add_argument("-u", "--s3_uri", type=str, default="None", help="Path to demo S3 bucket")
 
31
  args = args.parse_args()
32
- # set pseudo args
33
- # args = args.parse_args(['-p', '7869', '-s', '-d', '/manitou/pmg/users/xf2217/demo_data'])
34
 
35
- gene_pairs = glob(f"{args.s3_path}/structures/causal/*")
36
- gene_pairs = [os.path.basename(pair) for pair in gene_pairs]
37
  GET_CONFIG = load_config(
38
  "/app/modules/atac_rna_data_processing/atac_rna_data_processing/config/GET"
39
  )
@@ -41,46 +38,57 @@ GET_CONFIG.celltype.jacob = True
41
  GET_CONFIG.celltype.num_cls = 2
42
  GET_CONFIG.celltype.input = True
43
  GET_CONFIG.celltype.embed = True
44
- GET_CONFIG.celltype.data_dir = (
45
- f"{args.s3_path}/pretrain_human_bingren_shendure_apr2023/fetal_adult/"
46
- )
47
- GET_CONFIG.celltype.interpret_dir = (
48
- f"{args.s3_path}/Interpretation_all_hg38_allembed_v4_natac/"
49
- )
50
- GET_CONFIG.motif_dir = "/manitou/pmg/users/xf2217/interpret_natac/motif-clustering"
51
- motif = NrMotifV1.load_from_pickle(
52
- pkg_resources.resource_filename("atac_rna_data_processing", "data/NrMotifV1.pkl"),
53
- # GET_CONFIG.motif_dir,
54
- )
55
- GET_CONFIG.s3_path = args.s3_path if args.s3_path else None
56
 
57
- cell_type_annot = pd.read_csv(
58
- GET_CONFIG.celltype.data_dir.split("fetal_adult")[0]
59
- + "data/cell_type_pretrain_human_bingren_shendure_apr2023.txt"
60
- )
61
- cell_type_id_to_name = dict(zip(cell_type_annot["id"], cell_type_annot["celltype"]))
62
- cell_type_name_to_id = dict(zip(cell_type_annot["celltype"], cell_type_annot["id"]))
63
- if GET_CONFIG.s3_path:
64
  s3 = s3fs.S3FileSystem()
 
 
 
 
 
 
 
65
  available_celltypes = sorted(
66
  [
67
  cell_type_id_to_name[f.split("/")[-1]]
68
  for f in s3.glob(GET_CONFIG.celltype.interpret_dir + "*")
69
  ]
70
  )
 
71
  else:
 
 
 
 
 
 
 
72
  available_celltypes = sorted(
73
  [
74
  cell_type_id_to_name[f.split("/")[-1]]
75
  for f in glob(GET_CONFIG.celltype.interpret_dir + "*")
76
  ]
77
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  plt.rcParams["figure.dpi"] = 100
79
 
80
 
81
  def visualize_AF2(tf_pair, a):
82
- strcture_dir = f"{args.s3_path}/structures/causal/{tf_pair}"
83
- fasta_dir = f"{args.s3_path}/sequences/causal/{tf_pair}"
84
  if not os.path.exists(strcture_dir):
85
  gr.ErrorText("No such gene pair")
86
 
 
6
  import pandas as pd
7
  import pkg_resources
8
  from dash_bio import Clustergram
 
 
 
 
 
9
  import sys
10
  import s3fs
11
  from glob import glob
 
12
  import numpy as np
13
+
14
  from atac_rna_data_processing.config.load_config import load_config
15
  from atac_rna_data_processing.io.celltype import GETCellType
16
  from atac_rna_data_processing.io.nr_motif_v1 import NrMotifV1
17
  from proscope.af2 import AFPairseg
18
+ from proscope.data import get_genename_to_uniprot, get_lddt, get_seq
19
  from proscope.protein import Protein
20
  from proscope.viewer import view_pdb_html
21
 
22
+
23
+ seq = get_seq()
24
+ genename_to_uniprot = get_genename_to_uniprot()
25
+ lddt = get_lddt()
26
+
27
  args = argparse.ArgumentParser()
28
  args.add_argument("-p", "--port", type=int, default=7860, help="Port number")
29
  args.add_argument("-s", "--share", action="store_true", help="Share on network")
 
30
  args.add_argument("-u", "--s3_uri", type=str, default="None", help="Path to demo S3 bucket")
31
+ args.add_argument("-d", "--data", type=str, default="None", help="Data directory")
32
  args = args.parse_args()
 
 
33
 
 
 
34
  GET_CONFIG = load_config(
35
  "/app/modules/atac_rna_data_processing/atac_rna_data_processing/config/GET"
36
  )
 
38
  GET_CONFIG.celltype.num_cls = 2
39
  GET_CONFIG.celltype.input = True
40
  GET_CONFIG.celltype.embed = True
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
+ if args.s3_uri: # Use S3 path if exists
43
+ GET_CONFIG.s3_uri = args.s3_uri
 
 
 
 
 
44
  s3 = s3fs.S3FileSystem()
45
+ GET_CONFIG.celltype.data_dir = (
46
+ f"{args.s3_uri}/pretrain_human_bingren_shendure_apr2023/fetal_adult/"
47
+ )
48
+ GET_CONFIG.celltype.interpret_dir = (
49
+ f"{args.s3_uri}/Interpretation_all_hg38_allembed_v4_natac/"
50
+ )
51
+ GET_CONFIG.motif_dir = f"{args.s3_uri}/interpret_natac/motif-clustering"
52
  available_celltypes = sorted(
53
  [
54
  cell_type_id_to_name[f.split("/")[-1]]
55
  for f in s3.glob(GET_CONFIG.celltype.interpret_dir + "*")
56
  ]
57
  )
58
+ gene_pairs = s3.glob(f"{args.s3_uri}/structures/causal/*")
59
  else:
60
+ GET_CONFIG.celltype.data_dir = (
61
+ f"{args.data}/pretrain_human_bingren_shendure_apr2023/fetal_adult/"
62
+ )
63
+ GET_CONFIG.celltype.interpret_dir = (
64
+ f"{args.data}/Interpretation_all_hg38_allembed_v4_natac/"
65
+ )
66
+ GET_CONFIG.motif_dir = f"{args.data}/interpret_natac/motif-clustering"
67
  available_celltypes = sorted(
68
  [
69
  cell_type_id_to_name[f.split("/")[-1]]
70
  for f in glob(GET_CONFIG.celltype.interpret_dir + "*")
71
  ]
72
  )
73
+ gene_pairs = glob(f"{args.data}/structures/causal/*")
74
+
75
+ gene_pairs = [os.path.basename(pair) for pair in gene_pairs]
76
+ motif = NrMotifV1.load_from_pickle(
77
+ pkg_resources.resource_filename("atac_rna_data_processing", "data/NrMotifV1.pkl"),
78
+ GET_CONFIG.motif_dir,
79
+ )
80
+ cell_type_annot = pd.read_csv(
81
+ GET_CONFIG.celltype.data_dir.split("fetal_adult")[0]
82
+ + "data/cell_type_pretrain_human_bingren_shendure_apr2023.txt"
83
+ )
84
+ cell_type_id_to_name = dict(zip(cell_type_annot["id"], cell_type_annot["celltype"]))
85
+ cell_type_name_to_id = dict(zip(cell_type_annot["celltype"], cell_type_annot["id"]))
86
  plt.rcParams["figure.dpi"] = 100
87
 
88
 
89
  def visualize_AF2(tf_pair, a):
90
+ strcture_dir = f"{args.s3_uri}/structures/causal/{tf_pair}"
91
+ fasta_dir = f"{args.s3_uri}/sequences/causal/{tf_pair}"
92
  if not os.path.exists(strcture_dir):
93
  gr.ErrorText("No such gene pair")
94