abuendia committed on
Commit
f3ac683
1 Parent(s): bfb3f96

Read from s3 in main.py

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -1
  2. app/main.py +26 -12
Dockerfile CHANGED
@@ -54,4 +54,4 @@ EXPOSE 7681
54
  # Set the working directory where your app resides
55
 
56
  # Command to run the Gradio app automatically
57
- CMD ["python", "app/main.py", "-p", "7681", "-s", "-d", "/data"]
 
54
  # Set the working directory where your app resides
55
 
56
  # Command to run the Gradio app automatically
57
+ CMD ["python", "app/main.py", "-p", "7681", "-s", "-d", "/data", "-u", "s3://2023-get-xf2217/get_demo/"]
app/main.py CHANGED
@@ -12,6 +12,7 @@ seq = get_seq()
12
  genename_to_uniprot = get_genename_to_uniprot()
13
  lddt = get_lddt()
14
  import sys
 
15
  from glob import glob
16
 
17
  import numpy as np
@@ -26,10 +27,12 @@ args = argparse.ArgumentParser()
26
  args.add_argument("-p", "--port", type=int, default=7860, help="Port number")
27
  args.add_argument("-s", "--share", action="store_true", help="Share on network")
28
  args.add_argument("-d", "--data", type=str, default="/data", help="Data directory")
 
29
  args = args.parse_args()
30
  # set pseudo args
31
  # args = args.parse_args(['-p', '7869', '-s', '-d', '/manitou/pmg/users/xf2217/demo_data'])
32
- gene_pairs = glob(f"{args.data}/structures/causal/*")
 
33
  gene_pairs = [os.path.basename(pair) for pair in gene_pairs]
34
  GET_CONFIG = load_config(
35
  "/app/modules/atac_rna_data_processing/atac_rna_data_processing/config/GET"
@@ -39,34 +42,45 @@ GET_CONFIG.celltype.num_cls = 2
39
  GET_CONFIG.celltype.input = True
40
  GET_CONFIG.celltype.embed = True
41
  GET_CONFIG.celltype.data_dir = (
42
- f"{args.data}/pretrain_human_bingren_shendure_apr2023/fetal_adult/"
43
  )
44
  GET_CONFIG.celltype.interpret_dir = (
45
- f"{args.data}/Interpretation_all_hg38_allembed_v4_natac/"
46
  )
47
  GET_CONFIG.motif_dir = "/manitou/pmg/users/xf2217/interpret_natac/motif-clustering"
48
  motif = NrMotifV1.load_from_pickle(
49
  pkg_resources.resource_filename("atac_rna_data_processing", "data/NrMotifV1.pkl"),
50
  # GET_CONFIG.motif_dir,
51
  )
 
 
52
  cell_type_annot = pd.read_csv(
53
  GET_CONFIG.celltype.data_dir.split("fetal_adult")[0]
54
  + "data/cell_type_pretrain_human_bingren_shendure_apr2023.txt"
55
  )
56
  cell_type_id_to_name = dict(zip(cell_type_annot["id"], cell_type_annot["celltype"]))
57
  cell_type_name_to_id = dict(zip(cell_type_annot["celltype"], cell_type_annot["id"]))
58
- avaliable_celltypes = sorted(
59
- [
60
- cell_type_id_to_name[f.split("/")[-1]]
61
- for f in glob(GET_CONFIG.celltype.interpret_dir + "*")
62
- ]
63
- )
 
 
 
 
 
 
 
 
 
64
  plt.rcParams["figure.dpi"] = 100
65
 
66
 
67
  def visualize_AF2(tf_pair, a):
68
- strcture_dir = f"{args.data}/structures/causal/{tf_pair}"
69
- fasta_dir = f"{args.data}/sequences/causal/{tf_pair}"
70
  if not os.path.exists(strcture_dir):
71
  gr.ErrorText("No such gene pair")
72
 
@@ -185,7 +199,7 @@ This section enables you to select different cell types and generates a plot tha
185
  """
186
  )
187
  celltype_name = gr.Dropdown(
188
- label="Cell Type", choices=avaliable_celltypes, value='Fetal Astrocyte 1'
189
  )
190
  celltype_btn = gr.Button(value="Load & plot gene expression")
191
  gene_exp_plot = gr.Plot(label="Gene expression prediction vs observation")
 
12
  genename_to_uniprot = get_genename_to_uniprot()
13
  lddt = get_lddt()
14
  import sys
15
+ import s3fs
16
  from glob import glob
17
 
18
  import numpy as np
 
27
  args.add_argument("-p", "--port", type=int, default=7860, help="Port number")
28
  args.add_argument("-s", "--share", action="store_true", help="Share on network")
29
  args.add_argument("-d", "--data", type=str, default="/data", help="Data directory")
30
+ args.add_argument("-u", "--s3_uri", type=str, default="None", help="Path to demo S3 bucket")
31
  args = args.parse_args()
32
  # set pseudo args
33
  # args = args.parse_args(['-p', '7869', '-s', '-d', '/manitou/pmg/users/xf2217/demo_data'])
34
+
35
+ gene_pairs = glob(f"{args.s3_path}/structures/causal/*")
36
  gene_pairs = [os.path.basename(pair) for pair in gene_pairs]
37
  GET_CONFIG = load_config(
38
  "/app/modules/atac_rna_data_processing/atac_rna_data_processing/config/GET"
 
42
  GET_CONFIG.celltype.input = True
43
  GET_CONFIG.celltype.embed = True
44
  GET_CONFIG.celltype.data_dir = (
45
+ f"{args.s3_path}/pretrain_human_bingren_shendure_apr2023/fetal_adult/"
46
  )
47
  GET_CONFIG.celltype.interpret_dir = (
48
+ f"{args.s3_path}/Interpretation_all_hg38_allembed_v4_natac/"
49
  )
50
  GET_CONFIG.motif_dir = "/manitou/pmg/users/xf2217/interpret_natac/motif-clustering"
51
  motif = NrMotifV1.load_from_pickle(
52
  pkg_resources.resource_filename("atac_rna_data_processing", "data/NrMotifV1.pkl"),
53
  # GET_CONFIG.motif_dir,
54
  )
55
+ GET_CONFIG.s3_path = args.s3_path if args.s3_path else None
56
+
57
  cell_type_annot = pd.read_csv(
58
  GET_CONFIG.celltype.data_dir.split("fetal_adult")[0]
59
  + "data/cell_type_pretrain_human_bingren_shendure_apr2023.txt"
60
  )
61
  cell_type_id_to_name = dict(zip(cell_type_annot["id"], cell_type_annot["celltype"]))
62
  cell_type_name_to_id = dict(zip(cell_type_annot["celltype"], cell_type_annot["id"]))
63
+ if GET_CONFIG.s3_path:
64
+ s3 = s3fs.S3FileSystem()
65
+ available_celltypes = sorted(
66
+ [
67
+ cell_type_id_to_name[f.split("/")[-1]]
68
+ for f in s3.glob(GET_CONFIG.celltype.interpret_dir + "*")
69
+ ]
70
+ )
71
+ else:
72
+ available_celltypes = sorted(
73
+ [
74
+ cell_type_id_to_name[f.split("/")[-1]]
75
+ for f in glob(GET_CONFIG.celltype.interpret_dir + "*")
76
+ ]
77
+ )
78
  plt.rcParams["figure.dpi"] = 100
79
 
80
 
81
  def visualize_AF2(tf_pair, a):
82
+ strcture_dir = f"{args.s3_path}/structures/causal/{tf_pair}"
83
+ fasta_dir = f"{args.s3_path}/sequences/causal/{tf_pair}"
84
  if not os.path.exists(strcture_dir):
85
  gr.ErrorText("No such gene pair")
86
 
 
199
  """
200
  )
201
  celltype_name = gr.Dropdown(
202
+ label="Cell Type", choices=available_celltypes, value='Fetal Astrocyte 1'
203
  )
204
  celltype_btn = gr.Button(value="Load & plot gene expression")
205
  gene_exp_plot = gr.Plot(label="Gene expression prediction vs observation")