Spaces:
Running
Running
Read from s3 in main.py
Browse files- Dockerfile +1 -1
- app/main.py +26 -12
Dockerfile
CHANGED
@@ -54,4 +54,4 @@ EXPOSE 7681
|
|
54 |
# Set the working directory where your app resides
|
55 |
|
56 |
# Command to run the Gradio app automatically
|
57 |
-
CMD ["python", "app/main.py", "-p", "7681", "-s", "-d", "/data"]
|
|
|
54 |
# Set the working directory where your app resides
|
55 |
|
56 |
# Command to run the Gradio app automatically
|
57 |
+
CMD ["python", "app/main.py", "-p", "7681", "-s", "-d", "/data", "-u", "s3://2023-get-xf2217/get_demo/"]
|
app/main.py
CHANGED
@@ -12,6 +12,7 @@ seq = get_seq()
|
|
12 |
genename_to_uniprot = get_genename_to_uniprot()
|
13 |
lddt = get_lddt()
|
14 |
import sys
|
|
|
15 |
from glob import glob
|
16 |
|
17 |
import numpy as np
|
@@ -26,10 +27,12 @@ args = argparse.ArgumentParser()
|
|
26 |
args.add_argument("-p", "--port", type=int, default=7860, help="Port number")
|
27 |
args.add_argument("-s", "--share", action="store_true", help="Share on network")
|
28 |
args.add_argument("-d", "--data", type=str, default="/data", help="Data directory")
|
|
|
29 |
args = args.parse_args()
|
30 |
# set pseudo args
|
31 |
# args = args.parse_args(['-p', '7869', '-s', '-d', '/manitou/pmg/users/xf2217/demo_data'])
|
32 |
-
|
|
|
33 |
gene_pairs = [os.path.basename(pair) for pair in gene_pairs]
|
34 |
GET_CONFIG = load_config(
|
35 |
"/app/modules/atac_rna_data_processing/atac_rna_data_processing/config/GET"
|
@@ -39,34 +42,45 @@ GET_CONFIG.celltype.num_cls = 2
|
|
39 |
GET_CONFIG.celltype.input = True
|
40 |
GET_CONFIG.celltype.embed = True
|
41 |
GET_CONFIG.celltype.data_dir = (
|
42 |
-
f"{args.
|
43 |
)
|
44 |
GET_CONFIG.celltype.interpret_dir = (
|
45 |
-
f"{args.
|
46 |
)
|
47 |
GET_CONFIG.motif_dir = "/manitou/pmg/users/xf2217/interpret_natac/motif-clustering"
|
48 |
motif = NrMotifV1.load_from_pickle(
|
49 |
pkg_resources.resource_filename("atac_rna_data_processing", "data/NrMotifV1.pkl"),
|
50 |
# GET_CONFIG.motif_dir,
|
51 |
)
|
|
|
|
|
52 |
cell_type_annot = pd.read_csv(
|
53 |
GET_CONFIG.celltype.data_dir.split("fetal_adult")[0]
|
54 |
+ "data/cell_type_pretrain_human_bingren_shendure_apr2023.txt"
|
55 |
)
|
56 |
cell_type_id_to_name = dict(zip(cell_type_annot["id"], cell_type_annot["celltype"]))
|
57 |
cell_type_name_to_id = dict(zip(cell_type_annot["celltype"], cell_type_annot["id"]))
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
plt.rcParams["figure.dpi"] = 100
|
65 |
|
66 |
|
67 |
def visualize_AF2(tf_pair, a):
|
68 |
-
strcture_dir = f"{args.
|
69 |
-
fasta_dir = f"{args.
|
70 |
if not os.path.exists(strcture_dir):
|
71 |
gr.ErrorText("No such gene pair")
|
72 |
|
@@ -185,7 +199,7 @@ This section enables you to select different cell types and generates a plot tha
|
|
185 |
"""
|
186 |
)
|
187 |
celltype_name = gr.Dropdown(
|
188 |
-
label="Cell Type", choices=
|
189 |
)
|
190 |
celltype_btn = gr.Button(value="Load & plot gene expression")
|
191 |
gene_exp_plot = gr.Plot(label="Gene expression prediction vs observation")
|
|
|
12 |
genename_to_uniprot = get_genename_to_uniprot()
|
13 |
lddt = get_lddt()
|
14 |
import sys
|
15 |
+
import s3fs
|
16 |
from glob import glob
|
17 |
|
18 |
import numpy as np
|
|
|
27 |
args.add_argument("-p", "--port", type=int, default=7860, help="Port number")
|
28 |
args.add_argument("-s", "--share", action="store_true", help="Share on network")
|
29 |
args.add_argument("-d", "--data", type=str, default="/data", help="Data directory")
|
30 |
+
# Optional S3 location of the demo data. The flag is spelled -u/--s3_uri on the
# command line, but the rest of this script reads it as `args.s3_path`, so the
# destination attribute is set explicitly. The default is the real `None`
# (not the string "None", which is truthy) so that "no S3 bucket given" is
# falsy in checks such as `if args.s3_path`.
args.add_argument(
    "-u",
    "--s3_uri",
    dest="s3_path",
    type=str,
    default=None,
    help="Path to demo S3 bucket",
)
|
31 |
args = args.parse_args()
|
32 |
# set pseudo args
|
33 |
# args = args.parse_args(['-p', '7869', '-s', '-d', '/manitou/pmg/users/xf2217/demo_data'])
|
34 |
+
|
35 |
+
gene_pairs = glob(f"{args.s3_path}/structures/causal/*")
|
36 |
gene_pairs = [os.path.basename(pair) for pair in gene_pairs]
|
37 |
GET_CONFIG = load_config(
|
38 |
"/app/modules/atac_rna_data_processing/atac_rna_data_processing/config/GET"
|
|
|
42 |
GET_CONFIG.celltype.input = True
|
43 |
GET_CONFIG.celltype.embed = True
|
44 |
GET_CONFIG.celltype.data_dir = (
|
45 |
+
f"{args.s3_path}/pretrain_human_bingren_shendure_apr2023/fetal_adult/"
|
46 |
)
|
47 |
GET_CONFIG.celltype.interpret_dir = (
|
48 |
+
f"{args.s3_path}/Interpretation_all_hg38_allembed_v4_natac/"
|
49 |
)
|
50 |
GET_CONFIG.motif_dir = "/manitou/pmg/users/xf2217/interpret_natac/motif-clustering"
|
51 |
motif = NrMotifV1.load_from_pickle(
|
52 |
pkg_resources.resource_filename("atac_rna_data_processing", "data/NrMotifV1.pkl"),
|
53 |
# GET_CONFIG.motif_dir,
|
54 |
)
|
55 |
+
GET_CONFIG.s3_path = args.s3_path if args.s3_path else None
|
56 |
+
|
57 |
cell_type_annot = pd.read_csv(
|
58 |
GET_CONFIG.celltype.data_dir.split("fetal_adult")[0]
|
59 |
+ "data/cell_type_pretrain_human_bingren_shendure_apr2023.txt"
|
60 |
)
|
61 |
cell_type_id_to_name = dict(zip(cell_type_annot["id"], cell_type_annot["celltype"]))
|
62 |
cell_type_name_to_id = dict(zip(cell_type_annot["celltype"], cell_type_annot["id"]))
|
63 |
+
if GET_CONFIG.s3_path:
|
64 |
+
s3 = s3fs.S3FileSystem()
|
65 |
+
available_celltypes = sorted(
|
66 |
+
[
|
67 |
+
cell_type_id_to_name[f.split("/")[-1]]
|
68 |
+
for f in s3.glob(GET_CONFIG.celltype.interpret_dir + "*")
|
69 |
+
]
|
70 |
+
)
|
71 |
+
else:
|
72 |
+
available_celltypes = sorted(
|
73 |
+
[
|
74 |
+
cell_type_id_to_name[f.split("/")[-1]]
|
75 |
+
for f in glob(GET_CONFIG.celltype.interpret_dir + "*")
|
76 |
+
]
|
77 |
+
)
|
78 |
plt.rcParams["figure.dpi"] = 100
|
79 |
|
80 |
|
81 |
def visualize_AF2(tf_pair, a):
|
82 |
+
strcture_dir = f"{args.s3_path}/structures/causal/{tf_pair}"
|
83 |
+
fasta_dir = f"{args.s3_path}/sequences/causal/{tf_pair}"
|
84 |
if not os.path.exists(strcture_dir):
|
85 |
gr.ErrorText("No such gene pair")
|
86 |
|
|
|
199 |
"""
|
200 |
)
|
201 |
celltype_name = gr.Dropdown(
|
202 |
+
label="Cell Type", choices=available_celltypes, value='Fetal Astrocyte 1'
|
203 |
)
|
204 |
celltype_btn = gr.Button(value="Load & plot gene expression")
|
205 |
gene_exp_plot = gr.Plot(label="Gene expression prediction vs observation")
|