abuendia committed on
Commit
5c01795
1 Parent(s): fc3d984

Tested impl

Browse files
Files changed (2) hide show
  1. Dockerfile +2 -2
  2. app/main.py +25 -15
Dockerfile CHANGED
@@ -6,7 +6,7 @@ WORKDIR /app
6
 
7
 
8
  # Create a new environment using mamba with specified packages
9
- RUN micromamba install -n base -c conda-forge -c bioconda -y python=3.10 pip biopython
10
  RUN micromamba install -n base -c conda-forge -c bioconda -y nglview tqdm matplotlib pandas
11
  RUN micromamba install -n base -c conda-forge -c bioconda -y openpyxl pyarrow python-box xmlschema seaborn numpy py3Dmol pyranges scipy pyyaml zarr numcodecs
12
  RUN micromamba install -n base -c conda-forge -c bioconda -y pybigwig networkx plotly pysam requests seqlogo MOODS urllib3 pyliftover gprofiler-official pyfaidx
@@ -54,4 +54,4 @@ EXPOSE 7681
54
  # Set the working directory where your app resides
55
 
56
  # Command to run the Gradio app automatically
57
- CMD ["python", "app/main.py", "-p", "7681", "-s", "-u", "s3://2023-get-xf2217/get_demo", "-d", "/data"]
 
6
 
7
 
8
  # Create a new environment using mamba with specified packages
9
+ RUN micromamba install -n base -c conda-forge -c bioconda -y python=3.10 pip biopython s3fs
10
  RUN micromamba install -n base -c conda-forge -c bioconda -y nglview tqdm matplotlib pandas
11
  RUN micromamba install -n base -c conda-forge -c bioconda -y openpyxl pyarrow python-box xmlschema seaborn numpy py3Dmol pyranges scipy pyyaml zarr numcodecs
12
  RUN micromamba install -n base -c conda-forge -c bioconda -y pybigwig networkx plotly pysam requests seqlogo MOODS urllib3 pyliftover gprofiler-official pyfaidx
 
54
  # Set the working directory where your app resides
55
 
56
  # Command to run the Gradio app automatically
57
+ CMD ["python", "app/main.py", "-p", "7681", "-s", "-u", "s3://2023-get-xf2217/get_demo_test_data", "-d", "/data"]
app/main.py CHANGED
@@ -32,12 +32,13 @@ args.add_argument("-d", "--data", type=str, default="None", help="Data directory
32
  args = args.parse_args()
33
 
34
  GET_CONFIG = load_config(
35
- "/app/modules/atac_rna_data_processing/atac_rna_data_processing/config/GET"
36
  )
37
  GET_CONFIG.celltype.jacob = True
38
  GET_CONFIG.celltype.num_cls = 2
39
  GET_CONFIG.celltype.input = True
40
  GET_CONFIG.celltype.embed = True
 
41
 
42
  if args.s3_uri: # Use S3 path if exists
43
  GET_CONFIG.s3_uri = args.s3_uri
@@ -49,6 +50,12 @@ if args.s3_uri: # Use S3 path if exists
49
  f"{args.s3_uri}/Interpretation_all_hg38_allembed_v4_natac/"
50
  )
51
  GET_CONFIG.motif_dir = f"{args.s3_uri}/interpret_natac/motif-clustering"
 
 
 
 
 
 
52
  available_celltypes = sorted(
53
  [
54
  cell_type_id_to_name[f.split("/")[-1]]
@@ -56,7 +63,12 @@ if args.s3_uri: # Use S3 path if exists
56
  ]
57
  )
58
  gene_pairs = s3.glob(f"{args.s3_uri}/structures/causal/*")
59
- else:
 
 
 
 
 
60
  GET_CONFIG.celltype.data_dir = (
61
  f"{args.data}/pretrain_human_bingren_shendure_apr2023/fetal_adult/"
62
  )
@@ -64,6 +76,12 @@ else:
64
  f"{args.data}/Interpretation_all_hg38_allembed_v4_natac/"
65
  )
66
  GET_CONFIG.motif_dir = f"{args.data}/interpret_natac/motif-clustering"
 
 
 
 
 
 
67
  available_celltypes = sorted(
68
  [
69
  cell_type_id_to_name[f.split("/")[-1]]
@@ -71,19 +89,11 @@ else:
71
  ]
72
  )
73
  gene_pairs = glob(f"{args.data}/structures/causal/*")
74
-
75
- gene_pairs = [os.path.basename(pair) for pair in gene_pairs]
76
- motif = NrMotifV1.load_from_pickle(
77
- pkg_resources.resource_filename("atac_rna_data_processing", "data/NrMotifV1.pkl"),
78
- GET_CONFIG.motif_dir,
79
- )
80
- cell_type_annot = pd.read_csv(
81
- GET_CONFIG.celltype.data_dir.split("fetal_adult")[0]
82
- + "data/cell_type_pretrain_human_bingren_shendure_apr2023.txt"
83
- )
84
- cell_type_id_to_name = dict(zip(cell_type_annot["id"], cell_type_annot["celltype"]))
85
- cell_type_name_to_id = dict(zip(cell_type_annot["celltype"], cell_type_annot["id"]))
86
- plt.rcParams["figure.dpi"] = 100
87
 
88
 
89
  def visualize_AF2(tf_pair, a):
 
32
  args = args.parse_args()
33
 
34
  GET_CONFIG = load_config(
35
+ "/app/modules/atac_rna_data_processing/atac_rna_data_processing/config/GET"
36
  )
37
  GET_CONFIG.celltype.jacob = True
38
  GET_CONFIG.celltype.num_cls = 2
39
  GET_CONFIG.celltype.input = True
40
  GET_CONFIG.celltype.embed = True
41
+ plt.rcParams["figure.dpi"] = 100
42
 
43
  if args.s3_uri: # Use S3 path if exists
44
  GET_CONFIG.s3_uri = args.s3_uri
 
50
  f"{args.s3_uri}/Interpretation_all_hg38_allembed_v4_natac/"
51
  )
52
  GET_CONFIG.motif_dir = f"{args.s3_uri}/interpret_natac/motif-clustering"
53
+ cell_type_annot = pd.read_csv(
54
+ GET_CONFIG.celltype.data_dir.split("fetal_adult")[0]
55
+ + "data/cell_type_pretrain_human_bingren_shendure_apr2023.txt"
56
+ )
57
+ cell_type_id_to_name = dict(zip(cell_type_annot["id"], cell_type_annot["celltype"]))
58
+ cell_type_name_to_id = dict(zip(cell_type_annot["celltype"], cell_type_annot["id"]))
59
  available_celltypes = sorted(
60
  [
61
  cell_type_id_to_name[f.split("/")[-1]]
 
63
  ]
64
  )
65
  gene_pairs = s3.glob(f"{args.s3_uri}/structures/causal/*")
66
+ gene_pairs = [os.path.basename(pair) for pair in gene_pairs]
67
+ motif = NrMotifV1.load_from_pickle(
68
+ pkg_resources.resource_filename("atac_rna_data_processing", "data/NrMotifV1.pkl"),
69
+ GET_CONFIG.motif_dir,
70
+ )
71
+ else: # Run with local data
72
  GET_CONFIG.celltype.data_dir = (
73
  f"{args.data}/pretrain_human_bingren_shendure_apr2023/fetal_adult/"
74
  )
 
76
  f"{args.data}/Interpretation_all_hg38_allembed_v4_natac/"
77
  )
78
  GET_CONFIG.motif_dir = f"{args.data}/interpret_natac/motif-clustering"
79
+ cell_type_annot = pd.read_csv(
80
+ GET_CONFIG.celltype.data_dir.split("fetal_adult")[0]
81
+ + "data/cell_type_pretrain_human_bingren_shendure_apr2023.txt"
82
+ )
83
+ cell_type_id_to_name = dict(zip(cell_type_annot["id"], cell_type_annot["celltype"]))
84
+ cell_type_name_to_id = dict(zip(cell_type_annot["celltype"], cell_type_annot["id"]))
85
  available_celltypes = sorted(
86
  [
87
  cell_type_id_to_name[f.split("/")[-1]]
 
89
  ]
90
  )
91
  gene_pairs = glob(f"{args.data}/structures/causal/*")
92
+ gene_pairs = [os.path.basename(pair) for pair in gene_pairs]
93
+ motif = NrMotifV1.load_from_pickle(
94
+ pkg_resources.resource_filename("atac_rna_data_processing", "data/NrMotifV1.pkl"),
95
+ GET_CONFIG.motif_dir,
96
+ )
 
 
 
 
 
 
 
 
97
 
98
 
99
  def visualize_AF2(tf_pair, a):