ctheodoris
committed on
Commit
•
188029e
1
Parent(s):
f4fea1e
Rename isp stats methods to clarify mode.
Browse files
geneformer/in_silico_perturber_stats.py
CHANGED
@@ -67,7 +67,8 @@ def n_detections(token, dict_list):
|
|
67 |
def get_fdr(pvalues):
|
68 |
return list(smt.multipletests(pvalues, alpha=0.05, method="fdr_bh")[1])
|
69 |
|
70 |
-
def isp_stats(cos_sims_df, dict_list):
|
|
|
71 |
random_tuples = []
|
72 |
for i in trange(cos_sims_df.shape[0]):
|
73 |
token = cos_sims_df["Gene"][i]
|
@@ -131,6 +132,7 @@ def isp_stats(cos_sims_df, dict_list):
|
|
131 |
|
132 |
return cos_sims_full_df
|
133 |
|
|
|
134 |
def isp_stats_vs_null(cos_sims_df, dict_list, null_dict_list):
|
135 |
cos_sims_full_df = cos_sims_df.copy()
|
136 |
|
@@ -293,7 +295,7 @@ class InSilicoPerturberStats:
|
|
293 |
if self.mode not in ["goal_state_shift", "vs_null"]:
|
294 |
logger.error(
|
295 |
"Currently, only modes available are stats for goal_state_shift \
|
296 |
-
and comparing
|
297 |
raise
|
298 |
|
299 |
self.gene_token_id_dict = invert_dict(self.gene_token_dict)
|
@@ -314,7 +316,7 @@ class InSilicoPerturberStats:
|
|
314 |
|
315 |
dict_list = read_dictionaries(input_data_directory, "cell")
|
316 |
if self.mode == "goal_state_shift":
|
317 |
-
cos_sims_df = isp_stats(cos_sims_df_initial, dict_list)
|
318 |
|
319 |
# quantify number of detections of each gene
|
320 |
cos_sims_df["N_Detections"] = [n_detections(i, dict_list) for i in cos_sims_df["Gene"]]
|
|
|
67 |
def get_fdr(pvalues):
|
68 |
return list(smt.multipletests(pvalues, alpha=0.05, method="fdr_bh")[1])
|
69 |
|
70 |
+
# stats comparing cos sim shifts towards goal state of test perturbations vs random perturbations
|
71 |
+
def isp_stats_to_goal_state(cos_sims_df, dict_list):
|
72 |
random_tuples = []
|
73 |
for i in trange(cos_sims_df.shape[0]):
|
74 |
token = cos_sims_df["Gene"][i]
|
|
|
132 |
|
133 |
return cos_sims_full_df
|
134 |
|
135 |
+
# stats comparing cos sim shifts of test perturbations vs null distribution
|
136 |
def isp_stats_vs_null(cos_sims_df, dict_list, null_dict_list):
|
137 |
cos_sims_full_df = cos_sims_df.copy()
|
138 |
|
|
|
295 |
if self.mode not in ["goal_state_shift", "vs_null"]:
|
296 |
logger.error(
|
297 |
"Currently, only modes available are stats for goal_state_shift \
|
298 |
+
and vs_null (comparing to null distribution).")
|
299 |
raise
|
300 |
|
301 |
self.gene_token_id_dict = invert_dict(self.gene_token_dict)
|
|
|
316 |
|
317 |
dict_list = read_dictionaries(input_data_directory, "cell")
|
318 |
if self.mode == "goal_state_shift":
|
319 |
+
cos_sims_df = isp_stats_to_goal_state(cos_sims_df_initial, dict_list)
|
320 |
|
321 |
# quantify number of detections of each gene
|
322 |
cos_sims_df["N_Detections"] = [n_detections(i, dict_list) for i in cos_sims_df["Gene"]]
|