Spaces:
Build error
Build error
Yacine Jernite
committed on
Commit
•
9f53328
1
Parent(s):
2430327
patch to show <10000 points in nPMI for performance
Browse files
- data_measurements/streamlit_utils.py +7 -1
- requirements.txt +2 -2
data_measurements/streamlit_utils.py
CHANGED
@@ -434,10 +434,16 @@ def npmi_show(paired_results):
|
|
434 |
s.index.name = "word"
|
435 |
npmi_cols = s.filter(like="npmi").columns
|
436 |
count_cols = s.filter(like="count").columns
|
|
|
|
|
|
|
|
|
|
|
|
|
437 |
# TODO: This is very different look than the duplicates table above. Should probably standardize.
|
438 |
cm = sns.palplot(sns.diverging_palette(270, 36, s=99, l=48, n=16))
|
439 |
out_df = (
|
440 |
-
s.style.background_gradient(subset=npmi_cols, cmap=cm)
|
441 |
.format(subset=npmi_cols, formatter="{:,.3f}")
|
442 |
.format(subset=count_cols, formatter=int)
|
443 |
.set_properties(
|
|
|
434 |
s.index.name = "word"
|
435 |
npmi_cols = s.filter(like="npmi").columns
|
436 |
count_cols = s.filter(like="count").columns
|
437 |
+
if s.shape[0] > 10000:
|
438 |
+
bias_thres = max(abs(s["npmi-bias"][5000]), abs(s["npmi-bias"][-5000]))
|
439 |
+
print(f"filtering with bias threshold: {bias_thres}")
|
440 |
+
s_filtered = s[s["npmi-bias"].abs() > bias_thres]
|
441 |
+
else:
|
442 |
+
s_filtered = s
|
443 |
# TODO: This is very different look than the duplicates table above. Should probably standardize.
|
444 |
cm = sns.palplot(sns.diverging_palette(270, 36, s=99, l=48, n=16))
|
445 |
out_df = (
|
446 |
+
s_filtered.style.background_gradient(subset=npmi_cols, cmap=cm)
|
447 |
.format(subset=npmi_cols, formatter="{:,.3f}")
|
448 |
.format(subset=count_cols, formatter=int)
|
449 |
.set_properties(
|
requirements.txt
CHANGED
@@ -10,7 +10,7 @@ iso_639==0.4.5
|
|
10 |
datasets==1.15.1
|
11 |
powerlaw==1.5
|
12 |
numpy==1.19.5
|
13 |
-
pandas==1.
|
14 |
dataclasses==0.6
|
15 |
iso639==0.1.4
|
16 |
python_igraph==0.9.6
|
@@ -23,4 +23,4 @@ numexpr==2.7.3
|
|
23 |
scikit-learn~=0.24.2
|
24 |
scipy~=1.7.3
|
25 |
tqdm~=4.62.3
|
26 |
-
pyarrow~=6.0.1
|
|
|
10 |
datasets==1.15.1
|
11 |
powerlaw==1.5
|
12 |
numpy==1.19.5
|
13 |
+
pandas==1.0.0
|
14 |
dataclasses==0.6
|
15 |
iso639==0.1.4
|
16 |
python_igraph==0.9.6
|
|
|
23 |
scikit-learn~=0.24.2
|
24 |
scipy~=1.7.3
|
25 |
tqdm~=4.62.3
|
26 |
+
pyarrow~=6.0.1
|