Spaces:
Build error
Build error
meg-huggingface
committed on
Commit
•
db74ba9
1
Parent(s):
0803ab3
Scripts to generate cache
Browse files
- run.sh +112 -0
- run_data_measurements.py +8 -6
run.sh
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
|
2 |
+
|
3 |
+
|
4 |
+
python3 run_data_measurements.py --dataset="hate_speech18" --config="default" --split="train" --label_field="label" --feature="text"
|
5 |
+
python3 run_data_measurements.py --dataset="hate_speech_offensive" --config="default" --split="train" --label_field="label" --feature="tweet"
|
6 |
+
|
7 |
+
|
8 |
+
python3 run_data_measurements.py --dataset="imdb" --config="plain_text" --split="train" --label_field="label" --feature="text"
|
9 |
+
python3 run_data_measurements.py --dataset="imdb" --config="plain_text" --split="unsupervised" --label_field="label" --feature="text"
|
10 |
+
|
11 |
+
|
12 |
+
python3 run_data_measurements.py --dataset="glue" --config="cola" --split="train" --label_field="label" --feature="sentence"
|
13 |
+
python3 run_data_measurements.py --dataset="glue" --config="cola" --split="validation" --label_field="label" --feature="sentence"
|
14 |
+
|
15 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli" --split="train" --label_field="label" --feature="hypothesis"
|
16 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli" --split="train" --label_field="label" --feature="premise"
|
17 |
+
|
18 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli" --split="validation_matched" --label_field="label" --feature="premise"
|
19 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli" --split="validation_matched" --label_field="label" --feature="hypothesis"
|
20 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli" --split="validation_mismatched" --label_field="label" --feature="premise"
|
21 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli" --split="validation_mismatched" --label_field="label" --feature="hypothesis"
|
22 |
+
|
23 |
+
|
24 |
+
python3 run_data_measurements.py --dataset="glue" --config="mrpc" --split="train" --label_field="label" --feature="sentence1"
|
25 |
+
python3 run_data_measurements.py --dataset="glue" --config="mrpc" --split="train" --label_field="label" --feature="sentence2"
|
26 |
+
python3 run_data_measurements.py --dataset="glue" --config="mrpc" --split="validation" --label_field="label" --feature="sentence1"
|
27 |
+
python3 run_data_measurements.py --dataset="glue" --config="mrpc" --split="validation" --label_field="label" --feature="sentence2"
|
28 |
+
|
29 |
+
|
30 |
+
python3 run_data_measurements.py --dataset="glue" --config="rte" --split="train" --label_field="label" --feature="sentence1"
|
31 |
+
python3 run_data_measurements.py --dataset="glue" --config="rte" --split="train" --label_field="label" --feature="sentence2"
|
32 |
+
python3 run_data_measurements.py --dataset="glue" --config="rte" --split="validation" --label_field="label" --feature="sentence1"
|
33 |
+
python3 run_data_measurements.py --dataset="glue" --config="rte" --split="validation" --label_field="label" --feature="sentence2"
|
34 |
+
|
35 |
+
|
36 |
+
python3 run_data_measurements.py --dataset="glue" --config="stsb" --split="train" --label_field="label" --feature="sentence1"
|
37 |
+
python3 run_data_measurements.py --dataset="glue" --config="stsb" --split="train" --label_field="label" --feature="sentence2"
|
38 |
+
python3 run_data_measurements.py --dataset="glue" --config="stsb" --split="validation" --label_field="label" --feature="sentence1"
|
39 |
+
python3 run_data_measurements.py --dataset="glue" --config="stsb" --split="validation" --label_field="label" --feature="sentence2"
|
40 |
+
|
41 |
+
python3 run_data_measurements.py --dataset="glue" --config="wnli" --split="train" --label_field="label" --feature="sentence1"
|
42 |
+
python3 run_data_measurements.py --dataset="glue" --config="wnli" --split="train" --label_field="label" --feature="sentence2"
|
43 |
+
python3 run_data_measurements.py --dataset="glue" --config="wnli" --split="validation" --label_field="label" --feature="sentence1"
|
44 |
+
python3 run_data_measurements.py --dataset="glue" --config="wnli" --split="validation" --label_field="label" --feature="sentence2"
|
45 |
+
|
46 |
+
python3 run_data_measurements.py --dataset="glue" --config="sst2" --split="train" --label_field="label" --feature="sentence"
|
47 |
+
python3 run_data_measurements.py --dataset="glue" --config="sst2" --split="validation" --label_field="label" --feature="sentence"
|
48 |
+
|
49 |
+
|
50 |
+
python3 run_data_measurements.py --dataset="glue" --config="qnli" --split="train" --label_field="label" --feature="question"
|
51 |
+
python3 run_data_measurements.py --dataset="glue" --config="qnli" --split="train" --label_field="label" --feature="sentence"
|
52 |
+
python3 run_data_measurements.py --dataset="glue" --config="qnli" --split="validation" --label_field="label" --feature="question"
|
53 |
+
python3 run_data_measurements.py --dataset="glue" --config="qnli" --split="validation" --label_field="label" --feature="sentence"
|
54 |
+
|
55 |
+
|
56 |
+
python3 run_data_measurements.py --dataset="glue" --config="qqp" --split="train" --label_field="label" --feature="question1"
|
57 |
+
python3 run_data_measurements.py --dataset="glue" --config="qqp" --split="train" --label_field="label" --feature="question2"
|
58 |
+
python3 run_data_measurements.py --dataset="glue" --config="qqp" --split="validation" --label_field="label" --feature="question1"
|
59 |
+
python3 run_data_measurements.py --dataset="glue" --config="qqp" --split="validation" --label_field="label" --feature="question2"
|
60 |
+
|
61 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli_matched" --split="validation" --label_field="label" --feature="hypothesis"
|
62 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli_matched" --split="validation" --label_field="label" --feature="premise"
|
63 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli_mismatched" --split="validation" --label_field="label" --feature="hypothesis"
|
64 |
+
python3 run_data_measurements.py --dataset="glue" --config="mnli_mismatched" --split="validation" --label_field="label" --feature="premise"
|
65 |
+
|
66 |
+
|
67 |
+
python3 run_data_measurements.py --dataset="wikitext" --config="wikitext-103-v1" --split="train" --feature="text"
|
68 |
+
python3 run_data_measurements.py --dataset="wikitext" --config="wikitext-103-raw-v1" --split="train" --feature="text"
|
69 |
+
python3 run_data_measurements.py --dataset="wikitext" --config="wikitext-2-v1" --split="train" --feature="text"
|
70 |
+
python3 run_data_measurements.py --dataset="wikitext" --config="wikitext-2-raw-v1" --split="train" --feature="text"
|
71 |
+
python3 run_data_measurements.py --dataset="wikitext" --config="wikitext-103-v1" --split="validation" --feature="text"
|
72 |
+
python3 run_data_measurements.py --dataset="wikitext" --config="wikitext-103-raw-v1" --split="validation" --feature="text"
|
73 |
+
python3 run_data_measurements.py --dataset="wikitext" --config="wikitext-2-v1" --split="validation" --feature="text"
|
74 |
+
python3 run_data_measurements.py --dataset="wikitext" --config="wikitext-2-raw-v1" --split="validation" --feature="text"
|
75 |
+
|
76 |
+
|
77 |
+
# SuperGLUE. Open question: should wsc, wic, rte, record, and multirc be included too?
|
78 |
+
|
79 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="boolq" --split="train" --label_field="label" --feature="question"
|
80 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="boolq" --split="validation" --label_field="label" --feature="question"
|
81 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="boolq" --split="train" --label_field="label" --feature="passage"
|
82 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="boolq" --split="validation" --label_field="label" --feature="passage"
|
83 |
+
|
84 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="cb" --split="train" --label_field="label" --feature="premise"
|
85 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="cb" --split="validation" --label_field="label" --feature="premise"
|
86 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="cb" --split="train" --label_field="label" --feature="hypothesis"
|
87 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="cb" --split="validation" --label_field="label" --feature="hypothesis"
|
88 |
+
|
89 |
+
|
90 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="copa" --split="train" --label_field="label" --feature="premise"
|
91 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="copa" --split="validation" --label_field="label" --feature="premise"
|
92 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="copa" --split="train" --label_field="label" --feature="choice1"
|
93 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="copa" --split="validation" --label_field="label" --feature="choice1"
|
94 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="copa" --split="train" --label_field="label" --feature="choice2"
|
95 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="copa" --split="validation" --label_field="label" --feature="choice2"
|
96 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="copa" --split="train" --label_field="label" --feature="question"
|
97 |
+
python3 run_data_measurements.py --dataset="super_glue" --config="copa" --split="validation" --label_field="label" --feature="question"
|
98 |
+
|
99 |
+
python3 run_data_measurements.py --dataset="squad" --config="plain_text" --split="train" --feature="context"
|
100 |
+
python3 run_data_measurements.py --dataset="squad" --config="plain_text" --split="train" --feature="question"
|
101 |
+
python3 run_data_measurements.py --dataset="squad" --config="plain_text" --split="train" --feature="title"
|
102 |
+
python3 run_data_measurements.py --dataset="squad" --config="plain_text" --split="validation" --feature="context"
|
103 |
+
python3 run_data_measurements.py --dataset="squad" --config="plain_text" --split="validation" --feature="question"
|
104 |
+
python3 run_data_measurements.py --dataset="squad" --config="plain_text" --split="validation" --feature="title"
|
105 |
+
|
106 |
+
|
107 |
+
python3 run_data_measurements.py --dataset="squad_v2" --config="squad_v2" --split="train" --feature="context"
|
108 |
+
python3 run_data_measurements.py --dataset="squad_v2" --config="squad_v2" --split="train" --feature="question"
|
109 |
+
python3 run_data_measurements.py --dataset="squad_v2" --config="squad_v2" --split="train" --feature="title"
|
110 |
+
python3 run_data_measurements.py --dataset="squad_v2" --config="squad_v2" --split="validation" --feature="context"
|
111 |
+
python3 run_data_measurements.py --dataset="squad_v2" --config="squad_v2" --split="validation" --feature="question"
|
112 |
+
python3 run_data_measurements.py --dataset="squad_v2" --config="squad_v2" --split="validation" --feature="title"
|
run_data_measurements.py
CHANGED
@@ -25,7 +25,11 @@ def load_or_prepare_widgets(ds_args, show_embeddings=False, use_cache=False):
|
|
25 |
# General stats widget
|
26 |
dstats.load_or_prepare_general_stats()
|
27 |
# Labels widget
|
28 |
-
|
|
|
|
|
|
|
|
|
29 |
# Text lengths widget
|
30 |
dstats.load_or_prepare_text_lengths()
|
31 |
if show_embeddings:
|
@@ -76,9 +80,10 @@ def load_or_prepare(dataset_args, do_html=False, use_cache=False):
|
|
76 |
print("Figure saved to %s." % fig_tok_length_fid)
|
77 |
print("Done!")
|
78 |
|
79 |
-
if
|
80 |
if not dstats.label_field:
|
81 |
-
print("Warning: You asked for label calculation, but didn't provide
|
|
|
82 |
dstats.set_label_field("label")
|
83 |
print("\n* Calculating label distribution.")
|
84 |
dstats.load_or_prepare_labels()
|
@@ -188,9 +193,6 @@ def main():
|
|
188 |
Example for hate speech18 dataset:
|
189 |
python3 run_data_measurements.py --dataset="hate_speech18" --config="default" --split="train" --feature="text"
|
190 |
|
191 |
-
Example for Glue dataset:
|
192 |
-
python3 run_data_measurements.py --dataset="glue" --config="ax" --split="train" --feature="premise"
|
193 |
-
|
194 |
Example for IMDB dataset:
|
195 |
python3 run_data_measurements.py --dataset="imdb" --config="plain_text" --split="train" --label_field="label" --feature="text"
|
196 |
"""
|
|
|
25 |
# General stats widget
|
26 |
dstats.load_or_prepare_general_stats()
|
27 |
# Labels widget
|
28 |
+
try:
|
29 |
+
dstats.set_label_field("label")
|
30 |
+
dstats.load_or_prepare_labels()
|
31 |
+
except:
|
32 |
+
pass
|
33 |
# Text lengths widget
|
34 |
dstats.load_or_prepare_text_lengths()
|
35 |
if show_embeddings:
|
|
|
80 |
print("Figure saved to %s." % fig_tok_length_fid)
|
81 |
print("Done!")
|
82 |
|
83 |
+
if all or dataset_args["calculation"] == "labels":
|
84 |
if not dstats.label_field:
|
85 |
+
print("Warning: You asked for label calculation, but didn't provide "
|
86 |
+
"the labels field name. Assuming it is 'label'...")
|
87 |
dstats.set_label_field("label")
|
88 |
print("\n* Calculating label distribution.")
|
89 |
dstats.load_or_prepare_labels()
|
|
|
193 |
Example for hate speech18 dataset:
|
194 |
python3 run_data_measurements.py --dataset="hate_speech18" --config="default" --split="train" --feature="text"
|
195 |
|
|
|
|
|
|
|
196 |
Example for IMDB dataset:
|
197 |
python3 run_data_measurements.py --dataset="imdb" --config="plain_text" --split="train" --label_field="label" --feature="text"
|
198 |
"""
|