Upload 34 files
Browse files- .gitattributes +8 -0
- data/lang_bpe_500/HLG.pt +3 -0
- data/lang_bpe_500/L.fst +3 -0
- data/lang_bpe_500/L.pt +3 -0
- data/lang_bpe_500/LG.pt +3 -0
- data/lang_bpe_500/L_disambig.fst +3 -0
- data/lang_bpe_500/L_disambig.pt +3 -0
- data/lang_bpe_500/Linv.pt +3 -0
- data/lang_bpe_500/bpe.model +3 -0
- data/lang_bpe_500/lexicon.txt +0 -0
- data/lang_bpe_500/lexicon_disambig.txt +0 -0
- data/lang_bpe_500/tokens.txt +502 -0
- data/lang_bpe_500/train.txt +3 -0
- data/lang_bpe_500/train_orig.txt +3 -0
- data/lang_bpe_500/unigram_500.model +3 -0
- data/lang_bpe_500/unigram_500.vocab +500 -0
- data/lang_bpe_500/words.txt +0 -0
- data/lang_bpe_500/words_no_ids.txt +0 -0
- data/lm/3gram.arpa +3 -0
- data/lm/4gram.arpa +3 -0
- data/lm/G_3_gram.fst.txt +3 -0
- data/lm/G_3_gram.pt +3 -0
- data/lm/G_4_gram.fst.txt +3 -0
- decoding_results/fast_beam_search/errs-test-cv-beam_20.0_max_contexts_8_max_states_64-epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model.txt +0 -0
- decoding_results/fast_beam_search/log-decode-epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model-2023-04-03-17-31-21 +82 -0
- decoding_results/fast_beam_search/recogs-test-cv-beam_20.0_max_contexts_8_max_states_64-epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model.txt +0 -0
- decoding_results/fast_beam_search/wer-summary-test-cv-beam_20.0_max_contexts_8_max_states_64.txt +2 -0
- decoding_results/greedy_search/errs-test-cv-greedy_search-epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
- decoding_results/greedy_search/log-decode-epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model-2023-04-03-17-20-40 +52 -0
- decoding_results/greedy_search/recogs-test-cv-greedy_search-epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
- decoding_results/greedy_search/wer-summary-test-cv-greedy_search.txt +2 -0
- decoding_results/modified_beam_search/errs-test-cv-beam_size_4-epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model.txt +0 -0
- decoding_results/modified_beam_search/log-decode-epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model-2023-04-03-17-22-38 +82 -0
- decoding_results/modified_beam_search/recogs-test-cv-beam_size_4-epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model.txt +0 -0
- decoding_results/modified_beam_search/wer-summary-test-cv-beam_size_4.txt +2 -0
.gitattributes
CHANGED
@@ -32,3 +32,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
data/lang_bpe_500/L_disambig.fst filter=lfs diff=lfs merge=lfs -text
|
36 |
+
data/lang_bpe_500/L.fst filter=lfs diff=lfs merge=lfs -text
|
37 |
+
data/lang_bpe_500/train_orig.txt filter=lfs diff=lfs merge=lfs -text
|
38 |
+
data/lang_bpe_500/train.txt filter=lfs diff=lfs merge=lfs -text
|
39 |
+
data/lm/3gram.arpa filter=lfs diff=lfs merge=lfs -text
|
40 |
+
data/lm/4gram.arpa filter=lfs diff=lfs merge=lfs -text
|
41 |
+
data/lm/G_3_gram.fst.txt filter=lfs diff=lfs merge=lfs -text
|
42 |
+
data/lm/G_4_gram.fst.txt filter=lfs diff=lfs merge=lfs -text
|
data/lang_bpe_500/HLG.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1431e9712ce1bf45a3e8ad9775eb148306298a96ed9a642c75be50ab0566da55
|
3 |
+
size 1091845447
|
data/lang_bpe_500/L.fst
ADDED
Git LFS Details
|
data/lang_bpe_500/L.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70de241a0a4b31867f12d84a2c7f61920df2cd3a09c321da1367c8abd95a820e
|
3 |
+
size 20698447
|
data/lang_bpe_500/LG.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79f0f73fcd23cd1c17650c87bb70bc4da56f98977105bed69fe1deb91989b37b
|
3 |
+
size 306392522
|
data/lang_bpe_500/L_disambig.fst
ADDED
Git LFS Details
|
data/lang_bpe_500/L_disambig.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6787b0bc4a08f369d364e4b475a29c7f6d49a0d81e881daaf1468c832bd01cd1
|
3 |
+
size 21392435
|
data/lang_bpe_500/Linv.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e6e66b8d4f763b913ebafd0f21dfe374549df05d704e6587cfb27050bcfc82c
|
3 |
+
size 20698459
|
data/lang_bpe_500/bpe.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:787932caf5c0adf93e850c18742279142ab33cdff5a6bd1234ad3aca2fc0b998
|
3 |
+
size 244624
|
data/lang_bpe_500/lexicon.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/lang_bpe_500/lexicon_disambig.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/lang_bpe_500/tokens.txt
ADDED
@@ -0,0 +1,502 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<blk> 0
|
2 |
+
<sos/eos> 1
|
3 |
+
<unk> 2
|
4 |
+
S 3
|
5 |
+
▁ 4
|
6 |
+
E 5
|
7 |
+
▁DE 6
|
8 |
+
' 7
|
9 |
+
T 8
|
10 |
+
R 9
|
11 |
+
É 10
|
12 |
+
▁L 11
|
13 |
+
▁LA 12
|
14 |
+
C 13
|
15 |
+
▁LE 14
|
16 |
+
A 15
|
17 |
+
O 16
|
18 |
+
P 17
|
19 |
+
▁D 18
|
20 |
+
U 19
|
21 |
+
I 20
|
22 |
+
▁EST 21
|
23 |
+
▁IL 22
|
24 |
+
ON 23
|
25 |
+
N 24
|
26 |
+
▁À 25
|
27 |
+
▁S 26
|
28 |
+
▁A 27
|
29 |
+
▁ET 28
|
30 |
+
▁C 29
|
31 |
+
IN 30
|
32 |
+
D 31
|
33 |
+
G 32
|
34 |
+
ER 33
|
35 |
+
ES 34
|
36 |
+
▁EN 35
|
37 |
+
▁LES 36
|
38 |
+
Y 37
|
39 |
+
IS 38
|
40 |
+
IT 39
|
41 |
+
L 40
|
42 |
+
▁DU 41
|
43 |
+
AL 42
|
44 |
+
ENT 43
|
45 |
+
▁F 44
|
46 |
+
RE 45
|
47 |
+
▁PAR 46
|
48 |
+
▁DES 47
|
49 |
+
F 48
|
50 |
+
LE 49
|
51 |
+
▁UN 50
|
52 |
+
▁B 51
|
53 |
+
▁SE 52
|
54 |
+
V 53
|
55 |
+
▁AU 54
|
56 |
+
AN 55
|
57 |
+
B 56
|
58 |
+
OR 57
|
59 |
+
▁G 58
|
60 |
+
RA 59
|
61 |
+
ANT 60
|
62 |
+
OU 61
|
63 |
+
H 62
|
64 |
+
UR 63
|
65 |
+
▁DANS 64
|
66 |
+
▁T 65
|
67 |
+
RI 66
|
68 |
+
▁M 67
|
69 |
+
TE 68
|
70 |
+
▁RE 69
|
71 |
+
AR 70
|
72 |
+
▁UNE 71
|
73 |
+
▁ELLE 72
|
74 |
+
▁V 73
|
75 |
+
M 74
|
76 |
+
ATION 75
|
77 |
+
UL 76
|
78 |
+
ÉE 77
|
79 |
+
Z 78
|
80 |
+
▁N 79
|
81 |
+
NE 80
|
82 |
+
ME 81
|
83 |
+
IL 82
|
84 |
+
▁P 83
|
85 |
+
MENT 84
|
86 |
+
IE 85
|
87 |
+
EN 86
|
88 |
+
▁H 87
|
89 |
+
▁CON 88
|
90 |
+
IR 89
|
91 |
+
EST 90
|
92 |
+
LI 91
|
93 |
+
▁SON 92
|
94 |
+
CH 93
|
95 |
+
▁POUR 94
|
96 |
+
È 95
|
97 |
+
LA 96
|
98 |
+
CE 97
|
99 |
+
▁RÉ 98
|
100 |
+
▁MA 99
|
101 |
+
IQUE 100
|
102 |
+
▁CE 101
|
103 |
+
EUR 102
|
104 |
+
▁SA 103
|
105 |
+
NT 104
|
106 |
+
▁SONT 105
|
107 |
+
US 106
|
108 |
+
EMENT 107
|
109 |
+
▁IN 108
|
110 |
+
▁PRO 109
|
111 |
+
▁SUR 110
|
112 |
+
RÉ 111
|
113 |
+
RO 112
|
114 |
+
▁DÉ 113
|
115 |
+
AIT 114
|
116 |
+
DE 115
|
117 |
+
▁QU 116
|
118 |
+
▁É 117
|
119 |
+
EL 118
|
120 |
+
OL 119
|
121 |
+
AIRE 120
|
122 |
+
ION 121
|
123 |
+
UN 122
|
124 |
+
CHE 123
|
125 |
+
K 124
|
126 |
+
▁CH 125
|
127 |
+
AT 126
|
128 |
+
DI 127
|
129 |
+
AG 128
|
130 |
+
TRE 129
|
131 |
+
TI 130
|
132 |
+
OM 131
|
133 |
+
ELLE 132
|
134 |
+
▁SU 133
|
135 |
+
AM 134
|
136 |
+
▁PO 135
|
137 |
+
▁MO 136
|
138 |
+
IM 137
|
139 |
+
▁PAS 138
|
140 |
+
VI 139
|
141 |
+
TÉ 140
|
142 |
+
FF 141
|
143 |
+
▁CA 142
|
144 |
+
TER 143
|
145 |
+
END 144
|
146 |
+
CETTE 145
|
147 |
+
QUE 146
|
148 |
+
TA 147
|
149 |
+
LO 148
|
150 |
+
▁PLUS 149
|
151 |
+
ILLE 150
|
152 |
+
QU 151
|
153 |
+
▁NE 152
|
154 |
+
▁RO 153
|
155 |
+
▁JE 154
|
156 |
+
▁QUE 155
|
157 |
+
▁DEUX 156
|
158 |
+
UT 157
|
159 |
+
CI 158
|
160 |
+
ALE 159
|
161 |
+
▁AVEC 160
|
162 |
+
UNE 161
|
163 |
+
AB 162
|
164 |
+
ITÉ 163
|
165 |
+
IC 164
|
166 |
+
GE 165
|
167 |
+
MA 166
|
168 |
+
AGE 167
|
169 |
+
AND 168
|
170 |
+
AC 169
|
171 |
+
OIS 170
|
172 |
+
▁CO 171
|
173 |
+
▁COMME 172
|
174 |
+
PH 173
|
175 |
+
VER 174
|
176 |
+
SSE 175
|
177 |
+
AV 176
|
178 |
+
▁QUI 177
|
179 |
+
TU 178
|
180 |
+
▁BA 179
|
181 |
+
NÉ 180
|
182 |
+
ID 181
|
183 |
+
PORT 182
|
184 |
+
▁VO 183
|
185 |
+
IER 184
|
186 |
+
▁ÉTÉ 185
|
187 |
+
MI 186
|
188 |
+
IV 187
|
189 |
+
ÈRE 188
|
190 |
+
ARD 189
|
191 |
+
AU 190
|
192 |
+
OC 191
|
193 |
+
▁EX 192
|
194 |
+
▁DI 193
|
195 |
+
▁CHA 194
|
196 |
+
ÉRI 195
|
197 |
+
MB 196
|
198 |
+
IGN 197
|
199 |
+
▁RA 198
|
200 |
+
DU 199
|
201 |
+
ISTE 200
|
202 |
+
TH 201
|
203 |
+
AIS 202
|
204 |
+
INE 203
|
205 |
+
ANG 204
|
206 |
+
▁COMP 205
|
207 |
+
▁OU 206
|
208 |
+
AUX 207
|
209 |
+
IÈRE 208
|
210 |
+
ORD 209
|
211 |
+
X 210
|
212 |
+
▁PR 211
|
213 |
+
▁ÉGALEMENT 212
|
214 |
+
▁CONS 213
|
215 |
+
LU 214
|
216 |
+
▁SES 215
|
217 |
+
▁SOU 216
|
218 |
+
POS 217
|
219 |
+
CTION 218
|
220 |
+
NA 219
|
221 |
+
TO 220
|
222 |
+
OUR 221
|
223 |
+
ART 222
|
224 |
+
Ô 223
|
225 |
+
UE 224
|
226 |
+
MÉ 225
|
227 |
+
TION 226
|
228 |
+
▁NOM 227
|
229 |
+
▁MAR 228
|
230 |
+
AS 229
|
231 |
+
MAN 230
|
232 |
+
LÉ 231
|
233 |
+
ILL 232
|
234 |
+
▁COM 233
|
235 |
+
▁PA 234
|
236 |
+
W 235
|
237 |
+
TURE 236
|
238 |
+
MIN 237
|
239 |
+
▁MAIS 238
|
240 |
+
VE 239
|
241 |
+
ITE 240
|
242 |
+
IX 241
|
243 |
+
ANCE 242
|
244 |
+
ENCE 243
|
245 |
+
OP 244
|
246 |
+
EMP 245
|
247 |
+
ALL 246
|
248 |
+
EUX 247
|
249 |
+
▁K 248
|
250 |
+
▁ÉTAIT 249
|
251 |
+
▁FAIT 250
|
252 |
+
J 251
|
253 |
+
▁TOUT 252
|
254 |
+
TIQUE 253
|
255 |
+
PE 254
|
256 |
+
UV 255
|
257 |
+
▁PLA 256
|
258 |
+
IRE 257
|
259 |
+
ENS 258
|
260 |
+
▁Y 259
|
261 |
+
IG 260
|
262 |
+
VO 261
|
263 |
+
VEN 262
|
264 |
+
ABLE 263
|
265 |
+
▁AUX 264
|
266 |
+
▁MON 265
|
267 |
+
ÊT 266
|
268 |
+
▁AUSSI 267
|
269 |
+
▁FOR 268
|
270 |
+
TRA 269
|
271 |
+
LES 270
|
272 |
+
NI 271
|
273 |
+
TRI 272
|
274 |
+
▁DIS 273
|
275 |
+
▁MI 274
|
276 |
+
▁TRA 275
|
277 |
+
▁CENT 276
|
278 |
+
▁TO 277
|
279 |
+
ÉS 278
|
280 |
+
TTE 279
|
281 |
+
EX 280
|
282 |
+
▁APP 281
|
283 |
+
▁GRAND 282
|
284 |
+
▁AR 283
|
285 |
+
INS 284
|
286 |
+
▁NO 285
|
287 |
+
▁DÉC 286
|
288 |
+
MO 287
|
289 |
+
▁BR 288
|
290 |
+
▁AN 289
|
291 |
+
ÉES 290
|
292 |
+
ONNE 291
|
293 |
+
▁PRÉ 292
|
294 |
+
▁ME 293
|
295 |
+
▁LUI 294
|
296 |
+
▁FA 295
|
297 |
+
TEN 296
|
298 |
+
AUT 297
|
299 |
+
BL 298
|
300 |
+
IVE 299
|
301 |
+
Ç 300
|
302 |
+
 301
|
303 |
+
▁FUT 302
|
304 |
+
▁SO 303
|
305 |
+
▁TROIS 304
|
306 |
+
EAU 305
|
307 |
+
▁ALORS 306
|
308 |
+
TEUR 307
|
309 |
+
▁MÉ 308
|
310 |
+
DA 309
|
311 |
+
▁J 310
|
312 |
+
▁ON 311
|
313 |
+
▁JA 312
|
314 |
+
GUE 313
|
315 |
+
▁LEUR 314
|
316 |
+
ÈME 315
|
317 |
+
▁ONT 316
|
318 |
+
▁W 317
|
319 |
+
▁MÊME 318
|
320 |
+
ACC 319
|
321 |
+
IEN 320
|
322 |
+
▁SAINT 321
|
323 |
+
AINE 322
|
324 |
+
À 323
|
325 |
+
▁VA 324
|
326 |
+
▁FIN 325
|
327 |
+
ICI 326
|
328 |
+
ITION 327
|
329 |
+
▁CES 328
|
330 |
+
▁COUR 329
|
331 |
+
UM 330
|
332 |
+
BRE 331
|
333 |
+
▁PEU 332
|
334 |
+
Î 333
|
335 |
+
TRO 334
|
336 |
+
IENNE 335
|
337 |
+
IDE 336
|
338 |
+
▁REP 337
|
339 |
+
▁JU 338
|
340 |
+
▁VILLE 339
|
341 |
+
▁APRÈS 340
|
342 |
+
▁ÊTRE 341
|
343 |
+
▁RI 342
|
344 |
+
▁VOUS 343
|
345 |
+
▁PLUSIEURS 344
|
346 |
+
▁NA 345
|
347 |
+
▁TH 346
|
348 |
+
ANTE 347
|
349 |
+
TRÈS 348
|
350 |
+
▁SITUÉ 349
|
351 |
+
▁TOUR 350
|
352 |
+
▁PARTIE 351
|
353 |
+
▁FRANC 352
|
354 |
+
▁QUATRE 353
|
355 |
+
▁PER 354
|
356 |
+
VIENT 355
|
357 |
+
▁ENTRE 356
|
358 |
+
ING 357
|
359 |
+
▁PREMIER 358
|
360 |
+
▁CAR 359
|
361 |
+
LON 360
|
362 |
+
FORM 361
|
363 |
+
BA 362
|
364 |
+
VÉ 363
|
365 |
+
IFI 364
|
366 |
+
AIENT 365
|
367 |
+
HI 366
|
368 |
+
▁JO 367
|
369 |
+
TIF 368
|
370 |
+
TANT 369
|
371 |
+
▁PEUT 370
|
372 |
+
STRU 371
|
373 |
+
▁PARTI 372
|
374 |
+
▁COMMUNE 373
|
375 |
+
Ê 374
|
376 |
+
▁REN 375
|
377 |
+
ATEUR 376
|
378 |
+
▁BIEN 377
|
379 |
+
▁PRI 378
|
380 |
+
▁RUE 379
|
381 |
+
▁MONT 380
|
382 |
+
▁PI 381
|
383 |
+
▁JOUR 382
|
384 |
+
AUTRES 383
|
385 |
+
IBLE 384
|
386 |
+
APP 385
|
387 |
+
▁CINQ 386
|
388 |
+
▁PREMIÈRE 387
|
389 |
+
▁MARI 388
|
390 |
+
CK 389
|
391 |
+
▁TROUVE 390
|
392 |
+
ISSE 391
|
393 |
+
▁DONC 392
|
394 |
+
▁GROUPE 393
|
395 |
+
▁JOUE 394
|
396 |
+
▁VERS 395
|
397 |
+
JO 396
|
398 |
+
ÎT 397
|
399 |
+
ÉTAT 398
|
400 |
+
▁FRANÇAIS 399
|
401 |
+
▁LORS 400
|
402 |
+
▁ENSUITE 401
|
403 |
+
▁PUIS 402
|
404 |
+
▁PETIT 403
|
405 |
+
EUSE 404
|
406 |
+
▁AINSI 405
|
407 |
+
▁TRAVAIL 406
|
408 |
+
▁NOUS 407
|
409 |
+
▁QUELQUE 408
|
410 |
+
▁COLL 409
|
411 |
+
▁CERTAIN 410
|
412 |
+
▁LIEU 411
|
413 |
+
▁PRÉSENT 412
|
414 |
+
ÉQUIPE 413
|
415 |
+
▁VINGT 414
|
416 |
+
▁PÈRE 415
|
417 |
+
▁FAMILLE 416
|
418 |
+
▁RU 417
|
419 |
+
▁RENCONTRE 418
|
420 |
+
▁ESPÈCE 419
|
421 |
+
▁FILS 420
|
422 |
+
▁GÉNÉRAL 421
|
423 |
+
▁OB 422
|
424 |
+
▁ÉV 423
|
425 |
+
Ï 424
|
426 |
+
▁PAYS 425
|
427 |
+
▁ANNÉE 426
|
428 |
+
ÉGLISE 427
|
429 |
+
▁PRINCIPAL 428
|
430 |
+
▁GUERRE 429
|
431 |
+
▁SANS 430
|
432 |
+
ANCIEN 431
|
433 |
+
▁CEPENDANT 432
|
434 |
+
▁RESTE 433
|
435 |
+
Œ 434
|
436 |
+
▁PENDANT 435
|
437 |
+
▁TEMPS 436
|
438 |
+
▁FOND 437
|
439 |
+
HUI 438
|
440 |
+
▁RÉGION 439
|
441 |
+
MM 440
|
442 |
+
▁MEMBRE 441
|
443 |
+
Û 442
|
444 |
+
▁TRANS 443
|
445 |
+
▁CLUB 444
|
446 |
+
BERT 445
|
447 |
+
▁DEPUIS 446
|
448 |
+
▁IMP 447
|
449 |
+
PRÈS 448
|
450 |
+
▁SEPT 449
|
451 |
+
▁NATIONAL 450
|
452 |
+
▁CONNU 451
|
453 |
+
▁VILLAGE 452
|
454 |
+
▁MORT 453
|
455 |
+
▁ENCORE 454
|
456 |
+
ORGANIS 455
|
457 |
+
HISTOIRE 456
|
458 |
+
Ù 457
|
459 |
+
▁SECOND 458
|
460 |
+
▁NOUVELLE 459
|
461 |
+
ŒUVRE 460
|
462 |
+
ORIGINE 461
|
463 |
+
UNIVERSITÉ 462
|
464 |
+
▁UTILISÉ 463
|
465 |
+
▁AUJOURD 464
|
466 |
+
▁COMMUN 465
|
467 |
+
▁FILM 466
|
468 |
+
▁FRÈRE 467
|
469 |
+
▁TITRE 468
|
470 |
+
▁DIRECT 469
|
471 |
+
ÉLECT 470
|
472 |
+
▁FEMME 471
|
473 |
+
▁HUIT 472
|
474 |
+
ÉTAIENT 473
|
475 |
+
ÉCOLE 474
|
476 |
+
▁DERNIER 475
|
477 |
+
▁MONSIEUR 476
|
478 |
+
▁IMPORTANT 477
|
479 |
+
▁PERMET 478
|
480 |
+
▁JUSQU 479
|
481 |
+
▁DROIT 480
|
482 |
+
▁CARRIÈRE 481
|
483 |
+
ARCHI 482
|
484 |
+
▁NOMBREUX 483
|
485 |
+
▁SAISON 484
|
486 |
+
▁NOTAMMENT 485
|
487 |
+
▁PIERRE 486
|
488 |
+
BOURG 487
|
489 |
+
▁DIFFÉRENT 488
|
490 |
+
▁NOUVEAU 489
|
491 |
+
▁TOUJOURS 490
|
492 |
+
▁SIÈGE 491
|
493 |
+
ÉDIT 492
|
494 |
+
▁PERSONNE 493
|
495 |
+
▁SUIVANT 494
|
496 |
+
▁CELUI 495
|
497 |
+
Ë 496
|
498 |
+
Ü 497
|
499 |
+
Q 498
|
500 |
+
Æ 499
|
501 |
+
#0 500
|
502 |
+
#1 501
|
data/lang_bpe_500/train.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:279dd4ab74b7c19c20543f31660564cb4dc2adddc4097356f22782928fd76ecd
|
3 |
+
size 31024469
|
data/lang_bpe_500/train_orig.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8868efaf3b191da9546ec193cbb0788a847b52b07d7a0c6ecedf0e07dda19c2b
|
3 |
+
size 32593161
|
data/lang_bpe_500/unigram_500.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:787932caf5c0adf93e850c18742279142ab33cdff5a6bd1234ad3aca2fc0b998
|
3 |
+
size 244624
|
data/lang_bpe_500/unigram_500.vocab
ADDED
@@ -0,0 +1,500 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<blk> 0
|
2 |
+
<sos/eos> 0
|
3 |
+
<unk> 0
|
4 |
+
S -2.95044
|
5 |
+
▁ -3.52819
|
6 |
+
E -3.61069
|
7 |
+
▁DE -3.77593
|
8 |
+
' -3.80476
|
9 |
+
T -3.98568
|
10 |
+
R -4.21426
|
11 |
+
É -4.21489
|
12 |
+
▁L -4.24855
|
13 |
+
▁LA -4.30356
|
14 |
+
C -4.37675
|
15 |
+
▁LE -4.50794
|
16 |
+
A -4.51183
|
17 |
+
O -4.52633
|
18 |
+
P -4.53401
|
19 |
+
▁D -4.54343
|
20 |
+
U -4.57523
|
21 |
+
I -4.57714
|
22 |
+
▁EST -4.70374
|
23 |
+
▁IL -4.72016
|
24 |
+
ON -4.78236
|
25 |
+
N -4.81209
|
26 |
+
▁À -4.8331
|
27 |
+
▁S -4.86694
|
28 |
+
▁A -4.86931
|
29 |
+
▁ET -4.90966
|
30 |
+
▁C -4.93591
|
31 |
+
IN -4.96857
|
32 |
+
D -4.97671
|
33 |
+
G -5.00877
|
34 |
+
ER -5.02622
|
35 |
+
ES -5.05635
|
36 |
+
▁EN -5.07399
|
37 |
+
▁LES -5.12278
|
38 |
+
Y -5.12281
|
39 |
+
IS -5.12371
|
40 |
+
IT -5.1512
|
41 |
+
L -5.2095
|
42 |
+
▁DU -5.27551
|
43 |
+
AL -5.30047
|
44 |
+
ENT -5.32252
|
45 |
+
▁F -5.32693
|
46 |
+
RE -5.34073
|
47 |
+
▁PAR -5.34309
|
48 |
+
▁DES -5.37599
|
49 |
+
F -5.40391
|
50 |
+
LE -5.41817
|
51 |
+
▁UN -5.4263
|
52 |
+
▁B -5.42928
|
53 |
+
▁SE -5.45833
|
54 |
+
V -5.4948
|
55 |
+
▁AU -5.51091
|
56 |
+
AN -5.51095
|
57 |
+
B -5.52945
|
58 |
+
OR -5.57706
|
59 |
+
▁G -5.59256
|
60 |
+
RA -5.60306
|
61 |
+
ANT -5.61866
|
62 |
+
OU -5.62585
|
63 |
+
H -5.63579
|
64 |
+
UR -5.65002
|
65 |
+
▁DANS -5.65339
|
66 |
+
▁T -5.65478
|
67 |
+
RI -5.66107
|
68 |
+
▁M -5.67156
|
69 |
+
TE -5.68258
|
70 |
+
▁RE -5.70332
|
71 |
+
AR -5.71733
|
72 |
+
▁UNE -5.73579
|
73 |
+
▁ELLE -5.73837
|
74 |
+
▁V -5.77157
|
75 |
+
M -5.80446
|
76 |
+
ATION -5.80592
|
77 |
+
UL -5.82578
|
78 |
+
ÉE -5.83786
|
79 |
+
Z -5.84404
|
80 |
+
▁N -5.85083
|
81 |
+
NE -5.85186
|
82 |
+
ME -5.85253
|
83 |
+
IL -5.85976
|
84 |
+
▁P -5.8625
|
85 |
+
MENT -5.86255
|
86 |
+
IE -5.87153
|
87 |
+
EN -5.87781
|
88 |
+
▁H -5.87931
|
89 |
+
▁CON -5.91114
|
90 |
+
IR -5.92419
|
91 |
+
EST -5.9346
|
92 |
+
LI -5.93858
|
93 |
+
▁SON -5.94037
|
94 |
+
CH -5.94997
|
95 |
+
▁POUR -5.95025
|
96 |
+
È -5.97227
|
97 |
+
LA -5.98258
|
98 |
+
CE -6.00354
|
99 |
+
▁RÉ -6.00832
|
100 |
+
▁MA -6.01047
|
101 |
+
IQUE -6.01627
|
102 |
+
▁CE -6.01653
|
103 |
+
EUR -6.0238
|
104 |
+
▁SA -6.03806
|
105 |
+
NT -6.09711
|
106 |
+
▁SONT -6.09794
|
107 |
+
US -6.10094
|
108 |
+
EMENT -6.1151
|
109 |
+
▁IN -6.12361
|
110 |
+
▁PRO -6.12491
|
111 |
+
▁SUR -6.14045
|
112 |
+
RÉ -6.14134
|
113 |
+
RO -6.14433
|
114 |
+
▁DÉ -6.14668
|
115 |
+
AIT -6.14836
|
116 |
+
DE -6.15261
|
117 |
+
▁QU -6.15573
|
118 |
+
▁É -6.15607
|
119 |
+
EL -6.15758
|
120 |
+
OL -6.15966
|
121 |
+
AIRE -6.17272
|
122 |
+
ION -6.17315
|
123 |
+
UN -6.17769
|
124 |
+
CHE -6.18594
|
125 |
+
K -6.22088
|
126 |
+
▁CH -6.22102
|
127 |
+
AT -6.25802
|
128 |
+
DI -6.2675
|
129 |
+
AG -6.28667
|
130 |
+
TRE -6.28903
|
131 |
+
TI -6.29845
|
132 |
+
OM -6.31474
|
133 |
+
ELLE -6.32241
|
134 |
+
▁SU -6.3278
|
135 |
+
AM -6.3377
|
136 |
+
▁PO -6.33994
|
137 |
+
▁MO -6.3488
|
138 |
+
IM -6.35296
|
139 |
+
▁PAS -6.35478
|
140 |
+
VI -6.37582
|
141 |
+
TÉ -6.39092
|
142 |
+
FF -6.39812
|
143 |
+
▁CA -6.40222
|
144 |
+
TER -6.40368
|
145 |
+
END -6.40953
|
146 |
+
CETTE -6.41276
|
147 |
+
QUE -6.41398
|
148 |
+
TA -6.41647
|
149 |
+
LO -6.42665
|
150 |
+
▁PLUS -6.43945
|
151 |
+
ILLE -6.44629
|
152 |
+
QU -6.47038
|
153 |
+
▁NE -6.5018
|
154 |
+
▁RO -6.51476
|
155 |
+
▁JE -6.52588
|
156 |
+
▁QUE -6.53357
|
157 |
+
▁DEUX -6.54569
|
158 |
+
UT -6.56102
|
159 |
+
CI -6.56669
|
160 |
+
ALE -6.5754
|
161 |
+
▁AVEC -6.57591
|
162 |
+
UNE -6.58781
|
163 |
+
AB -6.59318
|
164 |
+
ITÉ -6.59338
|
165 |
+
IC -6.59991
|
166 |
+
GE -6.60459
|
167 |
+
MA -6.61775
|
168 |
+
AGE -6.61823
|
169 |
+
AND -6.62569
|
170 |
+
AC -6.63217
|
171 |
+
OIS -6.6338
|
172 |
+
▁CO -6.645
|
173 |
+
▁COMME -6.65494
|
174 |
+
PH -6.65508
|
175 |
+
VER -6.6556
|
176 |
+
SSE -6.66542
|
177 |
+
AV -6.67119
|
178 |
+
▁QUI -6.67297
|
179 |
+
TU -6.68589
|
180 |
+
▁BA -6.68726
|
181 |
+
NÉ -6.69021
|
182 |
+
ID -6.69194
|
183 |
+
PORT -6.70656
|
184 |
+
▁VO -6.71041
|
185 |
+
IER -6.71949
|
186 |
+
▁ÉTÉ -6.74721
|
187 |
+
MI -6.74758
|
188 |
+
IV -6.74785
|
189 |
+
ÈRE -6.75787
|
190 |
+
ARD -6.76178
|
191 |
+
AU -6.76538
|
192 |
+
OC -6.77933
|
193 |
+
▁EX -6.7813
|
194 |
+
▁DI -6.79024
|
195 |
+
▁CHA -6.79452
|
196 |
+
ÉRI -6.79873
|
197 |
+
MB -6.82177
|
198 |
+
IGN -6.82265
|
199 |
+
▁RA -6.82444
|
200 |
+
DU -6.8371
|
201 |
+
ISTE -6.83906
|
202 |
+
TH -6.84131
|
203 |
+
AIS -6.84576
|
204 |
+
INE -6.8465
|
205 |
+
ANG -6.84672
|
206 |
+
▁COMP -6.85555
|
207 |
+
▁OU -6.85622
|
208 |
+
AUX -6.86386
|
209 |
+
IÈRE -6.86924
|
210 |
+
ORD -6.86985
|
211 |
+
X -6.87628
|
212 |
+
▁PR -6.87928
|
213 |
+
▁ÉGALEMENT -6.88091
|
214 |
+
▁CONS -6.88205
|
215 |
+
LU -6.88394
|
216 |
+
▁SES -6.88761
|
217 |
+
▁SOU -6.89325
|
218 |
+
POS -6.89602
|
219 |
+
CTION -6.89668
|
220 |
+
NA -6.90352
|
221 |
+
TO -6.90354
|
222 |
+
OUR -6.90604
|
223 |
+
ART -6.91586
|
224 |
+
Ô -6.91748
|
225 |
+
UE -6.92409
|
226 |
+
MÉ -6.94067
|
227 |
+
TION -6.94666
|
228 |
+
▁NOM -6.94742
|
229 |
+
▁MAR -6.94757
|
230 |
+
AS -6.95132
|
231 |
+
MAN -6.95299
|
232 |
+
LÉ -6.9536
|
233 |
+
ILL -6.95456
|
234 |
+
▁COM -6.96091
|
235 |
+
▁PA -6.96811
|
236 |
+
W -6.97204
|
237 |
+
TURE -6.97208
|
238 |
+
MIN -6.98243
|
239 |
+
▁MAIS -6.9843
|
240 |
+
VE -6.98613
|
241 |
+
ITE -6.98699
|
242 |
+
IX -6.98924
|
243 |
+
ANCE -6.99379
|
244 |
+
ENCE -6.99452
|
245 |
+
OP -7.00439
|
246 |
+
EMP -7.01487
|
247 |
+
ALL -7.01562
|
248 |
+
EUX -7.0194
|
249 |
+
▁K -7.02409
|
250 |
+
▁ÉTAIT -7.02541
|
251 |
+
▁FAIT -7.03542
|
252 |
+
J -7.03897
|
253 |
+
▁TOUT -7.04055
|
254 |
+
TIQUE -7.04819
|
255 |
+
PE -7.0542
|
256 |
+
UV -7.06736
|
257 |
+
▁PLA -7.06843
|
258 |
+
IRE -7.07173
|
259 |
+
ENS -7.07267
|
260 |
+
▁Y -7.08027
|
261 |
+
IG -7.08324
|
262 |
+
VO -7.08886
|
263 |
+
VEN -7.08955
|
264 |
+
ABLE -7.09118
|
265 |
+
▁AUX -7.09488
|
266 |
+
▁MON -7.10007
|
267 |
+
ÊT -7.10148
|
268 |
+
▁AUSSI -7.10337
|
269 |
+
▁FOR -7.1046
|
270 |
+
TRA -7.10535
|
271 |
+
LES -7.10662
|
272 |
+
NI -7.11363
|
273 |
+
TRI -7.1155
|
274 |
+
▁DIS -7.12849
|
275 |
+
▁MI -7.13277
|
276 |
+
▁TRA -7.13732
|
277 |
+
▁CENT -7.14084
|
278 |
+
▁TO -7.14595
|
279 |
+
ÉS -7.14696
|
280 |
+
TTE -7.14853
|
281 |
+
EX -7.15105
|
282 |
+
▁APP -7.15361
|
283 |
+
▁GRAND -7.1556
|
284 |
+
▁AR -7.15993
|
285 |
+
INS -7.16085
|
286 |
+
▁NO -7.16217
|
287 |
+
▁DÉC -7.16942
|
288 |
+
MO -7.17708
|
289 |
+
▁BR -7.17793
|
290 |
+
▁AN -7.17923
|
291 |
+
ÉES -7.18741
|
292 |
+
ONNE -7.18794
|
293 |
+
▁PRÉ -7.19409
|
294 |
+
▁ME -7.20081
|
295 |
+
▁LUI -7.20722
|
296 |
+
▁FA -7.21393
|
297 |
+
TEN -7.22018
|
298 |
+
AUT -7.22063
|
299 |
+
BL -7.22212
|
300 |
+
IVE -7.22299
|
301 |
+
Ç -7.22345
|
302 |
+
 -7.23831
|
303 |
+
▁FUT -7.24785
|
304 |
+
▁SO -7.25014
|
305 |
+
▁TROIS -7.25306
|
306 |
+
EAU -7.25746
|
307 |
+
▁ALORS -7.25977
|
308 |
+
TEUR -7.26764
|
309 |
+
▁MÉ -7.27978
|
310 |
+
DA -7.28468
|
311 |
+
▁J -7.3032
|
312 |
+
▁ON -7.30519
|
313 |
+
▁JA -7.30584
|
314 |
+
GUE -7.31225
|
315 |
+
▁LEUR -7.31485
|
316 |
+
ÈME -7.33414
|
317 |
+
▁ONT -7.33777
|
318 |
+
▁W -7.33813
|
319 |
+
▁MÊME -7.35801
|
320 |
+
ACC -7.36611
|
321 |
+
IEN -7.36761
|
322 |
+
▁SAINT -7.37041
|
323 |
+
AINE -7.3713
|
324 |
+
À -7.37841
|
325 |
+
▁VA -7.38049
|
326 |
+
▁FIN -7.38075
|
327 |
+
ICI -7.38685
|
328 |
+
ITION -7.39214
|
329 |
+
▁CES -7.39695
|
330 |
+
▁COUR -7.40965
|
331 |
+
UM -7.41412
|
332 |
+
BRE -7.42144
|
333 |
+
▁PEU -7.42661
|
334 |
+
Î -7.43206
|
335 |
+
TRO -7.44221
|
336 |
+
IENNE -7.4553
|
337 |
+
IDE -7.46487
|
338 |
+
▁REP -7.46845
|
339 |
+
▁JU -7.48234
|
340 |
+
▁VILLE -7.48283
|
341 |
+
▁APRÈS -7.4882
|
342 |
+
▁ÊTRE -7.50185
|
343 |
+
▁RI -7.50735
|
344 |
+
▁VOUS -7.50844
|
345 |
+
▁PLUSIEURS -7.50953
|
346 |
+
▁NA -7.51404
|
347 |
+
▁TH -7.52054
|
348 |
+
ANTE -7.52681
|
349 |
+
TRÈS -7.55566
|
350 |
+
▁SITUÉ -7.55585
|
351 |
+
▁TOUR -7.57672
|
352 |
+
▁PARTIE -7.58074
|
353 |
+
▁FRANC -7.58851
|
354 |
+
▁QUATRE -7.60502
|
355 |
+
▁PER -7.60579
|
356 |
+
VIENT -7.60672
|
357 |
+
▁ENTRE -7.60832
|
358 |
+
ING -7.61424
|
359 |
+
▁PREMIER -7.61747
|
360 |
+
▁CAR -7.61875
|
361 |
+
LON -7.62346
|
362 |
+
FORM -7.62502
|
363 |
+
BA -7.62598
|
364 |
+
VÉ -7.62913
|
365 |
+
IFI -7.63386
|
366 |
+
AIENT -7.63434
|
367 |
+
HI -7.63915
|
368 |
+
▁JO -7.64691
|
369 |
+
TIF -7.64803
|
370 |
+
TANT -7.64813
|
371 |
+
▁PEUT -7.6495
|
372 |
+
STRU -7.65091
|
373 |
+
▁PARTI -7.6818
|
374 |
+
▁COMMUNE -7.69343
|
375 |
+
Ê -7.70573
|
376 |
+
▁REN -7.71927
|
377 |
+
ATEUR -7.72429
|
378 |
+
▁BIEN -7.72773
|
379 |
+
▁PRI -7.72972
|
380 |
+
▁RUE -7.73029
|
381 |
+
▁MONT -7.73878
|
382 |
+
▁PI -7.74146
|
383 |
+
▁JOUR -7.75127
|
384 |
+
AUTRES -7.75678
|
385 |
+
IBLE -7.76348
|
386 |
+
APP -7.76413
|
387 |
+
▁CINQ -7.77455
|
388 |
+
▁PREMIÈRE -7.7773
|
389 |
+
▁MARI -7.78024
|
390 |
+
CK -7.78135
|
391 |
+
▁TROUVE -7.81049
|
392 |
+
ISSE -7.82038
|
393 |
+
▁DONC -7.82363
|
394 |
+
▁GROUPE -7.84207
|
395 |
+
▁JOUE -7.84684
|
396 |
+
▁VERS -7.84731
|
397 |
+
JO -7.84878
|
398 |
+
ÎT -7.85732
|
399 |
+
ÉTAT -7.86426
|
400 |
+
▁FRANÇAIS -7.86688
|
401 |
+
▁LORS -7.90172
|
402 |
+
▁ENSUITE -7.90256
|
403 |
+
▁PUIS -7.90451
|
404 |
+
▁PETIT -7.90487
|
405 |
+
EUSE -7.90881
|
406 |
+
▁AINSI -7.91868
|
407 |
+
▁TRAVAIL -7.92917
|
408 |
+
▁NOUS -7.93438
|
409 |
+
▁QUELQUE -7.93802
|
410 |
+
▁COLL -7.94043
|
411 |
+
▁CERTAIN -7.94047
|
412 |
+
▁LIEU -7.94101
|
413 |
+
▁PRÉSENT -7.94744
|
414 |
+
ÉQUIPE -7.95119
|
415 |
+
▁VINGT -7.96329
|
416 |
+
▁PÈRE -7.96773
|
417 |
+
▁FAMILLE -7.96931
|
418 |
+
▁RU -7.97689
|
419 |
+
▁RENCONTRE -7.98339
|
420 |
+
▁ESPÈCE -7.98434
|
421 |
+
▁FILS -7.99492
|
422 |
+
▁GÉNÉRAL -7.99556
|
423 |
+
▁OB -7.99882
|
424 |
+
▁ÉV -8.00993
|
425 |
+
Ï -8.03049
|
426 |
+
▁PAYS -8.03417
|
427 |
+
▁ANNÉE -8.03678
|
428 |
+
ÉGLISE -8.04578
|
429 |
+
▁PRINCIPAL -8.05694
|
430 |
+
▁GUERRE -8.06876
|
431 |
+
▁SANS -8.07727
|
432 |
+
ANCIEN -8.09295
|
433 |
+
▁CEPENDANT -8.11907
|
434 |
+
▁RESTE -8.13532
|
435 |
+
Œ -8.14688
|
436 |
+
▁PENDANT -8.15932
|
437 |
+
▁TEMPS -8.17676
|
438 |
+
▁FOND -8.18238
|
439 |
+
HUI -8.18803
|
440 |
+
▁RÉGION -8.19186
|
441 |
+
MM -8.19787
|
442 |
+
▁MEMBRE -8.20795
|
443 |
+
Û -8.20852
|
444 |
+
▁TRANS -8.22004
|
445 |
+
▁CLUB -8.2233
|
446 |
+
BERT -8.23451
|
447 |
+
▁DEPUIS -8.24704
|
448 |
+
▁IMP -8.24718
|
449 |
+
PRÈS -8.24744
|
450 |
+
▁SEPT -8.25346
|
451 |
+
▁NATIONAL -8.27048
|
452 |
+
▁CONNU -8.27896
|
453 |
+
▁VILLAGE -8.28281
|
454 |
+
▁MORT -8.28495
|
455 |
+
▁ENCORE -8.2883
|
456 |
+
ORGANIS -8.29212
|
457 |
+
HISTOIRE -8.29341
|
458 |
+
Ù -8.30578
|
459 |
+
▁SECOND -8.31309
|
460 |
+
▁NOUVELLE -8.32129
|
461 |
+
ŒUVRE -8.32595
|
462 |
+
ORIGINE -8.33151
|
463 |
+
UNIVERSITÉ -8.334
|
464 |
+
▁UTILISÉ -8.33568
|
465 |
+
▁AUJOURD -8.33703
|
466 |
+
▁COMMUN -8.34805
|
467 |
+
▁FILM -8.35268
|
468 |
+
▁FRÈRE -8.35616
|
469 |
+
▁TITRE -8.35992
|
470 |
+
▁DIRECT -8.37008
|
471 |
+
ÉLECT -8.38125
|
472 |
+
▁FEMME -8.39511
|
473 |
+
▁HUIT -8.39969
|
474 |
+
ÉTAIENT -8.40294
|
475 |
+
ÉCOLE -8.40896
|
476 |
+
▁DERNIER -8.41137
|
477 |
+
▁MONSIEUR -8.41865
|
478 |
+
▁IMPORTANT -8.41901
|
479 |
+
▁PERMET -8.42395
|
480 |
+
▁JUSQU -8.43115
|
481 |
+
▁DROIT -8.43711
|
482 |
+
▁CARRIÈRE -8.43793
|
483 |
+
ARCHI -8.44301
|
484 |
+
▁NOMBREUX -8.44313
|
485 |
+
▁SAISON -8.44828
|
486 |
+
▁NOTAMMENT -8.4518
|
487 |
+
▁PIERRE -8.46428
|
488 |
+
BOURG -8.46772
|
489 |
+
▁DIFFÉRENT -8.46861
|
490 |
+
▁NOUVEAU -8.48184
|
491 |
+
▁TOUJOURS -8.48767
|
492 |
+
▁SIÈGE -8.50391
|
493 |
+
ÉDIT -8.51508
|
494 |
+
▁PERSONNE -8.51771
|
495 |
+
▁SUIVANT -8.51936
|
496 |
+
▁CELUI -8.52301
|
497 |
+
Ë -9.3779
|
498 |
+
Ü -9.47316
|
499 |
+
Q -9.58463
|
500 |
+
Æ -11.4978
|
data/lang_bpe_500/words.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/lang_bpe_500/words_no_ids.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/lm/3gram.arpa
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1be4fda53d5a4b94d700114f5a5505173da3539d20931957e5c5be4ad8133a7f
|
3 |
+
size 152616228
|
data/lm/4gram.arpa
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fddfa46e7ebac2fa1a1ce3f8ec484a1f0732fda0dda36fb08afb38ae1c89206
|
3 |
+
size 314528045
|
data/lm/G_3_gram.fst.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c20bc2acc13d5f994d45c043efca5912e2de4c35c8f02b4e4f9fec3b40fd7394
|
3 |
+
size 201842208
|
data/lm/G_3_gram.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d020d44e201b49b73d86fab4de2e27239b65aa6964af798ff6084c663c8c0f7
|
3 |
+
size 125652395
|
data/lm/G_4_gram.fst.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4bacb19ab623009603f2880cf406b2bc58e12e0e3fc199bb22dca06f2cc3dfc3
|
3 |
+
size 424054640
|
decoding_results/fast_beam_search/errs-test-cv-beam_20.0_max_contexts_8_max_states_64-epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
decoding_results/fast_beam_search/log-decode-epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model-2023-04-03-17-31-21
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-04-03 17:31:21,345 INFO [decode.py:659] Decoding started
|
2 |
+
2023-04-03 17:31:21,345 INFO [decode.py:665] Device: cuda:0
|
3 |
+
2023-04-03 17:31:21,347 INFO [decode.py:675] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.23.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '62e404dd3f3a811d73e424199b3408e309c06e1a', 'k2-git-date': 'Mon Jan 30 02:26:16 2023', 'lhotse-version': '1.12.0.dev+git.3ccfeb7.clean', 'torch-version': '1.13.0', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': 'd74822d-dirty', 'icefall-git-date': 'Tue Mar 21 21:35:32 2023', 'icefall-path': '/home/lishaojie/icefall', 'k2-path': '/home/lishaojie/.conda/envs/env_lishaojie/lib/python3.8/site-packages/k2/__init__.py', 'lhotse-path': '/home/lishaojie/.conda/envs/env_lishaojie/lib/python3.8/site-packages/lhotse/__init__.py', 'hostname': 'cnc533', 'IP address': '127.0.1.1'}, 'epoch': 29, 'iter': 0, 'avg': 9, 'use_averaged_model': True, 'exp_dir': PosixPath('pruned_transducer_stateless7_streaming/exp1'), 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_bpe_500'), 'decoding_method': 'fast_beam_search', 'beam_size': 4, 'beam': 20.0, 'ngram_lm_scale': 0.01, 'max_contexts': 8, 'max_states': 64, 'context_size': 2, 'max_sym_per_frame': 1, 'num_paths': 200, 'nbest_scale': 0.5, 'num_encoder_layers': '2,4,3,2,4', 'feedforward_dims': '1024,1024,2048,2048,1024', 'nhead': '8,8,8,8,8', 'encoder_dims': '384,384,384,384,384', 'attention_dims': '192,192,192,192,192', 'encoder_unmasked_dims': '256,256,256,256,256', 'zipformer_downsampling_factors': '1,2,4,8,2', 'cnn_module_kernels': '31,31,31,31,31', 'decoder_dim': 512, 'joiner_dim': 512, 'short_chunk_size': 50, 'num_left_chunks': 4, 'decode_chunk_len': 64, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 200, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'res_dir': PosixPath('pruned_transducer_stateless7_streaming/exp1/fast_beam_search'), 'suffix': 'epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model', 'blank_id': 0, 'unk_id': 2, 'vocab_size': 500}
|
4 |
+
2023-04-03 17:31:21,347 INFO [decode.py:677] About to create model
|
5 |
+
2023-04-03 17:31:21,749 INFO [zipformer.py:405] At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
|
6 |
+
2023-04-03 17:31:21,757 INFO [decode.py:748] Calculating the averaged model over epoch range from 20 (excluded) to 29
|
7 |
+
2023-04-03 17:31:23,870 INFO [decode.py:782] Number of model parameters: 70369391
|
8 |
+
2023-04-03 17:31:23,871 INFO [commonvoice_fr.py:406] About to get test cuts
|
9 |
+
2023-04-03 17:31:26,743 INFO [decode.py:560] batch 0/?, cuts processed until now is 27
|
10 |
+
2023-04-03 17:31:31,854 INFO [zipformer.py:2441] attn_weights_entropy = tensor([1.8338, 1.6836, 1.5364, 1.7643, 2.1272, 2.0399, 1.7407, 1.5925],
|
11 |
+
device='cuda:0'), covar=tensor([0.0367, 0.0349, 0.0585, 0.0342, 0.0213, 0.0459, 0.0350, 0.0414],
|
12 |
+
device='cuda:0'), in_proj_covar=tensor([0.0097, 0.0103, 0.0143, 0.0108, 0.0097, 0.0111, 0.0100, 0.0110],
|
13 |
+
device='cuda:0'), out_proj_covar=tensor([7.4944e-05, 7.9098e-05, 1.1173e-04, 8.2734e-05, 7.5248e-05, 8.1783e-05,
|
14 |
+
7.3728e-05, 8.3511e-05], device='cuda:0')
|
15 |
+
2023-04-03 17:31:36,035 INFO [decode.py:560] batch 20/?, cuts processed until now is 604
|
16 |
+
2023-04-03 17:31:46,332 INFO [decode.py:560] batch 40/?, cuts processed until now is 1209
|
17 |
+
2023-04-03 17:31:54,962 INFO [decode.py:560] batch 60/?, cuts processed until now is 1866
|
18 |
+
2023-04-03 17:32:04,386 INFO [decode.py:560] batch 80/?, cuts processed until now is 2422
|
19 |
+
2023-04-03 17:32:13,074 INFO [decode.py:560] batch 100/?, cuts processed until now is 3088
|
20 |
+
2023-04-03 17:32:14,054 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.2042, 1.9050, 2.4510, 1.6668, 2.2104, 2.4274, 1.7766, 2.5439],
|
21 |
+
device='cuda:0'), covar=tensor([0.1183, 0.2019, 0.1326, 0.1735, 0.0824, 0.1156, 0.2855, 0.0688],
|
22 |
+
device='cuda:0'), in_proj_covar=tensor([0.0188, 0.0202, 0.0188, 0.0186, 0.0170, 0.0210, 0.0213, 0.0194],
|
23 |
+
device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
|
24 |
+
device='cuda:0')
|
25 |
+
2023-04-03 17:32:22,296 INFO [decode.py:560] batch 120/?, cuts processed until now is 3672
|
26 |
+
2023-04-03 17:32:28,822 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.4821, 2.5480, 2.1061, 1.0486, 2.2957, 1.9745, 1.9215, 2.3727],
|
27 |
+
device='cuda:0'), covar=tensor([0.0910, 0.0618, 0.1588, 0.1982, 0.1334, 0.2716, 0.2164, 0.0818],
|
28 |
+
device='cuda:0'), in_proj_covar=tensor([0.0167, 0.0187, 0.0196, 0.0178, 0.0206, 0.0207, 0.0220, 0.0192],
|
29 |
+
device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
|
30 |
+
device='cuda:0')
|
31 |
+
2023-04-03 17:32:30,833 INFO [decode.py:560] batch 140/?, cuts processed until now is 4348
|
32 |
+
2023-04-03 17:32:39,389 INFO [decode.py:560] batch 160/?, cuts processed until now is 5035
|
33 |
+
2023-04-03 17:32:41,458 INFO [zipformer.py:2441] attn_weights_entropy = tensor([0.5151, 1.7409, 1.7151, 0.9089, 1.8593, 1.9981, 2.0410, 1.5445],
|
34 |
+
device='cuda:0'), covar=tensor([0.0868, 0.0573, 0.0496, 0.0555, 0.0400, 0.0600, 0.0273, 0.0678],
|
35 |
+
device='cuda:0'), in_proj_covar=tensor([0.0119, 0.0146, 0.0125, 0.0119, 0.0128, 0.0127, 0.0138, 0.0146],
|
36 |
+
device='cuda:0'), out_proj_covar=tensor([8.7160e-05, 1.0465e-04, 8.8840e-05, 8.3773e-05, 8.9721e-05, 9.0117e-05,
|
37 |
+
9.8475e-05, 1.0448e-04], device='cuda:0')
|
38 |
+
2023-04-03 17:32:48,122 INFO [decode.py:560] batch 180/?, cuts processed until now is 5674
|
39 |
+
2023-04-03 17:32:56,943 INFO [decode.py:560] batch 200/?, cuts processed until now is 6301
|
40 |
+
2023-04-03 17:33:05,928 INFO [decode.py:560] batch 220/?, cuts processed until now is 6914
|
41 |
+
2023-04-03 17:33:14,496 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.3674, 2.1843, 2.3572, 1.7864, 2.2139, 2.4677, 2.4855, 1.9260],
|
42 |
+
device='cuda:0'), covar=tensor([0.0445, 0.0549, 0.0557, 0.0678, 0.1166, 0.0458, 0.0437, 0.0916],
|
43 |
+
device='cuda:0'), in_proj_covar=tensor([0.0128, 0.0133, 0.0136, 0.0116, 0.0123, 0.0135, 0.0136, 0.0158],
|
44 |
+
device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0001, 0.0002, 0.0002, 0.0002, 0.0002],
|
45 |
+
device='cuda:0')
|
46 |
+
2023-04-03 17:33:14,778 INFO [decode.py:560] batch 240/?, cuts processed until now is 7540
|
47 |
+
2023-04-03 17:33:23,635 INFO [decode.py:560] batch 260/?, cuts processed until now is 8161
|
48 |
+
2023-04-03 17:33:27,250 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.4658, 2.5376, 2.1602, 1.2614, 2.3429, 2.0655, 1.9856, 2.4390],
|
49 |
+
device='cuda:0'), covar=tensor([0.0989, 0.0582, 0.1814, 0.1883, 0.1155, 0.2226, 0.2119, 0.0799],
|
50 |
+
device='cuda:0'), in_proj_covar=tensor([0.0167, 0.0187, 0.0196, 0.0178, 0.0206, 0.0207, 0.0220, 0.0192],
|
51 |
+
device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
|
52 |
+
device='cuda:0')
|
53 |
+
2023-04-03 17:33:32,012 INFO [decode.py:560] batch 280/?, cuts processed until now is 8857
|
54 |
+
2023-04-03 17:33:39,857 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.5870, 2.6057, 2.1060, 1.0188, 2.3087, 2.0434, 1.9922, 2.3943],
|
55 |
+
device='cuda:0'), covar=tensor([0.0943, 0.0679, 0.1608, 0.1959, 0.1304, 0.2489, 0.2117, 0.0787],
|
56 |
+
device='cuda:0'), in_proj_covar=tensor([0.0167, 0.0187, 0.0196, 0.0178, 0.0206, 0.0207, 0.0220, 0.0192],
|
57 |
+
device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
|
58 |
+
device='cuda:0')
|
59 |
+
2023-04-03 17:33:40,207 INFO [decode.py:560] batch 300/?, cuts processed until now is 9574
|
60 |
+
2023-04-03 17:33:49,272 INFO [decode.py:560] batch 320/?, cuts processed until now is 10169
|
61 |
+
2023-04-03 17:33:57,944 INFO [decode.py:560] batch 340/?, cuts processed until now is 10810
|
62 |
+
2023-04-03 17:34:06,479 INFO [decode.py:560] batch 360/?, cuts processed until now is 11452
|
63 |
+
2023-04-03 17:34:14,029 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.5451, 2.5973, 2.1309, 1.0323, 2.3093, 2.0920, 1.9761, 2.4208],
|
64 |
+
device='cuda:0'), covar=tensor([0.0829, 0.0689, 0.1334, 0.1924, 0.1226, 0.2190, 0.2106, 0.0793],
|
65 |
+
device='cuda:0'), in_proj_covar=tensor([0.0167, 0.0187, 0.0196, 0.0178, 0.0206, 0.0207, 0.0220, 0.0192],
|
66 |
+
device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
|
67 |
+
device='cuda:0')
|
68 |
+
2023-04-03 17:34:14,816 INFO [decode.py:560] batch 380/?, cuts processed until now is 12133
|
69 |
+
2023-04-03 17:34:24,080 INFO [decode.py:560] batch 400/?, cuts processed until now is 12706
|
70 |
+
2023-04-03 17:34:33,187 INFO [decode.py:560] batch 420/?, cuts processed until now is 13299
|
71 |
+
2023-04-03 17:34:42,380 INFO [decode.py:560] batch 440/?, cuts processed until now is 13891
|
72 |
+
2023-04-03 17:34:51,250 INFO [decode.py:560] batch 460/?, cuts processed until now is 14515
|
73 |
+
2023-04-03 17:34:59,929 INFO [decode.py:560] batch 480/?, cuts processed until now is 15158
|
74 |
+
2023-04-03 17:35:08,659 INFO [decode.py:560] batch 500/?, cuts processed until now is 15743
|
75 |
+
2023-04-03 17:35:11,772 INFO [decode.py:576] The transcripts are stored in pruned_transducer_stateless7_streaming/exp1/fast_beam_search/recogs-test-cv-beam_20.0_max_contexts_8_max_states_64-epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model.txt
|
76 |
+
2023-04-03 17:35:12,013 INFO [utils.py:558] [test-cv-beam_20.0_max_contexts_8_max_states_64] %WER 10.25% [16082 / 156915, 1180 ins, 1721 del, 13181 sub ]
|
77 |
+
2023-04-03 17:35:12,601 INFO [decode.py:589] Wrote detailed error stats to pruned_transducer_stateless7_streaming/exp1/fast_beam_search/errs-test-cv-beam_20.0_max_contexts_8_max_states_64-epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model.txt
|
78 |
+
2023-04-03 17:35:12,601 INFO [decode.py:609]
|
79 |
+
For test-cv, WER of different settings are:
|
80 |
+
beam_20.0_max_contexts_8_max_states_64 10.25 best for test-cv
|
81 |
+
|
82 |
+
2023-04-03 17:35:12,601 INFO [decode.py:808] Done!
|
decoding_results/fast_beam_search/recogs-test-cv-beam_20.0_max_contexts_8_max_states_64-epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
decoding_results/fast_beam_search/wer-summary-test-cv-beam_20.0_max_contexts_8_max_states_64.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
settings WER
|
2 |
+
beam_20.0_max_contexts_8_max_states_64 10.25
|
decoding_results/greedy_search/errs-test-cv-greedy_search-epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
decoding_results/greedy_search/log-decode-epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model-2023-04-03-17-20-40
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-04-03 17:20:40,951 INFO [decode.py:659] Decoding started
|
2 |
+
2023-04-03 17:20:40,952 INFO [decode.py:665] Device: cuda:0
|
3 |
+
2023-04-03 17:20:40,953 INFO [decode.py:675] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.23.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '62e404dd3f3a811d73e424199b3408e309c06e1a', 'k2-git-date': 'Mon Jan 30 02:26:16 2023', 'lhotse-version': '1.12.0.dev+git.3ccfeb7.clean', 'torch-version': '1.13.0', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': 'd74822d-dirty', 'icefall-git-date': 'Tue Mar 21 21:35:32 2023', 'icefall-path': '/home/lishaojie/icefall', 'k2-path': '/home/lishaojie/.conda/envs/env_lishaojie/lib/python3.8/site-packages/k2/__init__.py', 'lhotse-path': '/home/lishaojie/.conda/envs/env_lishaojie/lib/python3.8/site-packages/lhotse/__init__.py', 'hostname': 'cnc533', 'IP address': '127.0.1.1'}, 'epoch': 29, 'iter': 0, 'avg': 9, 'use_averaged_model': True, 'exp_dir': PosixPath('pruned_transducer_stateless7_streaming/exp1'), 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_bpe_500'), 'decoding_method': 'greedy_search', 'beam_size': 4, 'beam': 20.0, 'ngram_lm_scale': 0.01, 'max_contexts': 8, 'max_states': 64, 'context_size': 2, 'max_sym_per_frame': 1, 'num_paths': 200, 'nbest_scale': 0.5, 'num_encoder_layers': '2,4,3,2,4', 'feedforward_dims': '1024,1024,2048,2048,1024', 'nhead': '8,8,8,8,8', 'encoder_dims': '384,384,384,384,384', 'attention_dims': '192,192,192,192,192', 'encoder_unmasked_dims': '256,256,256,256,256', 'zipformer_downsampling_factors': '1,2,4,8,2', 'cnn_module_kernels': '31,31,31,31,31', 'decoder_dim': 512, 'joiner_dim': 512, 'short_chunk_size': 50, 'num_left_chunks': 4, 'decode_chunk_len': 64, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 200, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'res_dir': PosixPath('pruned_transducer_stateless7_streaming/exp1/greedy_search'), 'suffix': 'epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model', 'blank_id': 0, 'unk_id': 2, 'vocab_size': 500}
|
4 |
+
2023-04-03 17:20:40,954 INFO [decode.py:677] About to create model
|
5 |
+
2023-04-03 17:20:41,325 INFO [zipformer.py:405] At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
|
6 |
+
2023-04-03 17:20:41,332 INFO [decode.py:748] Calculating the averaged model over epoch range from 20 (excluded) to 29
|
7 |
+
2023-04-03 17:20:43,416 INFO [decode.py:782] Number of model parameters: 70369391
|
8 |
+
2023-04-03 17:20:43,416 INFO [commonvoice_fr.py:406] About to get test cuts
|
9 |
+
2023-04-03 17:20:46,076 INFO [decode.py:560] batch 0/?, cuts processed until now is 27
|
10 |
+
2023-04-03 17:20:56,934 INFO [decode.py:560] batch 50/?, cuts processed until now is 1548
|
11 |
+
2023-04-03 17:21:07,967 INFO [decode.py:560] batch 100/?, cuts processed until now is 3088
|
12 |
+
2023-04-03 17:21:11,846 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.1787, 1.8910, 2.4172, 1.6407, 2.1728, 2.3893, 1.7562, 2.5306],
|
13 |
+
device='cuda:0'), covar=tensor([0.1221, 0.2071, 0.1560, 0.2019, 0.0944, 0.1410, 0.2892, 0.0782],
|
14 |
+
device='cuda:0'), in_proj_covar=tensor([0.0188, 0.0202, 0.0188, 0.0186, 0.0170, 0.0210, 0.0213, 0.0194],
|
15 |
+
device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
|
16 |
+
device='cuda:0')
|
17 |
+
2023-04-03 17:21:19,943 INFO [decode.py:560] batch 150/?, cuts processed until now is 4693
|
18 |
+
2023-04-03 17:21:24,480 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.9368, 4.1410, 3.9511, 2.1512, 4.1722, 3.3482, 1.1814, 3.0681],
|
19 |
+
device='cuda:0'), covar=tensor([0.1757, 0.1534, 0.1609, 0.2749, 0.0958, 0.0773, 0.3330, 0.1192],
|
20 |
+
device='cuda:0'), in_proj_covar=tensor([0.0149, 0.0177, 0.0157, 0.0127, 0.0159, 0.0121, 0.0146, 0.0122],
|
21 |
+
device='cuda:0'), out_proj_covar=tensor([0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0002],
|
22 |
+
device='cuda:0')
|
23 |
+
2023-04-03 17:21:30,520 INFO [decode.py:560] batch 200/?, cuts processed until now is 6301
|
24 |
+
2023-04-03 17:21:38,318 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.2529, 2.0297, 1.8643, 2.1288, 1.9791, 1.9835, 1.9948, 2.7326],
|
25 |
+
device='cuda:0'), covar=tensor([0.3832, 0.5126, 0.3710, 0.4009, 0.4766, 0.2687, 0.4275, 0.1859],
|
26 |
+
device='cuda:0'), in_proj_covar=tensor([0.0286, 0.0261, 0.0233, 0.0273, 0.0255, 0.0225, 0.0254, 0.0234],
|
27 |
+
device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
|
28 |
+
device='cuda:0')
|
29 |
+
2023-04-03 17:21:41,318 INFO [decode.py:560] batch 250/?, cuts processed until now is 7825
|
30 |
+
2023-04-03 17:21:45,845 INFO [zipformer.py:2441] attn_weights_entropy = tensor([1.8310, 1.7597, 1.6839, 1.8309, 1.2796, 3.4939, 1.4962, 1.9513],
|
31 |
+
device='cuda:0'), covar=tensor([0.3100, 0.2143, 0.1930, 0.2158, 0.1619, 0.0206, 0.2399, 0.1100],
|
32 |
+
device='cuda:0'), in_proj_covar=tensor([0.0130, 0.0115, 0.0120, 0.0123, 0.0112, 0.0094, 0.0093, 0.0093],
|
33 |
+
device='cuda:0'), out_proj_covar=tensor([0.0006, 0.0005, 0.0005, 0.0006, 0.0005, 0.0004, 0.0005, 0.0004],
|
34 |
+
device='cuda:0')
|
35 |
+
2023-04-03 17:21:51,496 INFO [decode.py:560] batch 300/?, cuts processed until now is 9574
|
36 |
+
2023-04-03 17:21:53,631 INFO [zipformer.py:2441] attn_weights_entropy = tensor([1.9710, 1.4775, 2.1112, 2.0248, 1.8644, 1.8023, 1.9569, 1.9764],
|
37 |
+
device='cuda:0'), covar=tensor([0.4725, 0.4362, 0.3711, 0.4021, 0.5538, 0.4317, 0.5152, 0.3214],
|
38 |
+
device='cuda:0'), in_proj_covar=tensor([0.0260, 0.0243, 0.0263, 0.0289, 0.0289, 0.0265, 0.0295, 0.0247],
|
39 |
+
device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
|
40 |
+
device='cuda:0')
|
41 |
+
2023-04-03 17:22:02,103 INFO [decode.py:560] batch 350/?, cuts processed until now is 11145
|
42 |
+
2023-04-03 17:22:12,758 INFO [decode.py:560] batch 400/?, cuts processed until now is 12706
|
43 |
+
2023-04-03 17:22:23,693 INFO [decode.py:560] batch 450/?, cuts processed until now is 14224
|
44 |
+
2023-04-03 17:22:34,416 INFO [decode.py:560] batch 500/?, cuts processed until now is 15743
|
45 |
+
2023-04-03 17:22:35,701 INFO [decode.py:576] The transcripts are stored in pruned_transducer_stateless7_streaming/exp1/greedy_search/recogs-test-cv-greedy_search-epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model.txt
|
46 |
+
2023-04-03 17:22:35,945 INFO [utils.py:558] [test-cv-greedy_search] %WER 10.57% [16585 / 156915, 1231 ins, 1791 del, 13563 sub ]
|
47 |
+
2023-04-03 17:22:36,536 INFO [decode.py:589] Wrote detailed error stats to pruned_transducer_stateless7_streaming/exp1/greedy_search/errs-test-cv-greedy_search-epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model.txt
|
48 |
+
2023-04-03 17:22:36,536 INFO [decode.py:609]
|
49 |
+
For test-cv, WER of different settings are:
|
50 |
+
greedy_search 10.57 best for test-cv
|
51 |
+
|
52 |
+
2023-04-03 17:22:36,537 INFO [decode.py:808] Done!
|
decoding_results/greedy_search/recogs-test-cv-greedy_search-epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
decoding_results/greedy_search/wer-summary-test-cv-greedy_search.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
settings WER
|
2 |
+
greedy_search 10.57
|
decoding_results/modified_beam_search/errs-test-cv-beam_size_4-epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
decoding_results/modified_beam_search/log-decode-epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model-2023-04-03-17-22-38
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-04-03 17:22:38,516 INFO [decode.py:659] Decoding started
|
2 |
+
2023-04-03 17:22:38,516 INFO [decode.py:665] Device: cuda:0
|
3 |
+
2023-04-03 17:22:38,518 INFO [decode.py:675] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.23.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '62e404dd3f3a811d73e424199b3408e309c06e1a', 'k2-git-date': 'Mon Jan 30 02:26:16 2023', 'lhotse-version': '1.12.0.dev+git.3ccfeb7.clean', 'torch-version': '1.13.0', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': 'd74822d-dirty', 'icefall-git-date': 'Tue Mar 21 21:35:32 2023', 'icefall-path': '/home/lishaojie/icefall', 'k2-path': '/home/lishaojie/.conda/envs/env_lishaojie/lib/python3.8/site-packages/k2/__init__.py', 'lhotse-path': '/home/lishaojie/.conda/envs/env_lishaojie/lib/python3.8/site-packages/lhotse/__init__.py', 'hostname': 'cnc533', 'IP address': '127.0.1.1'}, 'epoch': 29, 'iter': 0, 'avg': 9, 'use_averaged_model': True, 'exp_dir': PosixPath('pruned_transducer_stateless7_streaming/exp1'), 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_bpe_500'), 'decoding_method': 'modified_beam_search', 'beam_size': 4, 'beam': 20.0, 'ngram_lm_scale': 0.01, 'max_contexts': 8, 'max_states': 64, 'context_size': 2, 'max_sym_per_frame': 1, 'num_paths': 200, 'nbest_scale': 0.5, 'num_encoder_layers': '2,4,3,2,4', 'feedforward_dims': '1024,1024,2048,2048,1024', 'nhead': '8,8,8,8,8', 'encoder_dims': '384,384,384,384,384', 'attention_dims': '192,192,192,192,192', 'encoder_unmasked_dims': '256,256,256,256,256', 'zipformer_downsampling_factors': '1,2,4,8,2', 'cnn_module_kernels': '31,31,31,31,31', 'decoder_dim': 512, 'joiner_dim': 512, 'short_chunk_size': 50, 'num_left_chunks': 4, 'decode_chunk_len': 64, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 200, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'res_dir': PosixPath('pruned_transducer_stateless7_streaming/exp1/modified_beam_search'), 'suffix': 'epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model', 'blank_id': 0, 'unk_id': 2, 'vocab_size': 500}
|
4 |
+
2023-04-03 17:22:38,519 INFO [decode.py:677] About to create model
|
5 |
+
2023-04-03 17:22:38,918 INFO [zipformer.py:405] At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
|
6 |
+
2023-04-03 17:22:38,925 INFO [decode.py:748] Calculating the averaged model over epoch range from 20 (excluded) to 29
|
7 |
+
2023-04-03 17:22:40,997 INFO [decode.py:782] Number of model parameters: 70369391
|
8 |
+
2023-04-03 17:22:40,997 INFO [commonvoice_fr.py:406] About to get test cuts
|
9 |
+
2023-04-03 17:22:44,389 INFO [decode.py:560] batch 0/?, cuts processed until now is 27
|
10 |
+
2023-04-03 17:23:03,653 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.4396, 2.2447, 2.4559, 1.5968, 2.3244, 2.4744, 2.4945, 1.9745],
|
11 |
+
device='cuda:0'), covar=tensor([0.0486, 0.0604, 0.0566, 0.0803, 0.0805, 0.0564, 0.0533, 0.1100],
|
12 |
+
device='cuda:0'), in_proj_covar=tensor([0.0128, 0.0133, 0.0136, 0.0116, 0.0123, 0.0135, 0.0136, 0.0158],
|
13 |
+
device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0001, 0.0002, 0.0002, 0.0002, 0.0002],
|
14 |
+
device='cuda:0')
|
15 |
+
2023-04-03 17:23:05,745 INFO [decode.py:560] batch 20/?, cuts processed until now is 604
|
16 |
+
2023-04-03 17:23:23,592 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.0329, 1.8940, 1.7735, 2.1519, 2.5543, 2.1581, 1.8200, 1.6895],
|
17 |
+
device='cuda:0'), covar=tensor([0.2199, 0.2101, 0.1994, 0.1657, 0.1436, 0.1120, 0.2161, 0.2030],
|
18 |
+
device='cuda:0'), in_proj_covar=tensor([0.0242, 0.0208, 0.0212, 0.0195, 0.0242, 0.0187, 0.0214, 0.0202],
|
19 |
+
device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
|
20 |
+
device='cuda:0')
|
21 |
+
2023-04-03 17:23:26,475 INFO [decode.py:560] batch 40/?, cuts processed until now is 1209
|
22 |
+
2023-04-03 17:23:41,645 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.7772, 2.7137, 2.1413, 1.0557, 2.3674, 2.2655, 2.0341, 2.4959],
|
23 |
+
device='cuda:0'), covar=tensor([0.0948, 0.0634, 0.1572, 0.1977, 0.1178, 0.2220, 0.2033, 0.0825],
|
24 |
+
device='cuda:0'), in_proj_covar=tensor([0.0167, 0.0187, 0.0196, 0.0178, 0.0206, 0.0207, 0.0220, 0.0192],
|
25 |
+
device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
|
26 |
+
device='cuda:0')
|
27 |
+
2023-04-03 17:23:46,667 INFO [decode.py:560] batch 60/?, cuts processed until now is 1866
|
28 |
+
2023-04-03 17:24:07,713 INFO [decode.py:560] batch 80/?, cuts processed until now is 2422
|
29 |
+
2023-04-03 17:24:10,930 INFO [zipformer.py:2441] attn_weights_entropy = tensor([1.5944, 1.6332, 1.4009, 1.6980, 2.0331, 1.9103, 1.6203, 1.4564],
|
30 |
+
device='cuda:0'), covar=tensor([0.0370, 0.0328, 0.0636, 0.0290, 0.0200, 0.0398, 0.0360, 0.0438],
|
31 |
+
device='cuda:0'), in_proj_covar=tensor([0.0097, 0.0103, 0.0143, 0.0108, 0.0097, 0.0111, 0.0100, 0.0110],
|
32 |
+
device='cuda:0'), out_proj_covar=tensor([7.4944e-05, 7.9098e-05, 1.1173e-04, 8.2734e-05, 7.5248e-05, 8.1783e-05,
|
33 |
+
7.3728e-05, 8.3511e-05], device='cuda:0')
|
34 |
+
2023-04-03 17:24:19,887 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.5455, 2.5531, 2.0976, 0.9939, 2.3244, 2.0391, 1.9380, 2.4056],
|
35 |
+
device='cuda:0'), covar=tensor([0.0949, 0.0639, 0.1516, 0.2032, 0.1282, 0.2361, 0.2408, 0.0816],
|
36 |
+
device='cuda:0'), in_proj_covar=tensor([0.0167, 0.0187, 0.0196, 0.0178, 0.0206, 0.0207, 0.0220, 0.0192],
|
37 |
+
device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
|
38 |
+
device='cuda:0')
|
39 |
+
2023-04-03 17:24:27,881 INFO [decode.py:560] batch 100/?, cuts processed until now is 3088
|
40 |
+
2023-04-03 17:24:48,430 INFO [decode.py:560] batch 120/?, cuts processed until now is 3672
|
41 |
+
2023-04-03 17:25:08,496 INFO [decode.py:560] batch 140/?, cuts processed until now is 4348
|
42 |
+
2023-04-03 17:25:28,540 INFO [decode.py:560] batch 160/?, cuts processed until now is 5035
|
43 |
+
2023-04-03 17:25:40,611 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.4775, 2.3269, 2.5159, 1.6390, 2.4735, 2.5963, 2.5525, 2.0379],
|
44 |
+
device='cuda:0'), covar=tensor([0.0466, 0.0579, 0.0534, 0.0716, 0.0834, 0.0513, 0.0447, 0.1035],
|
45 |
+
device='cuda:0'), in_proj_covar=tensor([0.0128, 0.0133, 0.0136, 0.0116, 0.0123, 0.0135, 0.0136, 0.0158],
|
46 |
+
device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0001, 0.0002, 0.0002, 0.0002, 0.0002],
|
47 |
+
device='cuda:0')
|
48 |
+
2023-04-03 17:25:48,922 INFO [decode.py:560] batch 180/?, cuts processed until now is 5674
|
49 |
+
2023-04-03 17:26:09,282 INFO [decode.py:560] batch 200/?, cuts processed until now is 6301
|
50 |
+
2023-04-03 17:26:29,657 INFO [decode.py:560] batch 220/?, cuts processed until now is 6914
|
51 |
+
2023-04-03 17:26:50,151 INFO [decode.py:560] batch 240/?, cuts processed until now is 7540
|
52 |
+
2023-04-03 17:27:10,601 INFO [decode.py:560] batch 260/?, cuts processed until now is 8161
|
53 |
+
2023-04-03 17:27:30,848 INFO [decode.py:560] batch 280/?, cuts processed until now is 8857
|
54 |
+
2023-04-03 17:27:50,769 INFO [decode.py:560] batch 300/?, cuts processed until now is 9574
|
55 |
+
2023-04-03 17:28:11,398 INFO [decode.py:560] batch 320/?, cuts processed until now is 10169
|
56 |
+
2023-04-03 17:28:25,548 INFO [zipformer.py:2441] attn_weights_entropy = tensor([1.8605, 1.7340, 2.4232, 3.4970, 2.3920, 2.5277, 1.1464, 2.9314],
|
57 |
+
device='cuda:0'), covar=tensor([0.1522, 0.1223, 0.1074, 0.0445, 0.0713, 0.1245, 0.1648, 0.0431],
|
58 |
+
device='cuda:0'), in_proj_covar=tensor([0.0097, 0.0114, 0.0131, 0.0162, 0.0098, 0.0133, 0.0122, 0.0098],
|
59 |
+
device='cuda:0'), out_proj_covar=tensor([0.0003, 0.0003, 0.0004, 0.0004, 0.0003, 0.0004, 0.0003, 0.0003],
|
60 |
+
device='cuda:0')
|
61 |
+
2023-04-03 17:28:31,696 INFO [decode.py:560] batch 340/?, cuts processed until now is 10810
|
62 |
+
2023-04-03 17:28:52,180 INFO [decode.py:560] batch 360/?, cuts processed until now is 11452
|
63 |
+
2023-04-03 17:29:12,247 INFO [decode.py:560] batch 380/?, cuts processed until now is 12133
|
64 |
+
2023-04-03 17:29:33,066 INFO [decode.py:560] batch 400/?, cuts processed until now is 12706
|
65 |
+
2023-04-03 17:29:53,675 INFO [decode.py:560] batch 420/?, cuts processed until now is 13299
|
66 |
+
2023-04-03 17:30:07,997 INFO [zipformer.py:2441] attn_weights_entropy = tensor([1.8529, 1.7486, 1.5706, 1.8211, 2.1766, 2.0610, 1.7289, 1.5829],
|
67 |
+
device='cuda:0'), covar=tensor([0.0370, 0.0322, 0.0575, 0.0276, 0.0210, 0.0417, 0.0338, 0.0404],
|
68 |
+
device='cuda:0'), in_proj_covar=tensor([0.0097, 0.0103, 0.0143, 0.0108, 0.0097, 0.0111, 0.0100, 0.0110],
|
69 |
+
device='cuda:0'), out_proj_covar=tensor([7.4944e-05, 7.9098e-05, 1.1173e-04, 8.2734e-05, 7.5248e-05, 8.1783e-05,
|
70 |
+
7.3728e-05, 8.3511e-05], device='cuda:0')
|
71 |
+
2023-04-03 17:30:13,957 INFO [decode.py:560] batch 440/?, cuts processed until now is 13891
|
72 |
+
2023-04-03 17:30:34,408 INFO [decode.py:560] batch 460/?, cuts processed until now is 14515
|
73 |
+
2023-04-03 17:30:54,607 INFO [decode.py:560] batch 480/?, cuts processed until now is 15158
|
74 |
+
2023-04-03 17:31:14,650 INFO [decode.py:560] batch 500/?, cuts processed until now is 15743
|
75 |
+
2023-04-03 17:31:18,578 INFO [decode.py:576] The transcripts are stored in pruned_transducer_stateless7_streaming/exp1/modified_beam_search/recogs-test-cv-beam_size_4-epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model.txt
|
76 |
+
2023-04-03 17:31:18,889 INFO [utils.py:558] [test-cv-beam_size_4] %WER 10.19% [15988 / 156915, 1250 ins, 1549 del, 13189 sub ]
|
77 |
+
2023-04-03 17:31:19,408 INFO [decode.py:589] Wrote detailed error stats to pruned_transducer_stateless7_streaming/exp1/modified_beam_search/errs-test-cv-beam_size_4-epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model.txt
|
78 |
+
2023-04-03 17:31:19,408 INFO [decode.py:609]
|
79 |
+
For test-cv, WER of different settings are:
|
80 |
+
beam_size_4 10.19 best for test-cv
|
81 |
+
|
82 |
+
2023-04-03 17:31:19,408 INFO [decode.py:808] Done!
|
decoding_results/modified_beam_search/recogs-test-cv-beam_size_4-epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
decoding_results/modified_beam_search/wer-summary-test-cv-beam_size_4.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
settings WER
|
2 |
+
beam_size_4 10.19
|