initial commit
Browse files
- decoder_jit_trace-pnnx.ncnn.bin +3 -0
- decoder_jit_trace-pnnx.ncnn.param +8 -0
- encoder_jit_trace-pnnx.ncnn.bin +3 -0
- encoder_jit_trace-pnnx.ncnn.param +0 -0
- export-ncnn.sh +60 -0
- joiner_jit_trace-pnnx.ncnn.bin +3 -0
- joiner_jit_trace-pnnx.ncnn.param +9 -0
- test_wavs_zh/0.wav +0 -0
- test_wavs_zh/1.wav +0 -0
- test_wavs_zh/2.wav +0 -0
decoder_jit_trace-pnnx.ncnn.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26ff4fa3e415f54a7ee6b4360873c25db764f68ca010139864008ab0391e0192
|
3 |
+
size 3548808
|
decoder_jit_trace-pnnx.ncnn.param
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
7767517
|
2 |
+
6 6
|
3 |
+
Input in0 0 1 in0
|
4 |
+
Embed embed_1 1 1 in0 1 0=320 1=5537 2=0 3=1771840
|
5 |
+
Permute permute_2 1 1 1 2 0=1
|
6 |
+
ConvolutionDepthWise1D convdw1d_4 1 1 2 3 0=320 1=2 2=1 3=1 4=0 5=0 6=2560 7=80
|
7 |
+
Permute permute_3 1 1 3 4 0=1
|
8 |
+
ReLU relu_0 1 1 4 out0
|
encoder_jit_trace-pnnx.ncnn.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f951dfe1fbee93edaf40ae857b1fb164424316ae702393dd1491efcb2ce2590
|
3 |
+
size 17150324
|
encoder_jit_trace-pnnx.ncnn.param
ADDED
The diff for this file is too large to render.
See raw diff
|
|
export-ncnn.sh
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
#
# Export a streaming zipformer checkpoint for use with ncnn.
#
# Steps:
#   1. Run export-for-ncnn-zh.py with the hyper-parameters of the trained
#      model.
#   2. Run pnnx on encoder/decoder/joiner_jit_trace-pnnx.pt from inside
#      $exp_dir (presumably where step 1 writes them -- confirm against
#      the export script).
#
# All paths are relative, so run this from the directory that contains
# pruned_transducer_stateless7_streaming/ and data/lang_char.

# Abort on the first failure: without this a failed export or a failed
# pushd would still run pnnx, either on stale traces or in the wrong
# directory.
set -euo pipefail

# Here are the model hyper-parameters required for model exporting.
# They must match the values the checkpoint was trained with.

exp_dir=pruned_transducer_stateless7_streaming/exp-14M

python ./pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py \
  --lang-dir data/lang_char \
  --exp-dir "$exp_dir" \
  --use-averaged-model True \
  --iter 400000 \
  --avg 8 \
  --decode-chunk-len 32 \
  --num-encoder-layers "2,3,2,2,3" \
  --feedforward-dims "320,320,640,640,320" \
  --nhead "4,4,4,4,4" \
  --encoder-dims "160,160,160,160,160" \
  --attention-dims "96,96,96,96,96" \
  --encoder-unmasked-dims "128,128,128,128,128" \
  --decoder-dim 320 \
  --joiner-dim 320

pushd "$exp_dir"

# Convert each traced sub-model, in the same order as before.
for model in encoder decoder joiner; do
  pnnx "${model}_jit_trace-pnnx.pt"
done

popd
|
30 |
+
|
31 |
+
# modify encoder_jit_trace-pnnx.ncnn.param to support sherpa-ncnn
|
32 |
+
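# The following diff is an example of the required change. NOTE: its attribute values do not match the hyper-parameters passed to the export command above; substitute the values you actually exported with.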
|
33 |
+
# --- encoder_jit_trace-pnnx.ncnn.param-before 2023-02-14 20:48:52.000000000 +0800
|
34 |
+
# +++ encoder_jit_trace-pnnx.ncnn.param 2023-02-14 20:50:15.000000000 +0800
|
35 |
+
# @@ -1,5 +1,6 @@
|
36 |
+
# 7767517
|
37 |
+
# -2028 2547
|
38 |
+
# +2029 2547
|
39 |
+
# +SherpaMetaData sherpa_meta_data1 0 0 0=2 1=32 2=4 3=7 -23316=5,2,4,3,2,4 -23317=5,384,384,384,384,384 -23318=5,192,192,192,192,192 -23319=5,1,2,4,8,2 -23320=5,31,31,31,31,31
|
40 |
+
# Input in0 0 1 in0
|
41 |
+
# Input in1 0 1 in1
|
42 |
+
# Split splitncnn_0 1 2 in1 2 3
|
43 |
+
#
|
44 |
+
#------
|
45 |
+
# Explanation:
|
46 |
+
#
|
47 |
+
# (1) 2028 is changed to 2029 as an extra layer SherpaMetaData is added
|
48 |
+
# (2) SherpaMetaData is the layer type
|
49 |
+
# (3) sherpa_meta_data1 is the name of this layer. Must be sherpa_meta_data1
|
50 |
+
# (4) 0 0 means this layer has no input or output
|
51 |
+
# (5) 1=32, attribute 1, 32 is the value of --decode-chunk-len
|
52 |
+
# (6) 2=4, attribute 2, 4 is the value of --num-left-chunks
|
53 |
+
# (7) 3=7, attribute 3, 7 is the pad length. The first subsampling layer computes (x_len - 7) // 2, so we use 7 here
|
54 |
+
# (8) -23316=5,2,4,3,2,4, attribute 16, this is an array attribute. It is attribute 16 since -23300 - (-23316) = 16
|
55 |
+
# the first element of the array is the length of the array, which is 5 in our case.
|
56 |
+
# 2,4,3,2,4 is the value of --num-encoder-layers in this example (the export command above passes 2,3,2,2,3)
|
57 |
+
# (9) -23317=5,384,384,384,384,384, attribute 17, 384,384,384,384,384 is the value of --encoder-dims in this example (the export command above passes 160,160,160,160,160)
|
58 |
+
# (10) -23318=5,192,192,192,192,192, attribute 18, 192,192,192,192,192 is the value of --attention-dims in this example (the export command above passes 96,96,96,96,96)
|
59 |
+
# (11) -23319=5,1,2,4,8,2, attribute 19, 1,2,4,8,2 is the value of --zipformer-downsampling-factors
|
60 |
+
# (12) -23320=5,31,31,31,31,31, attribute 20, 31,31,31,31,31 is the value of --cnn-module-kernels
|
joiner_jit_trace-pnnx.ncnn.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5209db16c9308e4b83134829f6de5ce993be5c340fd962b18c140f5e27c5c42
|
3 |
+
size 3875600
|
joiner_jit_trace-pnnx.ncnn.param
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
7767517
|
2 |
+
7 7
|
3 |
+
Input in0 0 1 in0
|
4 |
+
Input in1 0 1 in1
|
5 |
+
InnerProduct linear_2 1 1 in1 2 0=320 1=1 2=102400
|
6 |
+
InnerProduct linear_1 1 1 in0 3 0=320 1=1 2=51200
|
7 |
+
BinaryOp add_0 2 1 3 2 4 0=0
|
8 |
+
TanH tanh_0 1 1 4 5
|
9 |
+
InnerProduct linear_3 1 1 5 out0 0=5537 1=1 2=1771840
|
test_wavs_zh/0.wav
ADDED
Binary file (180 kB). View file
|
|
test_wavs_zh/1.wav
ADDED
Binary file (165 kB). View file
|
|
test_wavs_zh/2.wav
ADDED
Binary file (145 kB). View file
|
|