chatthai
committed on
Commit
•
e63b020
1
Parent(s):
fc063e2
Update app.py
Browse files
app.py
CHANGED
@@ -34,13 +34,14 @@ def model_(n_gram = 21):
|
|
34 |
input1 = Input(shape=(21,),dtype='float32',name = 'char_input')
|
35 |
input2 = Input(shape=(21,),dtype='float32',name = 'type_input')
|
36 |
|
37 |
-
a = Embedding(
|
38 |
-
a = SpatialDropout1D(0.
|
39 |
-
a = TimestepDropout(0.05)(a)
|
40 |
char_input = BatchNormalization()(a)
|
41 |
|
42 |
a_concat = []
|
43 |
-
filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[
|
|
|
44 |
|
45 |
for (window_size, filters_size) in filters:
|
46 |
convs = Conv1D(filters=filters_size, kernel_size=window_size, strides=1)(char_input)
|
@@ -49,19 +50,22 @@ def model_(n_gram = 21):
|
|
49 |
convs = ZeroPadding1D(padding=(0, window_size-1))(convs)
|
50 |
a_concat.append(convs)
|
51 |
token_max = Maximum()(a_concat)
|
52 |
-
lstm_char = Bidirectional(LSTM(
|
|
|
|
|
|
|
53 |
|
54 |
b = Embedding(12, 12, input_length=21)(input2)
|
55 |
-
|
56 |
-
type_inputs = TimestepDropout(0.05)(b)
|
57 |
|
58 |
-
x = Concatenate()([
|
59 |
x = BatchNormalization()(x)
|
60 |
-
|
61 |
x = Flatten()(x)
|
62 |
-
x = Dense(
|
63 |
x = Dropout(0.2)(x)
|
64 |
-
out = Dense(1, activation='sigmoid',dtype = 'float32')(x)
|
65 |
|
66 |
|
67 |
model = Model(inputs=[input1, input2], outputs=out)
|
@@ -101,7 +105,7 @@ def tokenize(text):
|
|
101 |
word_end = []
|
102 |
|
103 |
y_predict = model.predict([x_char, x_type], batch_size = 512)
|
104 |
-
y_predict = (y_predict.ravel() > 0.
|
105 |
word_end = y_predict[1:].tolist() + [1]
|
106 |
|
107 |
tokens = []
|
|
|
34 |
input1 = Input(shape=(21,),dtype='float32',name = 'char_input')
|
35 |
input2 = Input(shape=(21,),dtype='float32',name = 'type_input')
|
36 |
|
37 |
+
a = Embedding(178, 32,input_length=21)(input1)
|
38 |
+
a = SpatialDropout1D(0.15)(a)
|
39 |
+
#a = TimestepDropout(0.05)(a)
|
40 |
char_input = BatchNormalization()(a)
|
41 |
|
42 |
a_concat = []
|
43 |
+
filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[8,200],[11,150],[12,100]]
|
44 |
+
#filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[7,200],[8,200],[9,150],[10,150],[11,150],[12,100]]
|
45 |
|
46 |
for (window_size, filters_size) in filters:
|
47 |
convs = Conv1D(filters=filters_size, kernel_size=window_size, strides=1)(char_input)
|
|
|
50 |
convs = ZeroPadding1D(padding=(0, window_size-1))(convs)
|
51 |
a_concat.append(convs)
|
52 |
token_max = Maximum()(a_concat)
|
53 |
+
lstm_char = Bidirectional(LSTM(128 ,return_sequences=True,kernel_regularizer=regularizers.L2(0.0000001),bias_regularizer=regularizers.L2(0.0000001)))(char_input)
|
54 |
+
lstm_char = Dense(64, activation='elu')(lstm_char)
|
55 |
+
#lstm_char = Bidirectional(LSTM(64 ,return_sequences=True))(lstm_char)
|
56 |
+
#lstm_char = Attention(return_sequences=True)(lstm_char)
|
57 |
|
58 |
b = Embedding(12, 12, input_length=21)(input2)
|
59 |
+
type_inputs = SpatialDropout1D(0.15)(b)
|
60 |
+
#type_inputs = TimestepDropout(0.05)(b)
|
61 |
|
62 |
+
x = Concatenate()([type_inputs, char_input, lstm_char, token_max])
|
63 |
x = BatchNormalization()(x)
|
64 |
+
|
65 |
x = Flatten()(x)
|
66 |
+
x = Dense(100, activation='elu')(x)
|
67 |
x = Dropout(0.2)(x)
|
68 |
+
out = Dense(1, activation='sigmoid',dtype = 'float32',kernel_regularizer=regularizers.L2(0.01),bias_regularizer=regularizers.L2(0.01))(x)
|
69 |
|
70 |
|
71 |
model = Model(inputs=[input1, input2], outputs=out)
|
|
|
105 |
word_end = []
|
106 |
|
107 |
y_predict = model.predict([x_char, x_type], batch_size = 512)
|
108 |
+
y_predict = (y_predict.ravel() > 0.46542968749999997).astype(int)
|
109 |
word_end = y_predict[1:].tolist() + [1]
|
110 |
|
111 |
tokens = []
|