Preechanon Chatthai
committed on
Commit
•
78a3063
1
Parent(s):
8af9c20
Update app.py
Browse files
app.py
CHANGED
@@ -28,17 +28,14 @@ class TimestepDropout(Dropout):
|
|
28 |
noise_shape = (input_shape[0], input_shape[1], 1)
|
29 |
return noise_shape
|
30 |
|
31 |
-
|
32 |
def model_(n_gram = 21):
|
33 |
|
34 |
input1 = Input(shape=(21,),dtype='float32',name = 'char_input')
|
35 |
input2 = Input(shape=(21,),dtype='float32',name = 'type_input')
|
36 |
-
|
37 |
a = Embedding(178, 32,input_length=21)(input1)
|
38 |
a = SpatialDropout1D(0.15)(a)
|
39 |
#a = TimestepDropout(0.05)(a)
|
40 |
char_input = BatchNormalization()(a)
|
41 |
-
|
42 |
a_concat = []
|
43 |
filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[8,200],[11,150],[12,100]]
|
44 |
#filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[7,200],[8,200],[9,150],[10,150],[11,150],[12,100]]
|
@@ -58,18 +55,13 @@ def model_(n_gram = 21):
|
|
58 |
b = Embedding(12, 12, input_length=21)(input2)
|
59 |
type_inputs = SpatialDropout1D(0.15)(b)
|
60 |
#type_inputs = TimestepDropout(0.05)(b)
|
61 |
-
|
62 |
x = Concatenate()([type_inputs, char_input, lstm_char, token_max])
|
63 |
x = BatchNormalization()(x)
|
64 |
-
|
65 |
x = Flatten()(x)
|
66 |
x = Dense(100, activation='elu')(x)
|
67 |
x = Dropout(0.2)(x)
|
68 |
out = Dense(1, activation='sigmoid',dtype = 'float32',kernel_regularizer=regularizers.L2(0.01),bias_regularizer=regularizers.L2(0.01))(x)
|
69 |
-
|
70 |
-
|
71 |
model = Model(inputs=[input1, input2], outputs=out)
|
72 |
-
|
73 |
return model
|
74 |
|
75 |
|
@@ -91,23 +83,18 @@ def create_feature_array(text, n_pad=21):
|
|
91 |
x_char = np.array(x_char).astype(float)
|
92 |
x_type = np.array(x_type).astype(float)
|
93 |
return x_char, x_type
|
94 |
-
|
95 |
def tokenize(text):
|
|
|
96 |
n_pad = 21
|
97 |
-
|
98 |
if not text:
|
99 |
return ['']
|
100 |
-
|
101 |
if isinstance(text, str) and sys.version_info.major == 2:
|
102 |
text = text.decode('utf-8')
|
103 |
-
|
104 |
x_char, x_type = create_feature_array(text, n_pad=n_pad)
|
105 |
word_end = []
|
106 |
-
|
107 |
y_predict = model.predict([x_char, x_type], batch_size = 512)
|
108 |
y_predict = (y_predict.ravel() > 0.46542968749999997).astype(int)
|
109 |
word_end = y_predict[1:].tolist() + [1]
|
110 |
-
|
111 |
tokens = []
|
112 |
word = ''
|
113 |
for char, w_e in zip(text, word_end):
|
@@ -117,7 +104,6 @@ def tokenize(text):
|
|
117 |
word = ''
|
118 |
return tokens
|
119 |
|
120 |
-
|
121 |
model = model_()
|
122 |
model.load_weights("cutto_tf2.h5")
|
123 |
|
|
|
28 |
noise_shape = (input_shape[0], input_shape[1], 1)
|
29 |
return noise_shape
|
30 |
|
|
|
31 |
def model_(n_gram = 21):
|
32 |
|
33 |
input1 = Input(shape=(21,),dtype='float32',name = 'char_input')
|
34 |
input2 = Input(shape=(21,),dtype='float32',name = 'type_input')
|
|
|
35 |
a = Embedding(178, 32,input_length=21)(input1)
|
36 |
a = SpatialDropout1D(0.15)(a)
|
37 |
#a = TimestepDropout(0.05)(a)
|
38 |
char_input = BatchNormalization()(a)
|
|
|
39 |
a_concat = []
|
40 |
filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[8,200],[11,150],[12,100]]
|
41 |
#filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[7,200],[8,200],[9,150],[10,150],[11,150],[12,100]]
|
|
|
55 |
b = Embedding(12, 12, input_length=21)(input2)
|
56 |
type_inputs = SpatialDropout1D(0.15)(b)
|
57 |
#type_inputs = TimestepDropout(0.05)(b)
|
|
|
58 |
x = Concatenate()([type_inputs, char_input, lstm_char, token_max])
|
59 |
x = BatchNormalization()(x)
|
|
|
60 |
x = Flatten()(x)
|
61 |
x = Dense(100, activation='elu')(x)
|
62 |
x = Dropout(0.2)(x)
|
63 |
out = Dense(1, activation='sigmoid',dtype = 'float32',kernel_regularizer=regularizers.L2(0.01),bias_regularizer=regularizers.L2(0.01))(x)
|
|
|
|
|
64 |
model = Model(inputs=[input1, input2], outputs=out)
|
|
|
65 |
return model
|
66 |
|
67 |
|
|
|
83 |
x_char = np.array(x_char).astype(float)
|
84 |
x_type = np.array(x_type).astype(float)
|
85 |
return x_char, x_type
|
|
|
86 |
def tokenize(text):
|
87 |
+
|
88 |
n_pad = 21
|
|
|
89 |
if not text:
|
90 |
return ['']
|
|
|
91 |
if isinstance(text, str) and sys.version_info.major == 2:
|
92 |
text = text.decode('utf-8')
|
|
|
93 |
x_char, x_type = create_feature_array(text, n_pad=n_pad)
|
94 |
word_end = []
|
|
|
95 |
y_predict = model.predict([x_char, x_type], batch_size = 512)
|
96 |
y_predict = (y_predict.ravel() > 0.46542968749999997).astype(int)
|
97 |
word_end = y_predict[1:].tolist() + [1]
|
|
|
98 |
tokens = []
|
99 |
word = ''
|
100 |
for char, w_e in zip(text, word_end):
|
|
|
104 |
word = ''
|
105 |
return tokens
|
106 |
|
|
|
107 |
model = model_()
|
108 |
model.load_weights("cutto_tf2.h5")
|
109 |
|