chatthai committed on
Commit
e63b020
1 Parent(s): fc063e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -34,13 +34,14 @@ def model_(n_gram = 21):
34
  input1 = Input(shape=(21,),dtype='float32',name = 'char_input')
35
  input2 = Input(shape=(21,),dtype='float32',name = 'type_input')
36
 
37
- a = Embedding(180, 32,input_length=21)(input1)
38
- a = SpatialDropout1D(0.1)(a)
39
- a = TimestepDropout(0.05)(a)
40
  char_input = BatchNormalization()(a)
41
 
42
  a_concat = []
43
- filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[7,200],[8,200],[9,150],[10,150],[11,150],[12,100]]
 
44
 
45
  for (window_size, filters_size) in filters:
46
  convs = Conv1D(filters=filters_size, kernel_size=window_size, strides=1)(char_input)
@@ -49,19 +50,22 @@ def model_(n_gram = 21):
49
  convs = ZeroPadding1D(padding=(0, window_size-1))(convs)
50
  a_concat.append(convs)
51
  token_max = Maximum()(a_concat)
52
- lstm_char = Bidirectional(LSTM(100 ,return_sequences=True))(char_input)
 
 
 
53
 
54
  b = Embedding(12, 12, input_length=21)(input2)
55
- b = SpatialDropout1D(0.1)(b)
56
- type_inputs = TimestepDropout(0.05)(b)
57
 
58
- x = Concatenate()([lstm_char, type_inputs, char_input, token_max])
59
  x = BatchNormalization()(x)
60
-
61
  x = Flatten()(x)
62
- x = Dense(200, activation='elu')(x)
63
  x = Dropout(0.2)(x)
64
- out = Dense(1, activation='sigmoid',dtype = 'float32')(x)
65
 
66
 
67
  model = Model(inputs=[input1, input2], outputs=out)
@@ -101,7 +105,7 @@ def tokenize(text):
101
  word_end = []
102
 
103
  y_predict = model.predict([x_char, x_type], batch_size = 512)
104
- y_predict = (y_predict.ravel() > 0.4609375).astype(int)
105
  word_end = y_predict[1:].tolist() + [1]
106
 
107
  tokens = []
 
34
  input1 = Input(shape=(21,),dtype='float32',name = 'char_input')
35
  input2 = Input(shape=(21,),dtype='float32',name = 'type_input')
36
 
37
+ a = Embedding(178, 32,input_length=21)(input1)
38
+ a = SpatialDropout1D(0.15)(a)
39
+ #a = TimestepDropout(0.05)(a)
40
  char_input = BatchNormalization()(a)
41
 
42
  a_concat = []
43
+ filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[8,200],[11,150],[12,100]]
44
+ #filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[7,200],[8,200],[9,150],[10,150],[11,150],[12,100]]
45
 
46
  for (window_size, filters_size) in filters:
47
  convs = Conv1D(filters=filters_size, kernel_size=window_size, strides=1)(char_input)
 
50
  convs = ZeroPadding1D(padding=(0, window_size-1))(convs)
51
  a_concat.append(convs)
52
  token_max = Maximum()(a_concat)
53
+ lstm_char = Bidirectional(LSTM(128 ,return_sequences=True,kernel_regularizer=regularizers.L2(0.0000001),bias_regularizer=regularizers.L2(0.0000001)))(char_input)
54
+ lstm_char = Dense(64, activation='elu')(lstm_char)
55
+ #lstm_char = Bidirectional(LSTM(64 ,return_sequences=True))(lstm_char)
56
+ #lstm_char = Attention(return_sequences=True)(lstm_char)
57
 
58
  b = Embedding(12, 12, input_length=21)(input2)
59
+ type_inputs = SpatialDropout1D(0.15)(b)
60
+ #type_inputs = TimestepDropout(0.05)(b)
61
 
62
+ x = Concatenate()([type_inputs, char_input, lstm_char, token_max])
63
  x = BatchNormalization()(x)
64
+
65
  x = Flatten()(x)
66
+ x = Dense(100, activation='elu')(x)
67
  x = Dropout(0.2)(x)
68
+ out = Dense(1, activation='sigmoid',dtype = 'float32',kernel_regularizer=regularizers.L2(0.01),bias_regularizer=regularizers.L2(0.01))(x)
69
 
70
 
71
  model = Model(inputs=[input1, input2], outputs=out)
 
105
  word_end = []
106
 
107
  y_predict = model.predict([x_char, x_type], batch_size = 512)
108
+ y_predict = (y_predict.ravel() > 0.46542968749999997).astype(int)
109
  word_end = y_predict[1:].tolist() + [1]
110
 
111
  tokens = []