Martin Tomov
commited on
Commit
•
bb99784
1
Parent(s):
14b9899
Upload 2 files
Browse files
grounding-dino/grounding-dino_GroundingDINO_SwinB.cfg.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
batch_size = 1
|
2 |
+
modelname = "groundingdino"
|
3 |
+
backbone = "swin_B_384_22k"
|
4 |
+
position_embedding = "sine"
|
5 |
+
pe_temperatureH = 20
|
6 |
+
pe_temperatureW = 20
|
7 |
+
return_interm_indices = [1, 2, 3]
|
8 |
+
backbone_freeze_keywords = None
|
9 |
+
enc_layers = 6
|
10 |
+
dec_layers = 6
|
11 |
+
pre_norm = False
|
12 |
+
dim_feedforward = 2048
|
13 |
+
hidden_dim = 256
|
14 |
+
dropout = 0.0
|
15 |
+
nheads = 8
|
16 |
+
num_queries = 900
|
17 |
+
query_dim = 4
|
18 |
+
num_patterns = 0
|
19 |
+
num_feature_levels = 4
|
20 |
+
enc_n_points = 4
|
21 |
+
dec_n_points = 4
|
22 |
+
two_stage_type = "standard"
|
23 |
+
two_stage_bbox_embed_share = False
|
24 |
+
two_stage_class_embed_share = False
|
25 |
+
transformer_activation = "relu"
|
26 |
+
dec_pred_bbox_embed_share = True
|
27 |
+
dn_box_noise_scale = 1.0
|
28 |
+
dn_label_noise_ratio = 0.5
|
29 |
+
dn_label_coef = 1.0
|
30 |
+
dn_bbox_coef = 1.0
|
31 |
+
embed_init_tgt = True
|
32 |
+
dn_labelbook_size = 2000
|
33 |
+
max_text_len = 256
|
34 |
+
text_encoder_type = "bert-base-uncased"
|
35 |
+
use_text_enhancer = True
|
36 |
+
use_fusion_layer = True
|
37 |
+
use_checkpoint = True
|
38 |
+
use_transformer_ckpt = True
|
39 |
+
use_text_cross_attention = True
|
40 |
+
text_dropout = 0.0
|
41 |
+
fusion_dropout = 0.0
|
42 |
+
fusion_droppath = 0.1
|
43 |
+
sub_sentence_present = True
|
grounding-dino/grounding-dino_GroundingDINO_SwinT_OGC.cfg.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
batch_size = 1
|
2 |
+
modelname = "groundingdino"
|
3 |
+
backbone = "swin_T_224_1k"
|
4 |
+
position_embedding = "sine"
|
5 |
+
pe_temperatureH = 20
|
6 |
+
pe_temperatureW = 20
|
7 |
+
return_interm_indices = [1, 2, 3]
|
8 |
+
backbone_freeze_keywords = None
|
9 |
+
enc_layers = 6
|
10 |
+
dec_layers = 6
|
11 |
+
pre_norm = False
|
12 |
+
dim_feedforward = 2048
|
13 |
+
hidden_dim = 256
|
14 |
+
dropout = 0.0
|
15 |
+
nheads = 8
|
16 |
+
num_queries = 900
|
17 |
+
query_dim = 4
|
18 |
+
num_patterns = 0
|
19 |
+
num_feature_levels = 4
|
20 |
+
enc_n_points = 4
|
21 |
+
dec_n_points = 4
|
22 |
+
two_stage_type = "standard"
|
23 |
+
two_stage_bbox_embed_share = False
|
24 |
+
two_stage_class_embed_share = False
|
25 |
+
transformer_activation = "relu"
|
26 |
+
dec_pred_bbox_embed_share = True
|
27 |
+
dn_box_noise_scale = 1.0
|
28 |
+
dn_label_noise_ratio = 0.5
|
29 |
+
dn_label_coef = 1.0
|
30 |
+
dn_bbox_coef = 1.0
|
31 |
+
embed_init_tgt = True
|
32 |
+
dn_labelbook_size = 2000
|
33 |
+
max_text_len = 256
|
34 |
+
text_encoder_type = "bert-base-uncased"
|
35 |
+
use_text_enhancer = True
|
36 |
+
use_fusion_layer = True
|
37 |
+
use_checkpoint = True
|
38 |
+
use_transformer_ckpt = True
|
39 |
+
use_text_cross_attention = True
|
40 |
+
text_dropout = 0.0
|
41 |
+
fusion_dropout = 0.0
|
42 |
+
fusion_droppath = 0.1
|
43 |
+
sub_sentence_present = True
|