YoloGesture main inference code

Files changed:
- .gitattributes +1 -0
- img/anticlockwise.jpg +0 -0
- img/back.jpg +0 -0
- img/clockwise.jpg +0 -0
- img/down.jpg +0 -0
- img/front.jpg +0 -0
- img/left.jpg +0 -0
- img/right.jpg +0 -0
- img/up.jpg +0 -0
- model_data/gesture.yaml +20 -0
- model_data/gesture_classes.txt +8 -0
- model_data/simhei.ttf +3 -0
- model_data/yolo_anchors.txt +1 -0
- model_data/yolotiny_anchors.txt +1 -0
- nets/CSPdarknet.py +174 -0
- nets/CSPdarknet53_tiny.py +143 -0
- nets/__init__.py +1 -0
- nets/attention.py +114 -0
- nets/yolo.py +185 -0
- nets/yolo_tiny.py +99 -0
- nets/yolo_training.py +476 -0
- nets/yolotiny_training.py +474 -0
- utils/__init__.py +1 -0
- utils/callbacks.py +71 -0
- utils/dataloader.py +360 -0
- utils/utils.py +62 -0
- utils/utils_bbox.py +227 -0
- utils/utils_fit.py +128 -0
- utils/utils_map.py +901 -0
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+model_data/simhei.ttf filter=lfs diff=lfs merge=lfs -text
img/anticlockwise.jpg ADDED
img/back.jpg ADDED
img/clockwise.jpg ADDED
img/down.jpg ADDED
img/front.jpg ADDED
img/left.jpg ADDED
img/right.jpg ADDED
img/up.jpg ADDED
model_data/gesture.yaml ADDED
@@ -0,0 +1,20 @@
+#------------------------------detect.py--------------------------------#
+# This part enables semi-automatic data annotation to reduce manual effort; it needs a weight trained in advance and saves results in Labelme format
+# dir_origin_path   where the images are stored
+# dir_save_path     where the Annotations are saved
+# ----------------------------------------------------------------------#
+dir_detect_path: ./JPEGImages
+detect_save_path: ./Annotation
+
+# ----------------------------- train.py -------------------------------#
+nc: 8 # number of classes
+classes: ["up","down","left","right","front","back","clockwise","anticlockwise"] # class names
+confidence: 0.5 # confidence threshold
+nms_iou: 0.3
+letterbox_image: False
+
+lr_decay_type: cos # learning-rate decay schedule; options are step and cos
+# Whether to use multi-threaded data loading.
+# Enabling it speeds up data loading but uses more memory.
+# Machines with limited memory can set this to 2 or 0; on Windows, 0 is recommended.
+num_workers: 4
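
Note (not part of the commit): the diff does not show how this YAML is consumed; as a minimal sketch, assuming PyYAML is available, the options can be loaded like this:

import yaml

# Load the detect/train options defined in model_data/gesture.yaml.
with open('model_data/gesture.yaml', encoding='utf-8') as f:
    cfg = yaml.safe_load(f)

print(cfg['nc'], cfg['confidence'], cfg['classes'])  # 8, 0.5, ['up', ...]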
model_data/gesture_classes.txt ADDED
@@ -0,0 +1,8 @@
+up
+down
+left
+right
+front
+back
+clockwise
+anticlockwise
model_data/simhei.ttf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa4560dd8fe5645745fed3ffa301c3ca4d6c03cbd738145b613303961ba733b8
+size 9753388
model_data/yolo_anchors.txt ADDED
@@ -0,0 +1 @@
+12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
model_data/yolotiny_anchors.txt ADDED
@@ -0,0 +1 @@
+10,14, 23,27, 37,58, 81,82, 135,169, 344,319
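
Note (not part of the commit): a hypothetical helper for parsing these single-line anchor files into (w, h) pairs; the name get_anchors is an assumption for illustration only:

import numpy as np

def get_anchors(anchors_path):
    # Read the single comma-separated line and reshape into (w, h) pairs:
    # yolotiny_anchors.txt yields 6 anchors, yolo_anchors.txt yields 9.
    with open(anchors_path, encoding='utf-8') as f:
        anchors = [float(x) for x in f.readline().split(',')]
    return np.array(anchors).reshape(-1, 2)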
nets/CSPdarknet.py ADDED
@@ -0,0 +1,174 @@
+import math
+from collections import OrderedDict
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+#-------------------------------------------------#
+#   Mish activation function
+#-------------------------------------------------#
+class Mish(nn.Module):
+    def __init__(self):
+        super(Mish, self).__init__()
+
+    def forward(self, x):
+        return x * torch.tanh(F.softplus(x))
+
+#---------------------------------------------------#
+#   Convolution block -> convolution + normalization + activation
+#   Conv2d + BatchNormalization + Mish
+#---------------------------------------------------#
+class BasicConv(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1):
+        super(BasicConv, self).__init__()
+
+        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False)
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.activation = Mish()
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.activation(x)
+        return x
+
+#---------------------------------------------------#
+#   Component of the CSPdarknet structure block:
+#   the residual blocks stacked inside it
+#---------------------------------------------------#
+class Resblock(nn.Module):
+    def __init__(self, channels, hidden_channels=None):
+        super(Resblock, self).__init__()
+
+        if hidden_channels is None:
+            hidden_channels = channels
+
+        self.block = nn.Sequential(
+            BasicConv(channels, hidden_channels, 1),
+            BasicConv(hidden_channels, channels, 3)
+        )
+
+    def forward(self, x):
+        return x + self.block(x)
+
+#--------------------------------------------------------------------#
+#   CSPdarknet structure block.
+#   First compress height and width with a stride-2 convolution block.
+#   Then build a large residual branch, shortconv, that bypasses many residual structures.
+#   The trunk loops over num_blocks residual structures.
+#   The whole CSPdarknet block is one large residual block plus several small inner residual blocks.
+#--------------------------------------------------------------------#
+class Resblock_body(nn.Module):
+    def __init__(self, in_channels, out_channels, num_blocks, first):
+        super(Resblock_body, self).__init__()
+        #----------------------------------------------------------------#
+        #   Compress height and width with a stride-2 convolution block
+        #----------------------------------------------------------------#
+        self.downsample_conv = BasicConv(in_channels, out_channels, 3, stride=2)
+
+        if first:
+            #--------------------------------------------------------------------------#
+            #   Build the large residual branch self.split_conv0, which bypasses many residual structures
+            #--------------------------------------------------------------------------#
+            self.split_conv0 = BasicConv(out_channels, out_channels, 1)
+
+            #----------------------------------------------------------------#
+            #   The trunk loops over num_blocks residual structures
+            #----------------------------------------------------------------#
+            self.split_conv1 = BasicConv(out_channels, out_channels, 1)
+            self.blocks_conv = nn.Sequential(
+                Resblock(channels=out_channels, hidden_channels=out_channels//2),
+                BasicConv(out_channels, out_channels, 1)
+            )
+
+            self.concat_conv = BasicConv(out_channels*2, out_channels, 1)
+        else:
+            #--------------------------------------------------------------------------#
+            #   Build the large residual branch self.split_conv0, which bypasses many residual structures
+            #--------------------------------------------------------------------------#
+            self.split_conv0 = BasicConv(out_channels, out_channels//2, 1)
+
+            #----------------------------------------------------------------#
+            #   The trunk loops over num_blocks residual structures
+            #----------------------------------------------------------------#
+            self.split_conv1 = BasicConv(out_channels, out_channels//2, 1)
+            self.blocks_conv = nn.Sequential(
+                *[Resblock(out_channels//2) for _ in range(num_blocks)],
+                BasicConv(out_channels//2, out_channels//2, 1)
+            )
+
+            self.concat_conv = BasicConv(out_channels, out_channels, 1)
+
+    def forward(self, x):
+        x = self.downsample_conv(x)
+
+        x0 = self.split_conv0(x)
+
+        x1 = self.split_conv1(x)
+        x1 = self.blocks_conv(x1)
+
+        #------------------------------------#
+        #   Stack the large residual branch back on
+        #------------------------------------#
+        x = torch.cat([x1, x0], dim=1)
+        #------------------------------------#
+        #   Finally integrate the channels
+        #------------------------------------#
+        x = self.concat_conv(x)
+
+        return x
+
+#---------------------------------------------------#
+#   Main body of CSPdarknet53.
+#   Input is a 416x416x3 image.
+#   Output is three effective feature layers.
+#---------------------------------------------------#
+class CSPDarkNet(nn.Module):
+    def __init__(self, layers):
+        super(CSPDarkNet, self).__init__()
+        self.inplanes = 32
+        # 416,416,3 -> 416,416,32
+        self.conv1 = BasicConv(3, self.inplanes, kernel_size=3, stride=1)
+        self.feature_channels = [64, 128, 256, 512, 1024]
+
+        self.stages = nn.ModuleList([
+            # 416,416,32 -> 208,208,64
+            Resblock_body(self.inplanes, self.feature_channels[0], layers[0], first=True),
+            # 208,208,64 -> 104,104,128
+            Resblock_body(self.feature_channels[0], self.feature_channels[1], layers[1], first=False),
+            # 104,104,128 -> 52,52,256
+            Resblock_body(self.feature_channels[1], self.feature_channels[2], layers[2], first=False),
+            # 52,52,256 -> 26,26,512
+            Resblock_body(self.feature_channels[2], self.feature_channels[3], layers[3], first=False),
+            # 26,26,512 -> 13,13,1024
+            Resblock_body(self.feature_channels[3], self.feature_channels[4], layers[4], first=False)
+        ])
+
+        self.num_features = 1
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                m.weight.data.normal_(0, math.sqrt(2. / n))
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+
+
+    def forward(self, x):
+        x = self.conv1(x)
+
+        x = self.stages[0](x)
+        x = self.stages[1](x)
+        out3 = self.stages[2](x)
+        out4 = self.stages[3](out3)
+        out5 = self.stages[4](out4)
+
+        return out3, out4, out5
+
+def darknet53(pretrained):
+    model = CSPDarkNet([1, 2, 8, 8, 4])
+    if pretrained:
+        model.load_state_dict(torch.load("model_data/CSPdarknet53_backbone_weights.pth"))
+    return model
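
Note (not part of the commit): a quick shape sanity check, assuming the file above is importable as nets.CSPdarknet:

import torch
from nets.CSPdarknet import darknet53

# Build the backbone without pretrained weights and confirm the three
# effective feature layers for a 416x416 input.
model = darknet53(pretrained=False)
out3, out4, out5 = model(torch.randn(1, 3, 416, 416))
print(out3.shape)  # torch.Size([1, 256, 52, 52])
print(out4.shape)  # torch.Size([1, 512, 26, 26])
print(out5.shape)  # torch.Size([1, 1024, 13, 13])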
nets/CSPdarknet53_tiny.py ADDED
@@ -0,0 +1,143 @@
+import math
+
+import torch
+import torch.nn as nn
+
+
+#-------------------------------------------------#
+#   Convolution block
+#   Conv2d + BatchNorm2d + LeakyReLU
+#-------------------------------------------------#
+class BasicConv(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1):
+        super(BasicConv, self).__init__()
+
+        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False)
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.activation = nn.LeakyReLU(0.1)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.activation(x)
+        return x
+
+
+'''
+                input
+                  |
+              BasicConv
+                  -----------------------
+                  |                     |
+             route_group              route
+                  |                     |
+              BasicConv                 |
+                  |                     |
+    -------------------                 |
+    |                 |                 |
+ route_1          BasicConv             |
+    |                 |                 |
+    -----------------cat                |
+                      |                 |
+        ----      BasicConv             |
+        |             |                 |
+      feat           cat-----------------
+                      |
+                 MaxPooling2D
+'''
+#---------------------------------------------------#
+#   CSPdarknet53-tiny structure block.
+#   There is one large residual branch
+#   that bypasses many residual structures.
+#---------------------------------------------------#
+class Resblock_body(nn.Module):
+    def __init__(self, in_channels, out_channels):
+        super(Resblock_body, self).__init__()
+        self.out_channels = out_channels
+
+        self.conv1 = BasicConv(in_channels, out_channels, 3)
+
+        self.conv2 = BasicConv(out_channels//2, out_channels//2, 3)
+        self.conv3 = BasicConv(out_channels//2, out_channels//2, 3)
+
+        self.conv4 = BasicConv(out_channels, out_channels, 1)
+        self.maxpool = nn.MaxPool2d([2,2],[2,2])
+
+    def forward(self, x):
+        # Integrate features with a 3x3 convolution
+        x = self.conv1(x)
+        # Split off the large residual branch, route
+        route = x
+
+        c = self.out_channels
+        # Split the feature channels and take the second half as the trunk
+        x = torch.split(x, c//2, dim = 1)[1]
+        # Apply a 3x3 convolution to the trunk
+        x = self.conv2(x)
+        # Split off the small residual branch, route_1
+        route1 = x
+        # Apply another 3x3 convolution to the trunk
+        x = self.conv3(x)
+        # Concatenate the trunk with the residual branch
+        x = torch.cat([x,route1], dim = 1)
+
+        # Apply a 1x1 convolution to the concatenated result
+        x = self.conv4(x)
+        feat = x
+        x = torch.cat([route, x], dim = 1)
+
+        # Compress height and width with max pooling
+        x = self.maxpool(x)
+        return x,feat
+
+class CSPDarkNet(nn.Module):
+    def __init__(self):
+        super(CSPDarkNet, self).__init__()
+        # First compress height and width with two stride-2 3x3 convolutions
+        # 416,416,3 -> 208,208,32 -> 104,104,64
+        self.conv1 = BasicConv(3, 32, kernel_size=3, stride=2)
+        self.conv2 = BasicConv(32, 64, kernel_size=3, stride=2)
+
+        # 104,104,64 -> 52,52,128
+        self.resblock_body1 = Resblock_body(64, 64)
+        # 52,52,128 -> 26,26,256
+        self.resblock_body2 = Resblock_body(128, 128)
+        # 26,26,256 -> 13,13,512
+        self.resblock_body3 = Resblock_body(256, 256)
+        # 13,13,512 -> 13,13,512
+        self.conv3 = BasicConv(512, 512, kernel_size=3)
+
+        self.num_features = 1
+        # Initialize the weights
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                m.weight.data.normal_(0, math.sqrt(2. / n))
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+
+
+    def forward(self, x):
+        # 416,416,3 -> 208,208,32 -> 104,104,64
+        x = self.conv1(x)
+        x = self.conv2(x)
+
+        # 104,104,64 -> 52,52,128
+        x, _ = self.resblock_body1(x)
+        # 52,52,128 -> 26,26,256
+        x, _ = self.resblock_body2(x)
+        # 26,26,256 -> x is 13,13,512
+        #           -> feat1 is 26,26,256
+        x, feat1 = self.resblock_body3(x)
+
+        # 13,13,512 -> 13,13,512
+        x = self.conv3(x)
+        feat2 = x
+        return feat1,feat2
+
+def darknet53_tiny(pretrained, **kwargs):
+    model = CSPDarkNet()
+    if pretrained:
+        model.load_state_dict(torch.load("model_data/CSPdarknet53_tiny_backbone_weights.pth"))
+    return model
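
Note (not part of the commit): a matching shape check for the tiny backbone, assuming the file above is importable as nets.CSPdarknet53_tiny:

import torch
from nets.CSPdarknet53_tiny import darknet53_tiny

# Instantiate without pretrained weights and confirm the two effective
# feature layers for a 416x416 input.
model = darknet53_tiny(pretrained=False)
feat1, feat2 = model(torch.randn(1, 3, 416, 416))
print(feat1.shape)  # torch.Size([1, 256, 26, 26])
print(feat2.shape)  # torch.Size([1, 512, 13, 13])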
nets/__init__.py ADDED
@@ -0,0 +1 @@
+#
nets/attention.py ADDED
@@ -0,0 +1,114 @@
+import torch
+import torch.nn as nn
+import math
+
+class se_block(nn.Module):
+    def __init__(self, channel, ratio=16):
+        super(se_block, self).__init__()
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        self.fc = nn.Sequential(
+                nn.Linear(channel, channel // ratio, bias=False),
+                nn.ReLU(inplace=True),
+                nn.Linear(channel // ratio, channel, bias=False),
+                nn.Sigmoid()
+        )
+
+    def forward(self, x):
+        b, c, _, _ = x.size()
+        y = self.avg_pool(x).view(b, c)
+        y = self.fc(y).view(b, c, 1, 1)
+        return x * y
+
+class ChannelAttention(nn.Module):
+    def __init__(self, in_planes, ratio=8):
+        super(ChannelAttention, self).__init__()
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        self.max_pool = nn.AdaptiveMaxPool2d(1)
+
+        # Use 1x1 convolutions in place of fully connected layers
+        self.fc1   = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False)
+        self.relu1 = nn.ReLU()
+        self.fc2   = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)
+
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
+        max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
+        out = avg_out + max_out
+        return self.sigmoid(out)
+
+class SpatialAttention(nn.Module):
+    def __init__(self, kernel_size=7):
+        super(SpatialAttention, self).__init__()
+
+        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
+        padding = 3 if kernel_size == 7 else 1
+        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        avg_out = torch.mean(x, dim=1, keepdim=True)
+        max_out, _ = torch.max(x, dim=1, keepdim=True)
+        x = torch.cat([avg_out, max_out], dim=1)
+        x = self.conv1(x)
+        return self.sigmoid(x)
+
+class cbam_block(nn.Module):
+    def __init__(self, channel, ratio=8, kernel_size=7):
+        super(cbam_block, self).__init__()
+        self.channelattention = ChannelAttention(channel, ratio=ratio)
+        self.spatialattention = SpatialAttention(kernel_size=kernel_size)
+
+    def forward(self, x):
+        x = x*self.channelattention(x)
+        x = x*self.spatialattention(x)
+        return x
+
+class eca_block(nn.Module):
+    def __init__(self, channel, b=1, gamma=2):
+        super(eca_block, self).__init__()
+        kernel_size = int(abs((math.log(channel, 2) + b) / gamma))
+        kernel_size = kernel_size if kernel_size % 2 else kernel_size + 1
+
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        self.conv = nn.Conv1d(1, 1, kernel_size=kernel_size, padding=(kernel_size - 1) // 2, bias=False)
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        y = self.avg_pool(x)
+        y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1)
+        y = self.sigmoid(y)
+        return x * y.expand_as(x)
+
+class CA_Block(nn.Module):
+    def __init__(self, channel, reduction=16):
+        super(CA_Block, self).__init__()
+
+        self.conv_1x1 = nn.Conv2d(in_channels=channel, out_channels=channel//reduction, kernel_size=1, stride=1, bias=False)
+
+        self.relu = nn.ReLU()
+        self.bn = nn.BatchNorm2d(channel//reduction)
+
+        self.F_h = nn.Conv2d(in_channels=channel//reduction, out_channels=channel, kernel_size=1, stride=1, bias=False)
+        self.F_w = nn.Conv2d(in_channels=channel//reduction, out_channels=channel, kernel_size=1, stride=1, bias=False)
+
+        self.sigmoid_h = nn.Sigmoid()
+        self.sigmoid_w = nn.Sigmoid()
+
+    def forward(self, x):
+        _, _, h, w = x.size()
+
+        x_h = torch.mean(x, dim = 3, keepdim = True).permute(0, 1, 3, 2)
+        x_w = torch.mean(x, dim = 2, keepdim = True)
+
+        x_cat_conv_relu = self.relu(self.bn(self.conv_1x1(torch.cat((x_h, x_w), 3))))
+
+        x_cat_conv_split_h, x_cat_conv_split_w = x_cat_conv_relu.split([h, w], 3)
+
+        s_h = self.sigmoid_h(self.F_h(x_cat_conv_split_h.permute(0, 1, 3, 2)))
+        s_w = self.sigmoid_w(self.F_w(x_cat_conv_split_w))
+
+        out = x * s_h.expand_as(x) * s_w.expand_as(x)
+        return out
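
Note (not part of the commit): all four attention blocks reweight a feature map without changing its shape, which is what lets yolo_tiny.py drop them in by index; a minimal sketch, assuming nets.attention is importable:

import torch
from nets.attention import se_block, cbam_block, eca_block, CA_Block

# Each block maps (B, C, H, W) -> (B, C, H, W).
x = torch.randn(1, 256, 26, 26)
for block in (se_block(256), cbam_block(256), eca_block(256), CA_Block(256)):
    assert block(x).shape == x.shape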
nets/yolo.py ADDED
@@ -0,0 +1,185 @@
+from collections import OrderedDict
+
+import torch
+import torch.nn as nn
+
+from nets.CSPdarknet import darknet53
+
+
+def conv2d(filter_in, filter_out, kernel_size, stride=1):
+    pad = (kernel_size - 1) // 2 if kernel_size else 0
+    return nn.Sequential(OrderedDict([
+        ("conv", nn.Conv2d(filter_in, filter_out, kernel_size=kernel_size, stride=stride, padding=pad, bias=False)),
+        ("bn", nn.BatchNorm2d(filter_out)),
+        ("relu", nn.LeakyReLU(0.1)),
+    ]))
+
+#---------------------------------------------------#
+#   SPP structure: pool with kernels of different sizes,
+#   then stack the pooled results
+#---------------------------------------------------#
+class SpatialPyramidPooling(nn.Module):
+    def __init__(self, pool_sizes=[5, 9, 13]):
+        super(SpatialPyramidPooling, self).__init__()
+
+        self.maxpools = nn.ModuleList([nn.MaxPool2d(pool_size, 1, pool_size//2) for pool_size in pool_sizes])
+
+    def forward(self, x):
+        features = [maxpool(x) for maxpool in self.maxpools[::-1]]
+        features = torch.cat(features + [x], dim=1)
+
+        return features
+
+#---------------------------------------------------#
+#   Convolution + upsampling
+#---------------------------------------------------#
+class Upsample(nn.Module):
+    def __init__(self, in_channels, out_channels):
+        super(Upsample, self).__init__()
+
+        self.upsample = nn.Sequential(
+            conv2d(in_channels, out_channels, 1),
+            nn.Upsample(scale_factor=2, mode='nearest')
+        )
+
+    def forward(self, x):
+        x = self.upsample(x)
+        return x
+
+#---------------------------------------------------#
+#   Block of three convolutions
+#---------------------------------------------------#
+def make_three_conv(filters_list, in_filters):
+    m = nn.Sequential(
+        conv2d(in_filters, filters_list[0], 1),
+        conv2d(filters_list[0], filters_list[1], 3),
+        conv2d(filters_list[1], filters_list[0], 1),
+    )
+    return m
+
+#---------------------------------------------------#
+#   Block of five convolutions
+#---------------------------------------------------#
+def make_five_conv(filters_list, in_filters):
+    m = nn.Sequential(
+        conv2d(in_filters, filters_list[0], 1),
+        conv2d(filters_list[0], filters_list[1], 3),
+        conv2d(filters_list[1], filters_list[0], 1),
+        conv2d(filters_list[0], filters_list[1], 3),
+        conv2d(filters_list[1], filters_list[0], 1),
+    )
+    return m
+
+#---------------------------------------------------#
+#   Final yolov4 output head
+#---------------------------------------------------#
+def yolo_head(filters_list, in_filters):
+    m = nn.Sequential(
+        conv2d(in_filters, filters_list[0], 3),
+        nn.Conv2d(filters_list[0], filters_list[1], 1),
+    )
+    return m
+
+#---------------------------------------------------#
+#   yolo_body
+#---------------------------------------------------#
+class YoloBody(nn.Module):
+    def __init__(self, anchors_mask, num_classes, pretrained = False):
+        super(YoloBody, self).__init__()
+        #---------------------------------------------------#
+        #   Build the CSPdarknet53 backbone model.
+        #   It returns three effective feature layers with shapes:
+        #   52,52,256
+        #   26,26,512
+        #   13,13,1024
+        #---------------------------------------------------#
+        self.backbone   = darknet53(pretrained)
+
+        self.conv1      = make_three_conv([512,1024],1024)
+        self.SPP        = SpatialPyramidPooling()
+        self.conv2      = make_three_conv([512,1024],2048)
+
+        self.upsample1          = Upsample(512,256)
+        self.conv_for_P4        = conv2d(512,256,1)
+        self.make_five_conv1    = make_five_conv([256, 512],512)
+
+        self.upsample2          = Upsample(256,128)
+        self.conv_for_P3        = conv2d(256,128,1)
+        self.make_five_conv2    = make_five_conv([128, 256],256)
+
+        # 3*(5+num_classes) = 3*(5+20) = 3*(4+1+20) = 75
+        self.yolo_head3         = yolo_head([256, len(anchors_mask[0]) * (5 + num_classes)],128)
+
+        self.down_sample1       = conv2d(128,256,3,stride=2)
+        self.make_five_conv3    = make_five_conv([256, 512],512)
+
+        # 3*(5+num_classes) = 3*(5+20) = 3*(4+1+20) = 75
+        self.yolo_head2         = yolo_head([512, len(anchors_mask[1]) * (5 + num_classes)],256)
+
+        self.down_sample2       = conv2d(256,512,3,stride=2)
+        self.make_five_conv4    = make_five_conv([512, 1024],1024)
+
+        # 3*(5+num_classes) = 3*(5+20) = 3*(4+1+20) = 75
+        self.yolo_head1         = yolo_head([1024, len(anchors_mask[2]) * (5 + num_classes)],512)
+
+
+    def forward(self, x):
+        #  backbone
+        x2, x1, x0 = self.backbone(x)
+
+        # 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,2048
+        P5 = self.conv1(x0)
+        P5 = self.SPP(P5)
+        # 13,13,2048 -> 13,13,512 -> 13,13,1024 -> 13,13,512
+        P5 = self.conv2(P5)
+
+        # 13,13,512 -> 13,13,256 -> 26,26,256
+        P5_upsample = self.upsample1(P5)
+        # 26,26,512 -> 26,26,256
+        P4 = self.conv_for_P4(x1)
+        # 26,26,256 + 26,26,256 -> 26,26,512
+        P4 = torch.cat([P4,P5_upsample],axis=1)
+        # 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
+        P4 = self.make_five_conv1(P4)
+
+        # 26,26,256 -> 26,26,128 -> 52,52,128
+        P4_upsample = self.upsample2(P4)
+        # 52,52,256 -> 52,52,128
+        P3 = self.conv_for_P3(x2)
+        # 52,52,128 + 52,52,128 -> 52,52,256
+        P3 = torch.cat([P3,P4_upsample],axis=1)
+        # 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128
+        P3 = self.make_five_conv2(P3)
+
+        # 52,52,128 -> 26,26,256
+        P3_downsample = self.down_sample1(P3)
+        # 26,26,256 + 26,26,256 -> 26,26,512
+        P4 = torch.cat([P3_downsample,P4],axis=1)
+        # 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
+        P4 = self.make_five_conv3(P4)
+
+        # 26,26,256 -> 13,13,512
+        P4_downsample = self.down_sample2(P4)
+        # 13,13,512 + 13,13,512 -> 13,13,1024
+        P5 = torch.cat([P4_downsample,P5],axis=1)
+        # 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512
+        P5 = self.make_five_conv4(P5)
+
+        #---------------------------------------------------#
+        #   Third feature layer
+        #   y3=(batch_size,75,52,52)
+        #---------------------------------------------------#
+        out2 = self.yolo_head3(P3)
+        #---------------------------------------------------#
+        #   Second feature layer
+        #   y2=(batch_size,75,26,26)
+        #---------------------------------------------------#
+        out1 = self.yolo_head2(P4)
+        #---------------------------------------------------#
+        #   First feature layer
+        #   y1=(batch_size,75,13,13)
+        #---------------------------------------------------#
+        out0 = self.yolo_head1(P5)
+
+        return out0, out1, out2
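
Note (not part of the commit): a shape check for the full body with the 8 gesture classes from gesture.yaml, so each head outputs 3 * (5 + 8) = 39 channels; assumes nets.yolo is importable:

import torch
from nets.yolo import YoloBody

anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
model = YoloBody(anchors_mask, num_classes=8)
out0, out1, out2 = model(torch.randn(1, 3, 416, 416))
print(out0.shape)  # torch.Size([1, 39, 13, 13])
print(out1.shape)  # torch.Size([1, 39, 26, 26])
print(out2.shape)  # torch.Size([1, 39, 52, 52])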
nets/yolo_tiny.py ADDED
@@ -0,0 +1,99 @@
+import torch
+import torch.nn as nn
+
+from nets.CSPdarknet53_tiny import darknet53_tiny
+from nets.attention import cbam_block, eca_block, se_block, CA_Block
+
+attention_block = [se_block, cbam_block, eca_block, CA_Block]
+
+#-------------------------------------------------#
+#   Convolution block -> convolution + normalization + activation
+#   Conv2d + BatchNormalization + LeakyReLU
+#-------------------------------------------------#
+class BasicConv(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1):
+        super(BasicConv, self).__init__()
+
+        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False)
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.activation = nn.LeakyReLU(0.1)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.activation(x)
+        return x
+
+#---------------------------------------------------#
+#   Convolution + upsampling
+#---------------------------------------------------#
+class Upsample(nn.Module):
+    def __init__(self, in_channels, out_channels):
+        super(Upsample, self).__init__()
+
+        self.upsample = nn.Sequential(
+            BasicConv(in_channels, out_channels, 1),
+            nn.Upsample(scale_factor=2, mode='nearest')
+        )
+
+    def forward(self, x):
+        x = self.upsample(x)
+        return x
+
+#---------------------------------------------------#
+#   Final yolov4 output head
+#---------------------------------------------------#
+def yolo_head(filters_list, in_filters):
+    m = nn.Sequential(
+        BasicConv(in_filters, filters_list[0], 3),
+        nn.Conv2d(filters_list[0], filters_list[1], 1),
+    )
+    return m
+#---------------------------------------------------#
+#   yolo_body
+#---------------------------------------------------#
+class YoloBodytiny(nn.Module):
+    def __init__(self, anchors_mask, num_classes, phi=0, pretrained=False):
+        super(YoloBodytiny, self).__init__()
+        self.phi            = phi
+        self.backbone       = darknet53_tiny(pretrained)
+
+        self.conv_for_P5    = BasicConv(512,256,1)
+        self.yolo_headP5    = yolo_head([512, len(anchors_mask[0]) * (5 + num_classes)],256)
+
+        self.upsample       = Upsample(256,128)
+        self.yolo_headP4    = yolo_head([256, len(anchors_mask[1]) * (5 + num_classes)],384)
+
+        if 1 <= self.phi and self.phi <= 4:
+            self.feat1_att      = attention_block[self.phi - 1](256)
+            self.feat2_att      = attention_block[self.phi - 1](512)
+            self.upsample_att   = attention_block[self.phi - 1](128)
+
+    def forward(self, x):
+        #---------------------------------------------------#
+        #   Run the CSPdarknet53_tiny backbone model
+        #   feat1 has shape 26,26,256
+        #   feat2 has shape 13,13,512
+        #---------------------------------------------------#
+        feat1, feat2 = self.backbone(x)
+        if 1 <= self.phi and self.phi <= 4:
+            feat1 = self.feat1_att(feat1)
+            feat2 = self.feat2_att(feat2)
+
+        # 13,13,512 -> 13,13,256
+        P5 = self.conv_for_P5(feat2)
+        # 13,13,256 -> 13,13,512 -> 13,13,255
+        out0 = self.yolo_headP5(P5)
+
+        # 13,13,256 -> 13,13,128 -> 26,26,128
+        P5_Upsample = self.upsample(P5)
+        # 26,26,256 + 26,26,128 -> 26,26,384
+        if 1 <= self.phi and self.phi <= 4:
+            P5_Upsample = self.upsample_att(P5_Upsample)
+        P4 = torch.cat([P5_Upsample,feat1],axis=1)
+
+        # 26,26,384 -> 26,26,256 -> 26,26,255
+        out1 = self.yolo_headP4(P4)
+
+        return out0, out1
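
Note (not part of the commit): the tiny body selects an attention block by phi (1=SE, 2=CBAM, 3=ECA, 4=CA); a minimal sketch with CBAM and the 8 gesture classes, assuming nets.yolo_tiny is importable:

import torch
from nets.yolo_tiny import YoloBodytiny

anchors_mask = [[3, 4, 5], [0, 1, 2]]
model = YoloBodytiny(anchors_mask, num_classes=8, phi=2)
out0, out1 = model(torch.randn(1, 3, 416, 416))
print(out0.shape)  # torch.Size([1, 39, 13, 13])
print(out1.shape)  # torch.Size([1, 39, 26, 26])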
nets/yolo_training.py
ADDED
@@ -0,0 +1,476 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import math
|
2 |
+
from functools import partial
|
3 |
+
|
4 |
+
import numpy as np
|
5 |
+
import torch
|
6 |
+
import torch.nn as nn
|
7 |
+
|
8 |
+
|
9 |
+
class YOLOLoss(nn.Module):
|
10 |
+
def __init__(self, anchors, num_classes, input_shape, cuda, anchors_mask = [[6,7,8], [3,4,5], [0,1,2]], label_smoothing = 0, focal_loss = False, alpha = 0.25, gamma = 2):
|
11 |
+
super(YOLOLoss, self).__init__()
|
12 |
+
#-----------------------------------------------------------#
|
13 |
+
# 13x13的特征层对应的anchor是[142, 110],[192, 243],[459, 401]
|
14 |
+
# 26x26的特征层对应的anchor是[36, 75],[76, 55],[72, 146]
|
15 |
+
# 52x52的特征层对应的anchor是[12, 16],[19, 36],[40, 28]
|
16 |
+
#-----------------------------------------------------------#
|
17 |
+
self.anchors = anchors
|
18 |
+
self.num_classes = num_classes
|
19 |
+
self.bbox_attrs = 5 + num_classes
|
20 |
+
self.input_shape = input_shape
|
21 |
+
self.anchors_mask = anchors_mask
|
22 |
+
self.label_smoothing = label_smoothing
|
23 |
+
|
24 |
+
self.balance = [0.4, 1.0, 4]
|
25 |
+
self.box_ratio = 0.05
|
26 |
+
self.obj_ratio = 5 * (input_shape[0] * input_shape[1]) / (416 ** 2)
|
27 |
+
self.cls_ratio = 1 * (num_classes / 80)
|
28 |
+
|
29 |
+
self.focal_loss = focal_loss
|
30 |
+
self.focal_loss_ratio = 10
|
31 |
+
self.alpha = alpha
|
32 |
+
self.gamma = gamma
|
33 |
+
|
34 |
+
self.ignore_threshold = 0.5
|
35 |
+
self.cuda = cuda
|
36 |
+
|
37 |
+
def clip_by_tensor(self, t, t_min, t_max):
|
38 |
+
t = t.float()
|
39 |
+
result = (t >= t_min).float() * t + (t < t_min).float() * t_min
|
40 |
+
result = (result <= t_max).float() * result + (result > t_max).float() * t_max
|
41 |
+
return result
|
42 |
+
|
43 |
+
def MSELoss(self, pred, target):
|
44 |
+
return torch.pow(pred - target, 2)
|
45 |
+
|
46 |
+
def BCELoss(self, pred, target):
|
47 |
+
epsilon = 1e-7
|
48 |
+
pred = self.clip_by_tensor(pred, epsilon, 1.0 - epsilon)
|
49 |
+
output = - target * torch.log(pred) - (1.0 - target) * torch.log(1.0 - pred)
|
50 |
+
return output
|
51 |
+
|
52 |
+
def box_ciou(self, b1, b2):
|
53 |
+
"""
|
54 |
+
输入为:
|
55 |
+
----------
|
56 |
+
b1: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
|
57 |
+
b2: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
|
58 |
+
|
59 |
+
返回为:
|
60 |
+
-------
|
61 |
+
ciou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)
|
62 |
+
"""
|
63 |
+
#----------------------------------------------------#
|
64 |
+
# 求出预测框左上角右下角
|
65 |
+
#----------------------------------------------------#
|
66 |
+
b1_xy = b1[..., :2]
|
67 |
+
b1_wh = b1[..., 2:4]
|
68 |
+
b1_wh_half = b1_wh/2.
|
69 |
+
b1_mins = b1_xy - b1_wh_half
|
70 |
+
b1_maxes = b1_xy + b1_wh_half
|
71 |
+
#----------------------------------------------------#
|
72 |
+
# 求出真实框左上角右下角
|
73 |
+
#----------------------------------------------------#
|
74 |
+
b2_xy = b2[..., :2]
|
75 |
+
b2_wh = b2[..., 2:4]
|
76 |
+
b2_wh_half = b2_wh/2.
|
77 |
+
b2_mins = b2_xy - b2_wh_half
|
78 |
+
b2_maxes = b2_xy + b2_wh_half
|
79 |
+
|
80 |
+
#----------------------------------------------------#
|
81 |
+
# 求真实框和预测框所有的iou
|
82 |
+
#----------------------------------------------------#
|
83 |
+
intersect_mins = torch.max(b1_mins, b2_mins)
|
84 |
+
intersect_maxes = torch.min(b1_maxes, b2_maxes)
|
85 |
+
intersect_wh = torch.max(intersect_maxes - intersect_mins, torch.zeros_like(intersect_maxes))
|
86 |
+
intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
|
87 |
+
b1_area = b1_wh[..., 0] * b1_wh[..., 1]
|
88 |
+
b2_area = b2_wh[..., 0] * b2_wh[..., 1]
|
89 |
+
union_area = b1_area + b2_area - intersect_area
|
90 |
+
iou = intersect_area / torch.clamp(union_area,min = 1e-6)
|
91 |
+
|
92 |
+
#----------------------------------------------------#
|
93 |
+
# 计算中心的差距
|
94 |
+
#----------------------------------------------------#
|
95 |
+
center_distance = torch.sum(torch.pow((b1_xy - b2_xy), 2), axis=-1)
|
96 |
+
|
97 |
+
#----------------------------------------------------#
|
98 |
+
# 找到包裹两个框的最小框的左上角和右下角
|
99 |
+
#----------------------------------------------------#
|
100 |
+
enclose_mins = torch.min(b1_mins, b2_mins)
|
101 |
+
enclose_maxes = torch.max(b1_maxes, b2_maxes)
|
102 |
+
enclose_wh = torch.max(enclose_maxes - enclose_mins, torch.zeros_like(intersect_maxes))
|
103 |
+
#----------------------------------------------------#
|
104 |
+
# 计算对角线距离
|
105 |
+
#----------------------------------------------------#
|
106 |
+
enclose_diagonal = torch.sum(torch.pow(enclose_wh,2), axis=-1)
|
107 |
+
ciou = iou - 1.0 * (center_distance) / torch.clamp(enclose_diagonal,min = 1e-6)
|
108 |
+
|
109 |
+
v = (4 / (math.pi ** 2)) * torch.pow((torch.atan(b1_wh[..., 0] / torch.clamp(b1_wh[..., 1],min = 1e-6)) - torch.atan(b2_wh[..., 0] / torch.clamp(b2_wh[..., 1], min = 1e-6))), 2)
|
110 |
+
alpha = v / torch.clamp((1.0 - iou + v), min=1e-6)
|
111 |
+
ciou = ciou - alpha * v
|
112 |
+
return ciou
|
113 |
+
|
114 |
+
#---------------------------------------------------#
|
115 |
+
# 平滑标签
|
116 |
+
#---------------------------------------------------#
|
117 |
+
def smooth_labels(self, y_true, label_smoothing, num_classes):
|
118 |
+
return y_true * (1.0 - label_smoothing) + label_smoothing / num_classes
|
119 |
+
|
120 |
+
def forward(self, l, input, targets=None):
|
121 |
+
#----------------------------------------------------#
|
122 |
+
# l 代表使用的是第几个有效特征层
|
123 |
+
# input的shape为 bs, 3*(5+num_classes), 13, 13
|
124 |
+
# bs, 3*(5+num_classes), 26, 26
|
125 |
+
# bs, 3*(5+num_classes), 52, 52
|
126 |
+
# targets 真实框的标签情况 [batch_size, num_gt, 5]
|
127 |
+
#----------------------------------------------------#
|
128 |
+
#--------------------------------#
|
129 |
+
# 获得图片数量,特征层的高和宽
|
130 |
+
#--------------------------------#
|
131 |
+
bs = input.size(0)
|
132 |
+
in_h = input.size(2)
|
133 |
+
in_w = input.size(3)
|
134 |
+
#-----------------------------------------------------------------------#
|
135 |
+
# 计算步长
|
136 |
+
# 每一个特征点对应原来的图片上多少个像素点
|
137 |
+
#
|
138 |
+
# 如果特征层为13x13的话,一个特征点就对应原来的图片上的32个像素点
|
139 |
+
# 如果特征层为26x26的话,一个特征点就对应原来的图片上的16个像素点
|
140 |
+
# 如果特征层为52x52的话,一个特征点就对应原来的图片上的8个像素点
|
141 |
+
# stride_h = stride_w = 32、16、8
|
142 |
+
#-----------------------------------------------------------------------#
|
143 |
+
stride_h = self.input_shape[0] / in_h
|
144 |
+
stride_w = self.input_shape[1] / in_w
|
145 |
+
#-------------------------------------------------#
|
146 |
+
# 此时获得的scaled_anchors大小是相对于特征层的
|
147 |
+
#-------------------------------------------------#
|
148 |
+
scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors]
|
149 |
+
#-----------------------------------------------#
|
150 |
+
# 输入的input一共有三个,他们的shape分别是
|
151 |
+
# bs, 3 * (5+num_classes), 13, 13 => bs, 3, 5 + num_classes, 13, 13 => batch_size, 3, 13, 13, 5 + num_classes
|
152 |
+
|
153 |
+
# batch_size, 3, 13, 13, 5 + num_classes
|
154 |
+
# batch_size, 3, 26, 26, 5 + num_classes
|
155 |
+
# batch_size, 3, 52, 52, 5 + num_classes
|
156 |
+
#-----------------------------------------------#
|
157 |
+
prediction = input.view(bs, len(self.anchors_mask[l]), self.bbox_attrs, in_h, in_w).permute(0, 1, 3, 4, 2).contiguous()
|
158 |
+
|
159 |
+
#-----------------------------------------------#
|
160 |
+
# 先验框的中心位置的调整参数
|
161 |
+
#-----------------------------------------------#
|
162 |
+
x = torch.sigmoid(prediction[..., 0])
|
163 |
+
y = torch.sigmoid(prediction[..., 1])
|
164 |
+
#-----------------------------------------------#
|
165 |
+
# 先验框的宽高调整参数
|
166 |
+
#-----------------------------------------------#
|
167 |
+
w = prediction[..., 2]
|
168 |
+
h = prediction[..., 3]
|
169 |
+
#-----------------------------------------------#
|
170 |
+
# 获得置信度,是否有物体
|
171 |
+
#-----------------------------------------------#
|
172 |
+
conf = torch.sigmoid(prediction[..., 4])
|
173 |
+
#-----------------------------------------------#
|
174 |
+
# 种类置信度
|
175 |
+
#-----------------------------------------------#
|
176 |
+
pred_cls = torch.sigmoid(prediction[..., 5:])
|
177 |
+
|
178 |
+
#-----------------------------------------------#
|
179 |
+
# 获得网络应该有的预测结果
|
180 |
+
#-----------------------------------------------#
|
181 |
+
y_true, noobj_mask, box_loss_scale = self.get_target(l, targets, scaled_anchors, in_h, in_w)
|
182 |
+
|
183 |
+
#---------------------------------------------------------------#
|
184 |
+
# 将预测结果进行解码,判断预测结果和真实值的重合程度
|
185 |
+
# 如果重合程度过大则忽略,因为这些特征点属于预测比较准确的特征点
|
186 |
+
# 作为负样本不合适
|
187 |
+
#----------------------------------------------------------------#
|
188 |
+
noobj_mask, pred_boxes = self.get_ignore(l, x, y, h, w, targets, scaled_anchors, in_h, in_w, noobj_mask)
|
189 |
+
|
190 |
+
if self.cuda:
|
191 |
+
y_true = y_true.type_as(x)
|
192 |
+
noobj_mask = noobj_mask.type_as(x)
|
193 |
+
box_loss_scale = box_loss_scale.type_as(x)
|
194 |
+
#--------------------------------------------------------------------------#
|
195 |
+
# box_loss_scale是真实框宽高的乘积,宽高均在0-1之间,因此乘积也在0-1之间。
|
196 |
+
# 2-宽高的乘积代表真实框越大,比重越小,小框的比重更大。
|
197 |
+
# 使用iou损失时,大中小目标的回归损失不存在比例失衡问题,故弃用
|
198 |
+
#--------------------------------------------------------------------------#
|
199 |
+
box_loss_scale = 2 - box_loss_scale
|
200 |
+
|
201 |
+
loss = 0
|
202 |
+
obj_mask = y_true[..., 4] == 1
|
203 |
+
n = torch.sum(obj_mask)
|
204 |
+
if n != 0:
|
205 |
+
#---------------------------------------------------------------#
|
206 |
+
# 计算预测结果和真实结果的差距
|
207 |
+
# loss_loc ciou回归损失
|
208 |
+
# loss_cls 分类损失
|
209 |
+
#---------------------------------------------------------------#
|
210 |
+
ciou = self.box_ciou(pred_boxes, y_true[..., :4]).type_as(x)
|
211 |
+
# loss_loc = torch.mean((1 - ciou)[obj_mask] * box_loss_scale[obj_mask])
|
212 |
+
loss_loc = torch.mean((1 - ciou)[obj_mask])
|
213 |
+
|
214 |
+
loss_cls = torch.mean(self.BCELoss(pred_cls[obj_mask], y_true[..., 5:][obj_mask]))
|
215 |
+
loss += loss_loc * self.box_ratio + loss_cls * self.cls_ratio
|
216 |
+
|
217 |
+
#---------------------------------------------------------------#
|
218 |
+
# 计算是否包含物体的置信度损失
|
219 |
+
#---------------------------------------------------------------#
|
220 |
+
if self.focal_loss:
|
221 |
+
pos_neg_ratio = torch.where(obj_mask, torch.ones_like(conf) * self.alpha, torch.ones_like(conf) * (1 - self.alpha))
|
222 |
+
hard_easy_ratio = torch.where(obj_mask, torch.ones_like(conf) - conf, conf) ** self.gamma
|
223 |
+
loss_conf = torch.mean((self.BCELoss(conf, obj_mask.type_as(conf)) * pos_neg_ratio * hard_easy_ratio)[noobj_mask.bool() | obj_mask]) * self.focal_loss_ratio
|
224 |
+
else:
|
225 |
+
loss_conf = torch.mean(self.BCELoss(conf, obj_mask.type_as(conf))[noobj_mask.bool() | obj_mask])
|
226 |
+
loss += loss_conf * self.balance[l] * self.obj_ratio
|
227 |
+
# if n != 0:
|
228 |
+
# print(loss_loc * self.box_ratio, loss_cls * self.cls_ratio, loss_conf * self.balance[l] * self.obj_ratio)
|
229 |
+
return loss
|
230 |
+
|
231 |
+
def calculate_iou(self, _box_a, _box_b):
|
232 |
+
#-----------------------------------------------------------#
|
233 |
+
# 计算真实框的左上角和右下角
|
234 |
+
#-----------------------------------------------------------#
|
235 |
+
b1_x1, b1_x2 = _box_a[:, 0] - _box_a[:, 2] / 2, _box_a[:, 0] + _box_a[:, 2] / 2
|
236 |
+
b1_y1, b1_y2 = _box_a[:, 1] - _box_a[:, 3] / 2, _box_a[:, 1] + _box_a[:, 3] / 2
|
237 |
+
#-----------------------------------------------------------#
|
238 |
+
# 计算先验框获得的预测框的左上角和右下角
|
239 |
+
#-----------------------------------------------------------#
|
240 |
+
b2_x1, b2_x2 = _box_b[:, 0] - _box_b[:, 2] / 2, _box_b[:, 0] + _box_b[:, 2] / 2
|
241 |
+
b2_y1, b2_y2 = _box_b[:, 1] - _box_b[:, 3] / 2, _box_b[:, 1] + _box_b[:, 3] / 2
|
242 |
+
|
243 |
+
#-----------------------------------------------------------#
|
244 |
+
# 将真实框和预测框都转化成左上角右下角的形式
|
245 |
+
#-----------------------------------------------------------#
|
246 |
+
box_a = torch.zeros_like(_box_a)
|
247 |
+
box_b = torch.zeros_like(_box_b)
|
248 |
+
box_a[:, 0], box_a[:, 1], box_a[:, 2], box_a[:, 3] = b1_x1, b1_y1, b1_x2, b1_y2
|
249 |
+
box_b[:, 0], box_b[:, 1], box_b[:, 2], box_b[:, 3] = b2_x1, b2_y1, b2_x2, b2_y2
|
250 |
+
|
251 |
+
#-----------------------------------------------------------#
|
252 |
+
# A为真实框的数量,B为先验框的数量
|
253 |
+
#-----------------------------------------------------------#
|
254 |
+
A = box_a.size(0)
|
255 |
+
B = box_b.size(0)
|
256 |
+
|
257 |
+
#-----------------------------------------------------------#
|
258 |
+
# 计算交的面积
|
259 |
+
#-----------------------------------------------------------#
|
260 |
+
max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
|
261 |
+
min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), box_b[:, :2].unsqueeze(0).expand(A, B, 2))
|
262 |
+
inter = torch.clamp((max_xy - min_xy), min=0)
|
263 |
+
inter = inter[:, :, 0] * inter[:, :, 1]
|
264 |
+
#-----------------------------------------------------------#
|
265 |
+
# 计算预测框和真实框各自的面积
|
266 |
+
#-----------------------------------------------------------#
|
267 |
+
area_a = ((box_a[:, 2]-box_a[:, 0]) * (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B]
|
268 |
+
area_b = ((box_b[:, 2]-box_b[:, 0]) * (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B]
|
269 |
+
#-----------------------------------------------------------#
|
270 |
+
# 求IOU
|
271 |
+
#-----------------------------------------------------------#
|
272 |
+
union = area_a + area_b - inter
|
273 |
+
return inter / union # [A,B]
|
274 |
+
|
275 |
+
def get_target(self, l, targets, anchors, in_h, in_w):
|
276 |
+
#-----------------------------------------------------#
|
277 |
+
# 计算一共有多少张图片
|
278 |
+
#-----------------------------------------------------#
|
279 |
+
bs = len(targets)
|
280 |
+
#-----------------------------------------------------#
|
281 |
+
# 用于选取哪些先验框不包含物体
|
282 |
+
#-----------------------------------------------------#
|
283 |
+
noobj_mask = torch.ones(bs, len(self.anchors_mask[l]), in_h, in_w, requires_grad = False)
|
284 |
+
#-----------------------------------------------------#
|
285 |
+
# 让网络更加去关注小目标
|
286 |
+
#-----------------------------------------------------#
|
287 |
+
box_loss_scale = torch.zeros(bs, len(self.anchors_mask[l]), in_h, in_w, requires_grad = False)
|
288 |
+
#-----------------------------------------------------#
|
289 |
+
# batch_size, 3, 13, 13, 5 + num_classes
|
290 |
+
#-----------------------------------------------------#
|
291 |
+
y_true = torch.zeros(bs, len(self.anchors_mask[l]), in_h, in_w, self.bbox_attrs, requires_grad = False)
|
292 |
+
for b in range(bs):
|
293 |
+
if len(targets[b])==0:
|
294 |
+
continue
|
295 |
+
batch_target = torch.zeros_like(targets[b])
|
296 |
+
#-------------------------------------------------------#
|
297 |
+
# 计算出正样本在特征层上的中心点
|
298 |
+
#-------------------------------------------------------#
|
299 |
+
batch_target[:, [0,2]] = targets[b][:, [0,2]] * in_w
|
300 |
+
batch_target[:, [1,3]] = targets[b][:, [1,3]] * in_h
|
301 |
+
batch_target[:, 4] = targets[b][:, 4]
|
302 |
+
batch_target = batch_target.cpu()
|
303 |
+
|
304 |
+
#-------------------------------------------------------#
|
305 |
+
# 将真实框转换一个形式
|
306 |
+
# num_true_box, 4
|
307 |
+
#-------------------------------------------------------#
|
308 |
+
gt_box = torch.FloatTensor(torch.cat((torch.zeros((batch_target.size(0), 2)), batch_target[:, 2:4]), 1))
|
309 |
+
#-------------------------------------------------------#
|
310 |
+
# 将先验框转换一个形式
|
311 |
+
# 9, 4
|
312 |
+
#-------------------------------------------------------#
|
313 |
+
anchor_shapes = torch.FloatTensor(torch.cat((torch.zeros((len(anchors), 2)), torch.FloatTensor(anchors)), 1))
|
314 |
+
#-------------------------------------------------------#
|
315 |
+
# 计算交并比
|
316 |
+
# self.calculate_iou(gt_box, anchor_shapes) = [num_true_box, 9]每一个真实框和9个先验框的重合情况
|
317 |
+
# best_ns:
|
318 |
+
# [每个真实框最大的重合度max_iou, 每一个真实框最重合的先验框的序号]
|
319 |
+
#-------------------------------------------------------#
|
320 |
+
best_ns = torch.argmax(self.calculate_iou(gt_box, anchor_shapes), dim=-1)
|
321 |
+
|
322 |
+
for t, best_n in enumerate(best_ns):
|
323 |
+
if best_n not in self.anchors_mask[l]:
|
324 |
+
continue
|
325 |
+
#----------------------------------------#
|
326 |
+
# 判断这个先验框是当前特征点的哪一个先验框
|
327 |
+
#----------------------------------------#
|
328 |
+
k = self.anchors_mask[l].index(best_n)
|
329 |
+
#----------------------------------------#
|
330 |
+
# 获得真实框属于哪个网格点
|
331 |
+
#----------------------------------------#
|
332 |
+
i = torch.floor(batch_target[t, 0]).long()
|
333 |
+
j = torch.floor(batch_target[t, 1]).long()
|
334 |
+
#----------------------------------------#
|
335 |
+
# 取出真实框的种类
|
336 |
+
#----------------------------------------#
|
337 |
+
c = batch_target[t, 4].long()
|
338 |
+
|
339 |
+
#----------------------------------------#
|
340 |
+
# noobj_mask代表无目标的特征点
|
341 |
+
#----------------------------------------#
|
342 |
+
noobj_mask[b, k, j, i] = 0
|
343 |
+
#----------------------------------------#
|
344 |
+
# tx、ty代表中心调整参数的真实值
|
345 |
+
#----------------------------------------#
|
346 |
+
y_true[b, k, j, i, 0] = batch_target[t, 0]
|
347 |
+
y_true[b, k, j, i, 1] = batch_target[t, 1]
|
348 |
+
y_true[b, k, j, i, 2] = batch_target[t, 2]
|
349 |
+
y_true[b, k, j, i, 3] = batch_target[t, 3]
|
350 |
+
y_true[b, k, j, i, 4] = 1
|
351 |
+
y_true[b, k, j, i, c + 5] = 1
|
352 |
+
#----------------------------------------#
|
353 |
+
# 用于获得xywh的比例
|
354 |
+
# 大目标loss权重小,小目标loss权重大
|
355 |
+
#----------------------------------------#
                    box_loss_scale[b, k, j, i] = batch_target[t, 2] * batch_target[t, 3] / in_w / in_h
        return y_true, noobj_mask, box_loss_scale

    def get_ignore(self, l, x, y, h, w, targets, scaled_anchors, in_h, in_w, noobj_mask):
        #-----------------------------------------------------#
        #   Count how many images are in the batch
        #-----------------------------------------------------#
        bs = len(targets)

        #-----------------------------------------------------#
        #   Generate the grid; the prior-box centres sit at the
        #   top-left corners of the grid cells
        #-----------------------------------------------------#
        grid_x = torch.linspace(0, in_w - 1, in_w).repeat(in_h, 1).repeat(
            int(bs * len(self.anchors_mask[l])), 1, 1).view(x.shape).type_as(x)
        grid_y = torch.linspace(0, in_h - 1, in_h).repeat(in_w, 1).t().repeat(
            int(bs * len(self.anchors_mask[l])), 1, 1).view(y.shape).type_as(x)

        # Generate the prior-box widths and heights
        scaled_anchors_l = np.array(scaled_anchors)[self.anchors_mask[l]]
        anchor_w = torch.Tensor(scaled_anchors_l).index_select(1, torch.LongTensor([0])).type_as(x)
        anchor_h = torch.Tensor(scaled_anchors_l).index_select(1, torch.LongTensor([1])).type_as(x)

        anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(w.shape)
        anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(h.shape)
        #-------------------------------------------------------#
        #   Compute the adjusted prior-box centres and sizes
        #-------------------------------------------------------#
        pred_boxes_x = torch.unsqueeze(x + grid_x, -1)
        pred_boxes_y = torch.unsqueeze(y + grid_y, -1)
        pred_boxes_w = torch.unsqueeze(torch.exp(w) * anchor_w, -1)
        pred_boxes_h = torch.unsqueeze(torch.exp(h) * anchor_h, -1)
        pred_boxes   = torch.cat([pred_boxes_x, pred_boxes_y, pred_boxes_w, pred_boxes_h], dim = -1)

        for b in range(bs):
            #-------------------------------------------------------#
            #   Reshape the predictions
            #   pred_boxes_for_ignore   num_anchors, 4
            #-------------------------------------------------------#
            pred_boxes_for_ignore = pred_boxes[b].view(-1, 4)
            #-------------------------------------------------------#
            #   Convert the ground-truth boxes to feature-layer scale
            #   gt_box   num_true_box, 4
            #-------------------------------------------------------#
            if len(targets[b]) > 0:
                batch_target = torch.zeros_like(targets[b])
                #-------------------------------------------------------#
                #   Centres of the positive samples on the feature layer
                #-------------------------------------------------------#
                batch_target[:, [0,2]] = targets[b][:, [0,2]] * in_w
                batch_target[:, [1,3]] = targets[b][:, [1,3]] * in_h
                batch_target = batch_target[:, :4].type_as(x)
                #-------------------------------------------------------#
                #   Compute the IoU
                #   anch_ious   num_true_box, num_anchors
                #-------------------------------------------------------#
                anch_ious = self.calculate_iou(batch_target, pred_boxes_for_ignore)
                #-------------------------------------------------------#
                #   Best overlap of each prior box with any ground-truth box
                #   anch_ious_max   num_anchors
                #-------------------------------------------------------#
                anch_ious_max, _ = torch.max(anch_ious, dim = 0)
                anch_ious_max = anch_ious_max.view(pred_boxes[b].size()[:3])
                noobj_mask[b][anch_ious_max > self.ignore_threshold] = 0
        return noobj_mask, pred_boxes

def weights_init(net, init_type='normal', init_gain = 0.02):
    def init_func(m):
        classname = m.__class__.__name__
        if hasattr(m, 'weight') and classname.find('Conv') != -1:
            if init_type == 'normal':
                torch.nn.init.normal_(m.weight.data, 0.0, init_gain)
            elif init_type == 'xavier':
                torch.nn.init.xavier_normal_(m.weight.data, gain=init_gain)
            elif init_type == 'kaiming':
                torch.nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
            elif init_type == 'orthogonal':
                torch.nn.init.orthogonal_(m.weight.data, gain=init_gain)
            else:
                raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
        elif classname.find('BatchNorm2d') != -1:
            torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
            torch.nn.init.constant_(m.bias.data, 0.0)
    print('initialize network with %s type' % init_type)
    net.apply(init_func)

def get_lr_scheduler(lr_decay_type, lr, min_lr, total_iters, warmup_iters_ratio = 0.05, warmup_lr_ratio = 0.1, no_aug_iter_ratio = 0.05, step_num = 10):
    def yolox_warm_cos_lr(lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter, iters):
        if iters <= warmup_total_iters:
            # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start
            lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2) + warmup_lr_start
        elif iters >= total_iters - no_aug_iter:
            lr = min_lr
        else:
            lr = min_lr + 0.5 * (lr - min_lr) * (
                1.0 + math.cos(math.pi * (iters - warmup_total_iters) / (total_iters - warmup_total_iters - no_aug_iter))
            )
        return lr

    def step_lr(lr, decay_rate, step_size, iters):
        if step_size < 1:
            raise ValueError("step_size must be above 1.")
        n = iters // step_size
        out_lr = lr * decay_rate ** n
        return out_lr

    if lr_decay_type == "cos":
        warmup_total_iters = min(max(warmup_iters_ratio * total_iters, 1), 3)
        warmup_lr_start    = max(warmup_lr_ratio * lr, 1e-6)
        no_aug_iter        = min(max(no_aug_iter_ratio * total_iters, 1), 15)
        func = partial(yolox_warm_cos_lr, lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter)
    else:
        decay_rate = (min_lr / lr) ** (1 / (step_num - 1))
        step_size  = total_iters / step_num
        func = partial(step_lr, lr, decay_rate, step_size)

    return func

def set_optimizer_lr(optimizer, lr_scheduler_func, epoch):
    lr = lr_scheduler_func(epoch)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
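Usage note: a minimal sketch of how these scheduler helpers are typically wired into a training loop. The optimizer, learning rates and epoch count below are illustrative placeholders, not values taken from this repo:

import torch

model     = torch.nn.Linear(10, 2)   # placeholder network
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.937)
# cosine decay from 1e-2 down to 1e-4 over 100 epochs
lr_scheduler_func = get_lr_scheduler("cos", lr=1e-2, min_lr=1e-4, total_iters=100)
for epoch in range(100):
    set_optimizer_lr(optimizer, lr_scheduler_func, epoch)
    # ... one epoch of training ...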
nets/yolotiny_training.py
ADDED
@@ -0,0 +1,474 @@
import math
from functools import partial

import numpy as np
import torch
import torch.nn as nn

class YOLOLosstiny(nn.Module):
    def __init__(self, anchors, num_classes, input_shape, cuda, anchors_mask = [[6,7,8], [3,4,5], [0,1,2]], label_smoothing = 0):
        super(YOLOLosstiny, self).__init__()
        #-----------------------------------------------------------#
        #   The 13x13 feature layer uses anchors [81,82],[135,169],[344,319]
        #   The 26x26 feature layer uses anchors [10,14],[23,27],[37,58]
        #-----------------------------------------------------------#
        self.anchors         = anchors
        self.num_classes     = num_classes
        self.bbox_attrs      = 5 + num_classes
        self.input_shape     = input_shape
        self.anchors_mask    = anchors_mask
        self.label_smoothing = label_smoothing

        self.balance   = [0.4, 1.0, 4]
        self.box_ratio = 0.05
        self.obj_ratio = 5 * (input_shape[0] * input_shape[1]) / (416 ** 2)
        self.cls_ratio = 1 * (num_classes / 80)

        self.ignore_threshold = 0.5
        self.cuda = cuda

    def clip_by_tensor(self, t, t_min, t_max):
        t = t.float()
        result = (t >= t_min).float() * t + (t < t_min).float() * t_min
        result = (result <= t_max).float() * result + (result > t_max).float() * t_max
        return result

    def MSELoss(self, pred, target):
        return torch.pow(pred - target, 2)

    def BCELoss(self, pred, target):
        epsilon = 1e-7
        pred    = self.clip_by_tensor(pred, epsilon, 1.0 - epsilon)
        output  = - target * torch.log(pred) - (1.0 - target) * torch.log(1.0 - pred)
        return output

    def box_ciou(self, b1, b2):
        """
        Input:
        ----------
        b1: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
        b2: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh

        Returns:
        -------
        ciou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)
        """
        #----------------------------------------------------#
        #   Top-left and bottom-right corners of the predicted boxes
        #----------------------------------------------------#
        b1_xy      = b1[..., :2]
        b1_wh      = b1[..., 2:4]
        b1_wh_half = b1_wh/2.
        b1_mins    = b1_xy - b1_wh_half
        b1_maxes   = b1_xy + b1_wh_half
        #----------------------------------------------------#
        #   Top-left and bottom-right corners of the ground-truth boxes
        #----------------------------------------------------#
        b2_xy      = b2[..., :2]
        b2_wh      = b2[..., 2:4]
        b2_wh_half = b2_wh/2.
        b2_mins    = b2_xy - b2_wh_half
        b2_maxes   = b2_xy + b2_wh_half

        #----------------------------------------------------#
        #   IoU between all ground-truth and predicted boxes
        #----------------------------------------------------#
        intersect_mins  = torch.max(b1_mins, b2_mins)
        intersect_maxes = torch.min(b1_maxes, b2_maxes)
        intersect_wh    = torch.max(intersect_maxes - intersect_mins, torch.zeros_like(intersect_maxes))
        intersect_area  = intersect_wh[..., 0] * intersect_wh[..., 1]
        b1_area         = b1_wh[..., 0] * b1_wh[..., 1]
        b2_area         = b2_wh[..., 0] * b2_wh[..., 1]
        union_area      = b1_area + b2_area - intersect_area
        iou             = intersect_area / torch.clamp(union_area, min = 1e-6)

        #----------------------------------------------------#
        #   Distance between the box centres
        #----------------------------------------------------#
        center_distance = torch.sum(torch.pow((b1_xy - b2_xy), 2), axis=-1)

        #----------------------------------------------------#
        #   Top-left and bottom-right corners of the smallest
        #   box enclosing both boxes
        #----------------------------------------------------#
        enclose_mins  = torch.min(b1_mins, b2_mins)
        enclose_maxes = torch.max(b1_maxes, b2_maxes)
        enclose_wh    = torch.max(enclose_maxes - enclose_mins, torch.zeros_like(intersect_maxes))
        #----------------------------------------------------#
        #   Squared diagonal of the enclosing box
        #----------------------------------------------------#
        enclose_diagonal = torch.sum(torch.pow(enclose_wh, 2), axis=-1)
        ciou = iou - 1.0 * (center_distance) / torch.clamp(enclose_diagonal, min = 1e-6)

        v     = (4 / (math.pi ** 2)) * torch.pow((torch.atan(b1_wh[..., 0] / torch.clamp(b1_wh[..., 1], min = 1e-6)) - torch.atan(b2_wh[..., 0] / torch.clamp(b2_wh[..., 1], min = 1e-6))), 2)
        alpha = v / torch.clamp((1.0 - iou + v), min=1e-6)
        ciou  = ciou - alpha * v
        return ciou

    #---------------------------------------------------#
    #   Label smoothing
    #---------------------------------------------------#
    def smooth_labels(self, y_true, label_smoothing, num_classes):
        return y_true * (1.0 - label_smoothing) + label_smoothing / num_classes

    def forward(self, l, input, targets=None):
        #----------------------------------------------------#
        #   l        index of the effective feature layer in use
        #   input    shape bs, 3*(5+num_classes), 13, 13
        #                  bs, 3*(5+num_classes), 26, 26
        #   targets  ground-truth boxes [batch_size, num_gt, 5]
        #----------------------------------------------------#
        #--------------------------------#
        #   Number of images, feature-layer height and width
        #--------------------------------#
        bs   = input.size(0)
        in_h = input.size(2)
        in_w = input.size(3)
        #-----------------------------------------------------------------------#
        #   Compute the stride:
        #   how many pixels of the original image each feature point covers
        #
        #   On a 13x13 feature layer one feature point covers 32 pixels
        #   On a 26x26 feature layer one feature point covers 16 pixels
        #   stride_h = stride_w = 32, 16
        #-----------------------------------------------------------------------#
        stride_h = self.input_shape[0] / in_h
        stride_w = self.input_shape[1] / in_w
        #-------------------------------------------------#
        #   scaled_anchors are now relative to the feature layer
        #-------------------------------------------------#
        scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors]
        #-----------------------------------------------#
        #   The head outputs are reshaped as
        #   bs, 3 * (5+num_classes), 13, 13 => bs, 3, 5 + num_classes, 13, 13 => batch_size, 3, 13, 13, 5 + num_classes
        #
        #   batch_size, 3, 13, 13, 5 + num_classes
        #   batch_size, 3, 26, 26, 5 + num_classes
        #-----------------------------------------------#
        prediction = input.view(bs, len(self.anchors_mask[l]), self.bbox_attrs, in_h, in_w).permute(0, 1, 3, 4, 2).contiguous()

        #-----------------------------------------------#
        #   Adjustments to the prior-box centres
        #-----------------------------------------------#
        x = torch.sigmoid(prediction[..., 0])
        y = torch.sigmoid(prediction[..., 1])
        #-----------------------------------------------#
        #   Adjustments to the prior-box widths and heights
        #-----------------------------------------------#
        w = prediction[..., 2]
        h = prediction[..., 3]
        #-----------------------------------------------#
        #   Objectness confidence: is there an object?
        #-----------------------------------------------#
        conf = torch.sigmoid(prediction[..., 4])
        #-----------------------------------------------#
        #   Class confidences
        #-----------------------------------------------#
        pred_cls = torch.sigmoid(prediction[..., 5:])

        #-----------------------------------------------#
        #   The targets the network should have predicted
        #-----------------------------------------------#
        y_true, noobj_mask, box_loss_scale = self.get_target(l, targets, scaled_anchors, in_h, in_w)

        #---------------------------------------------------------------#
        #   Decode the predictions and measure their overlap with the
        #   ground truth; predictions with too much overlap are ignored,
        #   since those feature points are already fairly accurate and
        #   are unsuitable as negative samples
        #----------------------------------------------------------------#
        noobj_mask, pred_boxes = self.get_ignore(l, x, y, h, w, targets, scaled_anchors, in_h, in_w, noobj_mask)

        if self.cuda:
            y_true         = y_true.type_as(x)
            noobj_mask     = noobj_mask.type_as(x)
            box_loss_scale = box_loss_scale.type_as(x)
        #--------------------------------------------------------------------------#
        #   box_loss_scale is the product of the ground-truth width and height,
        #   both in 0-1, so the product is also in 0-1.
        #   2 - product means the larger the box, the smaller its weight,
        #   giving small boxes a larger weight.
        #   With the IoU loss the regression of large/medium/small targets is
        #   no longer imbalanced, so this scale is not used below.
        #--------------------------------------------------------------------------#
        box_loss_scale = 2 - box_loss_scale

        loss     = 0
        obj_mask = y_true[..., 4] == 1
        n        = torch.sum(obj_mask)
        if n != 0:
            #---------------------------------------------------------------#
            #   Difference between predictions and ground truth
            #   loss_loc    CIoU regression loss
            #   loss_cls    classification loss
            #---------------------------------------------------------------#
            ciou = self.box_ciou(pred_boxes, y_true[..., :4]).type_as(x)
            # loss_loc = torch.mean((1 - ciou)[obj_mask] * box_loss_scale[obj_mask])
            loss_loc = torch.mean((1 - ciou)[obj_mask])

            loss_cls = torch.mean(self.BCELoss(pred_cls[obj_mask], y_true[..., 5:][obj_mask]))
            loss += loss_loc * self.box_ratio + loss_cls * self.cls_ratio

        loss_conf = torch.mean(self.BCELoss(conf, obj_mask.type_as(conf))[noobj_mask.bool() | obj_mask])
        loss += loss_conf * self.balance[l] * self.obj_ratio
        # if n != 0:
        #     print(loss_loc * self.box_ratio, loss_cls * self.cls_ratio, loss_conf * self.balance[l] * self.obj_ratio)
        return loss

    def calculate_iou(self, _box_a, _box_b):
        #-----------------------------------------------------------#
        #   Top-left and bottom-right corners of the ground-truth boxes
        #-----------------------------------------------------------#
        b1_x1, b1_x2 = _box_a[:, 0] - _box_a[:, 2] / 2, _box_a[:, 0] + _box_a[:, 2] / 2
        b1_y1, b1_y2 = _box_a[:, 1] - _box_a[:, 3] / 2, _box_a[:, 1] + _box_a[:, 3] / 2
        #-----------------------------------------------------------#
        #   Top-left and bottom-right corners of the boxes obtained
        #   from the prior boxes
        #-----------------------------------------------------------#
        b2_x1, b2_x2 = _box_b[:, 0] - _box_b[:, 2] / 2, _box_b[:, 0] + _box_b[:, 2] / 2
        b2_y1, b2_y2 = _box_b[:, 1] - _box_b[:, 3] / 2, _box_b[:, 1] + _box_b[:, 3] / 2

        #-----------------------------------------------------------#
        #   Convert both to top-left / bottom-right form
        #-----------------------------------------------------------#
        box_a = torch.zeros_like(_box_a)
        box_b = torch.zeros_like(_box_b)
        box_a[:, 0], box_a[:, 1], box_a[:, 2], box_a[:, 3] = b1_x1, b1_y1, b1_x2, b1_y2
        box_b[:, 0], box_b[:, 1], box_b[:, 2], box_b[:, 3] = b2_x1, b2_y1, b2_x2, b2_y2

        #-----------------------------------------------------------#
        #   A is the number of ground-truth boxes, B the number of priors
        #-----------------------------------------------------------#
        A = box_a.size(0)
        B = box_b.size(0)

        #-----------------------------------------------------------#
        #   Intersection area
        #-----------------------------------------------------------#
        max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
        min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), box_b[:, :2].unsqueeze(0).expand(A, B, 2))
        inter  = torch.clamp((max_xy - min_xy), min=0)
        inter  = inter[:, :, 0] * inter[:, :, 1]
        #-----------------------------------------------------------#
        #   Areas of the predicted and ground-truth boxes
        #-----------------------------------------------------------#
        area_a = ((box_a[:, 2]-box_a[:, 0]) * (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter)  # [A,B]
        area_b = ((box_b[:, 2]-box_b[:, 0]) * (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter)  # [A,B]
        #-----------------------------------------------------------#
        #   IoU
        #-----------------------------------------------------------#
        union = area_a + area_b - inter
        return inter / union  # [A,B]

    def get_target(self, l, targets, anchors, in_h, in_w):
        #-----------------------------------------------------#
        #   Count how many images are in the batch
        #-----------------------------------------------------#
        bs = len(targets)
        #-----------------------------------------------------#
        #   Selects which prior boxes contain no object
        #-----------------------------------------------------#
        noobj_mask = torch.ones(bs, len(self.anchors_mask[l]), in_h, in_w, requires_grad = False)
        #-----------------------------------------------------#
        #   Makes the network pay more attention to small targets
        #-----------------------------------------------------#
        box_loss_scale = torch.zeros(bs, len(self.anchors_mask[l]), in_h, in_w, requires_grad = False)
        #-----------------------------------------------------#
        #   batch_size, 3, 13, 13, 5 + num_classes
        #-----------------------------------------------------#
        y_true = torch.zeros(bs, len(self.anchors_mask[l]), in_h, in_w, self.bbox_attrs, requires_grad = False)
        for b in range(bs):
            if len(targets[b]) == 0:
                continue
            batch_target = torch.zeros_like(targets[b])
            #-------------------------------------------------------#
            #   Centres of the positive samples on the feature layer
            #-------------------------------------------------------#
            batch_target[:, [0,2]] = targets[b][:, [0,2]] * in_w
            batch_target[:, [1,3]] = targets[b][:, [1,3]] * in_h
            batch_target[:, 4] = targets[b][:, 4]
            batch_target = batch_target.cpu()

            #-------------------------------------------------------#
            #   Reshape the ground-truth boxes
            #   num_true_box, 4
            #-------------------------------------------------------#
            gt_box = torch.FloatTensor(torch.cat((torch.zeros((batch_target.size(0), 2)), batch_target[:, 2:4]), 1))
            #-------------------------------------------------------#
            #   Reshape the prior boxes
            #   num_anchors, 4
            #-------------------------------------------------------#
            anchor_shapes = torch.FloatTensor(torch.cat((torch.zeros((len(anchors), 2)), torch.FloatTensor(anchors)), 1))
            #-------------------------------------------------------#
            #   Compute the IoU
            #   self.calculate_iou(gt_box, anchor_shapes) = [num_true_box, num_anchors]
            #   overlap of each ground-truth box with every prior box
            #   best_ns:
            #   [best overlap of each ground-truth box, index of its best-matching prior box]
            #-------------------------------------------------------#
            iou     = self.calculate_iou(gt_box, anchor_shapes)
            best_ns = torch.argmax(iou, dim=-1)
            sort_ns = torch.argsort(iou, dim=-1, descending=True)

            def check_in_anchors_mask(index, anchors_mask):
                for sub_anchors_mask in anchors_mask:
                    if index in sub_anchors_mask:
                        return True
                return False

            for t, best_n in enumerate(best_ns):
                #----------------------------------------#
                #   Guard against a matched prior box that is not in anchors_mask
                #----------------------------------------#
                if not check_in_anchors_mask(best_n, self.anchors_mask):
                    for index in sort_ns[t]:
                        if check_in_anchors_mask(index, self.anchors_mask):
                            best_n = index
                            break

                if best_n not in self.anchors_mask[l]:
                    continue
                #----------------------------------------#
                #   Which prior box of the current feature point this is
                #----------------------------------------#
                k = self.anchors_mask[l].index(best_n)
                #----------------------------------------#
                #   Which grid cell the ground-truth box belongs to
                #----------------------------------------#
                i = torch.floor(batch_target[t, 0]).long()
                j = torch.floor(batch_target[t, 1]).long()
                #----------------------------------------#
                #   The class of the ground-truth box
                #----------------------------------------#
                c = batch_target[t, 4].long()

                #----------------------------------------#
                #   noobj_mask marks feature points without a target
                #----------------------------------------#
                noobj_mask[b, k, j, i] = 0
                #----------------------------------------#
                #   tx, ty are the true centre-adjustment values
                #----------------------------------------#
                y_true[b, k, j, i, 0] = batch_target[t, 0]
                y_true[b, k, j, i, 1] = batch_target[t, 1]
                y_true[b, k, j, i, 2] = batch_target[t, 2]
                y_true[b, k, j, i, 3] = batch_target[t, 3]
                y_true[b, k, j, i, 4] = 1
                y_true[b, k, j, i, c + 5] = 1
                #----------------------------------------#
                #   Weighting ratio derived from xywh:
                #   large targets get a small loss weight, small targets a large one
                #----------------------------------------#
                box_loss_scale[b, k, j, i] = batch_target[t, 2] * batch_target[t, 3] / in_w / in_h
        return y_true, noobj_mask, box_loss_scale

    def get_ignore(self, l, x, y, h, w, targets, scaled_anchors, in_h, in_w, noobj_mask):
        #-----------------------------------------------------#
        #   Count how many images are in the batch
        #-----------------------------------------------------#
        bs = len(targets)

        #-----------------------------------------------------#
        #   Generate the grid; the prior-box centres sit at the
        #   top-left corners of the grid cells
        #-----------------------------------------------------#
        grid_x = torch.linspace(0, in_w - 1, in_w).repeat(in_h, 1).repeat(
            int(bs * len(self.anchors_mask[l])), 1, 1).view(x.shape).type_as(x)
        grid_y = torch.linspace(0, in_h - 1, in_h).repeat(in_w, 1).t().repeat(
            int(bs * len(self.anchors_mask[l])), 1, 1).view(y.shape).type_as(x)

        # Generate the prior-box widths and heights
        scaled_anchors_l = np.array(scaled_anchors)[self.anchors_mask[l]]
        anchor_w = torch.Tensor(scaled_anchors_l).index_select(1, torch.LongTensor([0])).type_as(x)
        anchor_h = torch.Tensor(scaled_anchors_l).index_select(1, torch.LongTensor([1])).type_as(x)

        anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(w.shape)
        anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(h.shape)
        #-------------------------------------------------------#
        #   Compute the adjusted prior-box centres and sizes
        #-------------------------------------------------------#
        pred_boxes_x = torch.unsqueeze(x + grid_x, -1)
        pred_boxes_y = torch.unsqueeze(y + grid_y, -1)
        pred_boxes_w = torch.unsqueeze(torch.exp(w) * anchor_w, -1)
        pred_boxes_h = torch.unsqueeze(torch.exp(h) * anchor_h, -1)
        pred_boxes   = torch.cat([pred_boxes_x, pred_boxes_y, pred_boxes_w, pred_boxes_h], dim = -1)
        for b in range(bs):
            #-------------------------------------------------------#
            #   Reshape the predictions
            #   pred_boxes_for_ignore   num_anchors, 4
            #-------------------------------------------------------#
            pred_boxes_for_ignore = pred_boxes[b].view(-1, 4)
            #-------------------------------------------------------#
            #   Convert the ground-truth boxes to feature-layer scale
            #   gt_box   num_true_box, 4
            #-------------------------------------------------------#
            if len(targets[b]) > 0:
                batch_target = torch.zeros_like(targets[b])
                #-------------------------------------------------------#
                #   Centres of the positive samples on the feature layer
                #-------------------------------------------------------#
                batch_target[:, [0,2]] = targets[b][:, [0,2]] * in_w
                batch_target[:, [1,3]] = targets[b][:, [1,3]] * in_h
                batch_target = batch_target[:, :4].type_as(x)
                #-------------------------------------------------------#
                #   Compute the IoU
                #   anch_ious   num_true_box, num_anchors
                #-------------------------------------------------------#
                anch_ious = self.calculate_iou(batch_target, pred_boxes_for_ignore)
                #-------------------------------------------------------#
                #   Best overlap of each prior box with any ground-truth box
                #   anch_ious_max   num_anchors
                #-------------------------------------------------------#
                anch_ious_max, _ = torch.max(anch_ious, dim = 0)
                anch_ious_max = anch_ious_max.view(pred_boxes[b].size()[:3])
                noobj_mask[b][anch_ious_max > self.ignore_threshold] = 0
        return noobj_mask, pred_boxes

def weights_init(net, init_type='normal', init_gain = 0.02):
    def init_func(m):
        classname = m.__class__.__name__
        if hasattr(m, 'weight') and classname.find('Conv') != -1:
            if init_type == 'normal':
                torch.nn.init.normal_(m.weight.data, 0.0, init_gain)
            elif init_type == 'xavier':
                torch.nn.init.xavier_normal_(m.weight.data, gain=init_gain)
            elif init_type == 'kaiming':
                torch.nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
            elif init_type == 'orthogonal':
                torch.nn.init.orthogonal_(m.weight.data, gain=init_gain)
            else:
                raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
        elif classname.find('BatchNorm2d') != -1:
            torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
            torch.nn.init.constant_(m.bias.data, 0.0)
    print('initialize network with %s type' % init_type)
    net.apply(init_func)

def get_lr_scheduler(lr_decay_type, lr, min_lr, total_iters, warmup_iters_ratio = 0.05, warmup_lr_ratio = 0.1, no_aug_iter_ratio = 0.05, step_num = 10):
    def yolox_warm_cos_lr(lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter, iters):
        if iters <= warmup_total_iters:
            # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start
            lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2) + warmup_lr_start
        elif iters >= total_iters - no_aug_iter:
            lr = min_lr
        else:
            lr = min_lr + 0.5 * (lr - min_lr) * (
                1.0 + math.cos(math.pi * (iters - warmup_total_iters) / (total_iters - warmup_total_iters - no_aug_iter))
            )
        return lr

    def step_lr(lr, decay_rate, step_size, iters):
        if step_size < 1:
            raise ValueError("step_size must be above 1.")
        n = iters // step_size
        out_lr = lr * decay_rate ** n
        return out_lr

    if lr_decay_type == "cos":
        warmup_total_iters = min(max(warmup_iters_ratio * total_iters, 1), 3)
        warmup_lr_start    = max(warmup_lr_ratio * lr, 1e-6)
        no_aug_iter        = min(max(no_aug_iter_ratio * total_iters, 1), 15)
        func = partial(yolox_warm_cos_lr, lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter)
    else:
        decay_rate = (min_lr / lr) ** (1 / (step_num - 1))
        step_size  = total_iters / step_num
        func = partial(step_lr, lr, decay_rate, step_size)

    return func

def set_optimizer_lr(optimizer, lr_scheduler_func, epoch):
    lr = lr_scheduler_func(epoch)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
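Usage note: a minimal sketch of how YOLOLosstiny might be instantiated and called on the 13x13 head. The anchor values come from model_data/yolotiny_anchors.txt, while the anchors_mask, batch size and target values are illustrative assumptions, not configuration taken from this commit:

import numpy as np
import torch

anchors   = np.array([[10,14], [23,27], [37,58], [81,82], [135,169], [344,319]])
criterion = YOLOLosstiny(anchors, num_classes=8, input_shape=[416, 416],
                         cuda=False, anchors_mask=[[3,4,5], [1,2,3]])
out0    = torch.randn(2, 3 * (5 + 8), 13, 13)          # raw 13x13 head output
targets = [torch.tensor([[0.5, 0.5, 0.2, 0.3, 1.0]]),  # one box per image:
           torch.tensor([[0.3, 0.6, 0.1, 0.2, 4.0]])]  # normalised xywh + class
loss0   = criterion(0, out0, targets)                  # l=0 selects the 13x13 layer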
utils/__init__.py
ADDED
@@ -0,0 +1 @@
#
utils/callbacks.py
ADDED
@@ -0,0 +1,71 @@
import datetime
import os

import torch
import matplotlib
matplotlib.use('Agg')
import scipy.signal
from matplotlib import pyplot as plt
from torch.utils.tensorboard import SummaryWriter


class LossHistory():
    def __init__(self, log_dir, model, input_shape):
        time_str      = datetime.datetime.strftime(datetime.datetime.now(), '%Y_%m_%d_%H_%M_%S')
        self.log_dir  = os.path.join(log_dir, "loss_" + str(time_str))
        self.losses   = []
        self.val_loss = []

        os.makedirs(self.log_dir)
        self.writer = SummaryWriter(self.log_dir)
        try:
            dummy_input = torch.randn(2, 3, input_shape[0], input_shape[1])
            self.writer.add_graph(model, dummy_input)
        except:
            pass

    def append_loss(self, epoch, loss, val_loss):
        if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir)

        self.losses.append(loss)
        self.val_loss.append(val_loss)

        with open(os.path.join(self.log_dir, "epoch_loss.txt"), 'a') as f:
            f.write(str(loss))
            f.write("\n")
        with open(os.path.join(self.log_dir, "epoch_val_loss.txt"), 'a') as f:
            f.write(str(val_loss))
            f.write("\n")

        self.writer.add_scalar('loss', loss, epoch)
        self.writer.add_scalar('val_loss', val_loss, epoch)
        self.loss_plot()

    def loss_plot(self):
        iters = range(len(self.losses))

        plt.figure()
        plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss')
        plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss')
        try:
            if len(self.losses) < 25:
                num = 5
            else:
                num = 15

            plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss')
            plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss')
        except:
            pass

        plt.grid(True)
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend(loc="upper right")

        plt.savefig(os.path.join(self.log_dir, "epoch_loss.png"))

        plt.cla()
        plt.close("all")
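Usage note: a minimal sketch of driving LossHistory from a training loop; the model and the loss values are placeholders:

import torch

model        = torch.nn.Conv2d(3, 8, 3)   # placeholder model
loss_history = LossHistory("logs", model, input_shape=[416, 416])
for epoch in range(3):
    train_loss, val_loss = 1.0 / (epoch + 1), 1.2 / (epoch + 1)   # placeholder values
    loss_history.append_loss(epoch, train_loss, val_loss)
# writes epoch_loss.txt, epoch_val_loss.txt and epoch_loss.png under logs/loss_<timestamp>/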
utils/dataloader.py
ADDED
@@ -0,0 +1,360 @@
from random import sample, shuffle

import cv2
import numpy as np
import torch
from PIL import Image
from torch.utils.data.dataset import Dataset

from utils.utils import cvtColor, preprocess_input


class YoloDataset(Dataset):
    def __init__(self, annotation_lines, input_shape, num_classes, epoch_length, mosaic, train, mosaic_ratio = 0.7):
        super(YoloDataset, self).__init__()
        self.annotation_lines = annotation_lines
        self.input_shape      = input_shape
        self.num_classes      = num_classes
        self.epoch_length     = epoch_length
        self.mosaic           = mosaic
        self.train            = train
        self.mosaic_ratio     = mosaic_ratio

        self.epoch_now        = -1
        self.length           = len(self.annotation_lines)

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        index = index % self.length

        #---------------------------------------------------#
        #   Apply random data augmentation during training;
        #   no random augmentation during validation
        #---------------------------------------------------#
        if self.mosaic:
            if self.rand() < 0.5 and self.epoch_now < self.epoch_length * self.mosaic_ratio:
                lines = sample(self.annotation_lines, 3)
                lines.append(self.annotation_lines[index])
                shuffle(lines)
                image, box = self.get_random_data_with_Mosaic(lines, self.input_shape)
            else:
                image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train)
        else:
            image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train)
        image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
        box = np.array(box, dtype=np.float32)
        if len(box) != 0:
            box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1]
            box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0]

            box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
            box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
        return image, box

    def rand(self, a=0, b=1):
        return np.random.rand()*(b-a) + a

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
        line = annotation_line.split()
        #------------------------------#
        #   Read the image and convert it to RGB
        #------------------------------#
        image = Image.open(line[0])
        image = cvtColor(image)
        #------------------------------#
        #   Image size and target size
        #------------------------------#
        iw, ih = image.size
        h, w   = input_shape
        #------------------------------#
        #   Read the boxes
        #------------------------------#
        box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])

        if not random:
            scale = min(w/iw, h/ih)
            nw = int(iw*scale)
            nh = int(ih*scale)
            dx = (w-nw)//2
            dy = (h-nh)//2

            #---------------------------------#
            #   Pad the unused part of the image with grey bars
            #---------------------------------#
            image      = image.resize((nw,nh), Image.BICUBIC)
            new_image  = Image.new('RGB', (w,h), (128,128,128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image, np.float32)

            #---------------------------------#
            #   Adjust the ground-truth boxes accordingly
            #---------------------------------#
            if len(box)>0:
                np.random.shuffle(box)
                box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
                box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
                box[:, 0:2][box[:, 0:2]<0] = 0
                box[:, 2][box[:, 2]>w] = w
                box[:, 3][box[:, 3]>h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w>1, box_h>1)]  # discard invalid boxes

            return image_data, box

        #------------------------------------------#
        #   Rescale the image and distort its aspect ratio
        #------------------------------------------#
        new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
        scale = self.rand(.25, 2)
        if new_ar < 1:
            nh = int(scale*h)
            nw = int(nh*new_ar)
        else:
            nw = int(scale*w)
            nh = int(nw/new_ar)
        image = image.resize((nw,nh), Image.BICUBIC)

        #------------------------------------------#
        #   Pad the unused part of the image with grey bars
        #------------------------------------------#
        dx = int(self.rand(0, w-nw))
        dy = int(self.rand(0, h-nh))
        new_image = Image.new('RGB', (w,h), (128,128,128))
        new_image.paste(image, (dx, dy))
        image = new_image

        #------------------------------------------#
        #   Flip the image
        #------------------------------------------#
        flip = self.rand()<.5
        if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)

        image_data = np.array(image, np.uint8)
        #---------------------------------#
        #   Colour-space (HSV) augmentation:
        #   compute the augmentation parameters
        #---------------------------------#
        r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
        #---------------------------------#
        #   Convert the image to HSV
        #---------------------------------#
        hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
        dtype = image_data.dtype
        #---------------------------------#
        #   Apply the transform
        #---------------------------------#
        x = np.arange(0, 256, dtype=r.dtype)
        lut_hue = ((x * r[0]) % 180).astype(dtype)
        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

        image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
        image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)

        #---------------------------------#
        #   Adjust the ground-truth boxes accordingly
        #---------------------------------#
        if len(box)>0:
            np.random.shuffle(box)
            box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
            box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
            if flip: box[:, [0,2]] = w - box[:, [2,0]]
            box[:, 0:2][box[:, 0:2]<0] = 0
            box[:, 2][box[:, 2]>w] = w
            box[:, 3][box[:, 3]>h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w>1, box_h>1)]

        return image_data, box

    def merge_bboxes(self, bboxes, cutx, cuty):
        merge_bbox = []
        for i in range(len(bboxes)):
            for box in bboxes[i]:
                tmp_box = []
                x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

                if i == 0:
                    if y1 > cuty or x1 > cutx:
                        continue
                    if y2 >= cuty and y1 <= cuty:
                        y2 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        x2 = cutx

                if i == 1:
                    if y2 < cuty or x1 > cutx:
                        continue
                    if y2 >= cuty and y1 <= cuty:
                        y1 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        x2 = cutx

                if i == 2:
                    if y2 < cuty or x2 < cutx:
                        continue
                    if y2 >= cuty and y1 <= cuty:
                        y1 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        x1 = cutx

                if i == 3:
                    if y1 > cuty or x2 < cutx:
                        continue
                    if y2 >= cuty and y1 <= cuty:
                        y2 = cuty
                    if x2 >= cutx and x1 <= cutx:
                        x1 = cutx
                tmp_box.append(x1)
                tmp_box.append(y1)
                tmp_box.append(x2)
                tmp_box.append(y2)
                tmp_box.append(box[-1])
                merge_bbox.append(tmp_box)
        return merge_bbox

    def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
        h, w = input_shape
        min_offset_x = self.rand(0.3, 0.7)
        min_offset_y = self.rand(0.3, 0.7)

        image_datas = []
        box_datas   = []
        index       = 0
        for line in annotation_line:
            #---------------------------------#
            #   Split each annotation line
            #---------------------------------#
            line_content = line.split()
            #---------------------------------#
            #   Open the image
            #---------------------------------#
            image = Image.open(line_content[0])
            image = cvtColor(image)

            #---------------------------------#
            #   Image size
            #---------------------------------#
            iw, ih = image.size
            #---------------------------------#
            #   Box positions
            #---------------------------------#
            box = np.array([np.array(list(map(int, box.split(',')))) for box in line_content[1:]])

            #---------------------------------#
            #   Whether to flip the image
            #---------------------------------#
            flip = self.rand()<.5
            if flip and len(box)>0:
                image = image.transpose(Image.FLIP_LEFT_RIGHT)
                box[:, [0,2]] = iw - box[:, [2,0]]

            #------------------------------------------#
            #   Rescale the image and distort its aspect ratio
            #------------------------------------------#
            new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
            scale = self.rand(.4, 1)
            if new_ar < 1:
                nh = int(scale*h)
                nw = int(nh*new_ar)
            else:
                nw = int(scale*w)
                nh = int(nw/new_ar)
            image = image.resize((nw, nh), Image.BICUBIC)

            #-----------------------------------------------#
            #   Place each image in its quadrant of the mosaic
            #-----------------------------------------------#
            if index == 0:
                dx = int(w*min_offset_x) - nw
                dy = int(h*min_offset_y) - nh
            elif index == 1:
                dx = int(w*min_offset_x) - nw
                dy = int(h*min_offset_y)
            elif index == 2:
                dx = int(w*min_offset_x)
                dy = int(h*min_offset_y)
            elif index == 3:
                dx = int(w*min_offset_x)
                dy = int(h*min_offset_y) - nh

            new_image = Image.new('RGB', (w,h), (128,128,128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image)

            index = index + 1
            box_data = []
            #---------------------------------#
            #   Re-process the boxes
            #---------------------------------#
            if len(box)>0:
                np.random.shuffle(box)
                box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
                box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
                box[:, 0:2][box[:, 0:2]<0] = 0
                box[:, 2][box[:, 2]>w] = w
                box[:, 3][box[:, 3]>h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w>1, box_h>1)]
                box_data = np.zeros((len(box),5))
                box_data[:len(box)] = box

            image_datas.append(image_data)
            box_datas.append(box_data)

        #---------------------------------#
        #   Cut the images and stitch them together
        #---------------------------------#
        cutx = int(w * min_offset_x)
        cuty = int(h * min_offset_y)

        new_image = np.zeros([h, w, 3])
        new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
        new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
        new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
        new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

        new_image = np.array(new_image, np.uint8)
        #---------------------------------#
        #   Colour-space (HSV) augmentation:
        #   compute the augmentation parameters
        #---------------------------------#
        r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
        #---------------------------------#
        #   Convert the image to HSV
        #---------------------------------#
        hue, sat, val = cv2.split(cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV))
        dtype = new_image.dtype
        #---------------------------------#
        #   Apply the transform
        #---------------------------------#
        x = np.arange(0, 256, dtype=r.dtype)
        lut_hue = ((x * r[0]) % 180).astype(dtype)
        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

        new_image = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
        new_image = cv2.cvtColor(new_image, cv2.COLOR_HSV2RGB)

        #---------------------------------#
        #   Post-process the boxes
        #---------------------------------#
        new_boxes = self.merge_bboxes(box_datas, cutx, cuty)

        return new_image, new_boxes

# Used as collate_fn in the DataLoader
def yolo_dataset_collate(batch):
    images = []
    bboxes = []
    for img, box in batch:
        images.append(img)
        bboxes.append(box)
    images = torch.from_numpy(np.array(images)).type(torch.FloatTensor)
    bboxes = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in bboxes]
    return images, bboxes
utils/utils.py
ADDED
@@ -0,0 +1,62 @@
import numpy as np
from PIL import Image

#---------------------------------------------------------#
#   Convert images to RGB so that greyscale images do not
#   break prediction. The code only supports RGB images;
#   every other type is converted to RGB.
#---------------------------------------------------------#
def cvtColor(image):
    if len(np.shape(image)) == 3 and np.shape(image)[2] == 3:
        return image
    else:
        image = image.convert('RGB')
        return image

#---------------------------------------------------#
#   Resize the input image
#---------------------------------------------------#
def resize_image(image, size, letterbox_image):
    iw, ih = image.size
    w, h   = size
    if letterbox_image:
        scale = min(w/iw, h/ih)
        nw = int(iw*scale)
        nh = int(ih*scale)

        image = image.resize((nw,nh), Image.BICUBIC)
        new_image = Image.new('RGB', size, (128,128,128))
        new_image.paste(image, ((w-nw)//2, (h-nh)//2))
    else:
        new_image = image.resize((w, h), Image.BICUBIC)
    return new_image

#---------------------------------------------------#
#   Load the class names
#---------------------------------------------------#
def get_classes(classes_path):
    with open(classes_path, encoding='utf-8') as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names, len(class_names)

#---------------------------------------------------#
#   Load the prior boxes (anchors)
#---------------------------------------------------#
def get_anchors(anchors_path):
    '''loads the anchors from a file'''
    with open(anchors_path, encoding='utf-8') as f:
        anchors = f.readline()
    anchors = [float(x) for x in anchors.split(',')]
    anchors = np.array(anchors).reshape(-1, 2)
    return anchors, len(anchors)

#---------------------------------------------------#
#   Get the current learning rate
#---------------------------------------------------#
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

def preprocess_input(image):
    image /= 255.0
    return image
utils/utils_bbox.py
ADDED
@@ -0,0 +1,227 @@
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
from torchvision.ops import nms
|
4 |
+
import numpy as np
|
5 |
+
|
6 |
+
class DecodeBox():
|
7 |
+
def __init__(self, anchors, num_classes, input_shape, anchors_mask = [[6,7,8], [3,4,5], [0,1,2]]):
|
8 |
+
super(DecodeBox, self).__init__()
|
9 |
+
self.anchors = anchors
|
10 |
+
self.num_classes = num_classes
|
11 |
+
self.bbox_attrs = 5 + num_classes
|
12 |
+
self.input_shape = input_shape
|
13 |
+
#-----------------------------------------------------------#
|
14 |
+
# 13x13的特征层对应的anchor是[142, 110],[192, 243],[459, 401]
|
15 |
+
# 26x26的特征层对应的anchor是[36, 75],[76, 55],[72, 146]
|
16 |
+
# 52x52的特征层对应的anchor是[12, 16],[19, 36],[40, 28]
|
17 |
+
#-----------------------------------------------------------#
|
18 |
+
self.anchors_mask = anchors_mask
|
19 |
+
|
20 |
+
def decode_box(self, inputs):
|
21 |
+
outputs = []
|
22 |
+
for i, input in enumerate(inputs):
|
23 |
+
#-----------------------------------------------#
|
24 |
+
# 输入的input一共有三个,他们的shape分别是
|
25 |
+
# batch_size, 255, 13, 13
|
26 |
+
# batch_size, 255, 26, 26
|
27 |
+
# batch_size, 255, 52, 52
|
28 |
+
#-----------------------------------------------#
|
29 |
+
batch_size = input.size(0)
|
30 |
+
input_height = input.size(2)
|
31 |
+
input_width = input.size(3)
|
32 |
+
|
33 |
+
#-----------------------------------------------#
|
34 |
+
# 输入为416x416时
|
35 |
+
# stride_h = stride_w = 32、16、8
|
36 |
+
#-----------------------------------------------#
|
37 |
+
stride_h = self.input_shape[0] / input_height
|
38 |
+
stride_w = self.input_shape[1] / input_width
|
39 |
+
#-------------------------------------------------#
|
40 |
+
# 此时获得的scaled_anchors大小是相对于特征层的
|
41 |
+
#-------------------------------------------------#
|
42 |
+
scaled_anchors = [(anchor_width / stride_w, anchor_height / stride_h) for anchor_width, anchor_height in self.anchors[self.anchors_mask[i]]]
|
43 |
+
|
44 |
+
#-----------------------------------------------#
|
45 |
+
# 输入的input一共有三个,他们的shape分别是
|
46 |
+
# batch_size, 3, 13, 13, 85
|
47 |
+
# batch_size, 3, 26, 26, 85
|
48 |
+
# batch_size, 3, 52, 52, 85
|
49 |
+
#-----------------------------------------------#
|
50 |
+
prediction = input.view(batch_size, len(self.anchors_mask[i]),
|
51 |
+
self.bbox_attrs, input_height, input_width).permute(0, 1, 3, 4, 2).contiguous()
|
52 |
+
|
53 |
+
#-----------------------------------------------#
|
54 |
+
# 先验框的中心位置的调整参数
|
55 |
+
#-----------------------------------------------#
|
56 |
+
x = torch.sigmoid(prediction[..., 0])
|
57 |
+
y = torch.sigmoid(prediction[..., 1])
|
58 |
+
#-----------------------------------------------#
|
59 |
+
# 先验框的宽高调整参数
|
60 |
+
#-----------------------------------------------#
|
61 |
+
w = prediction[..., 2]
|
62 |
+
h = prediction[..., 3]
|
63 |
+
#-----------------------------------------------#
|
64 |
+
# 获得置信度,是否有物体
|
65 |
+
#-----------------------------------------------#
|
66 |
+
conf = torch.sigmoid(prediction[..., 4])
|
67 |
+
#-----------------------------------------------#
|
68 |
+
# 种类置信度
|
69 |
+
#-----------------------------------------------#
|
70 |
+
pred_cls = torch.sigmoid(prediction[..., 5:])
|
71 |
+
|
72 |
+
FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
|
73 |
+
LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
|
74 |
+
|
75 |
+
#----------------------------------------------------------#
|
76 |
+
# 生成网格,先验框中心,网格左上角
|
77 |
+
# batch_size,3,13,13
|
78 |
+
#----------------------------------------------------------#
|
79 |
+
grid_x = torch.linspace(0, input_width - 1, input_width).repeat(input_height, 1).repeat(
|
80 |
+
batch_size * len(self.anchors_mask[i]), 1, 1).view(x.shape).type(FloatTensor)
|
81 |
+
grid_y = torch.linspace(0, input_height - 1, input_height).repeat(input_width, 1).t().repeat(
|
82 |
+
batch_size * len(self.anchors_mask[i]), 1, 1).view(y.shape).type(FloatTensor)
|
83 |
+
|
84 |
+
#----------------------------------------------------------#
|
85 |
+
# 按照网格格式生成先验框的宽高
|
86 |
+
# batch_size,3,13,13
|
87 |
+
#----------------------------------------------------------#
|
88 |
+
anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
|
89 |
+
anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))
|
90 |
+
anchor_w = anchor_w.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(w.shape)
|
91 |
+
anchor_h = anchor_h.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(h.shape)
|
92 |
+
|
93 |
+
#----------------------------------------------------------#
|
94 |
+
# 利用预测结果对先验框进行调整
|
95 |
+
# 首先调整先验框的中心,从先验框中心向右下角偏移
|
96 |
+
# 再调整先验框的宽高。
|
97 |
+
#----------------------------------------------------------#
|
98 |
+
pred_boxes = FloatTensor(prediction[..., :4].shape)
|
99 |
+
pred_boxes[..., 0] = x.data + grid_x
|
100 |
+
pred_boxes[..., 1] = y.data + grid_y
|
101 |
+
pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
|
102 |
+
pred_boxes[..., 3] = torch.exp(h.data) * anchor_h
|
103 |
+
|
104 |
+
#----------------------------------------------------------#
|
105 |
+
# 将输出结果归一化成小数的形式
|
106 |
+
#----------------------------------------------------------#
|
107 |
+
_scale = torch.Tensor([input_width, input_height, input_width, input_height]).type(FloatTensor)
|
108 |
+
output = torch.cat((pred_boxes.view(batch_size, -1, 4) / _scale,
|
109 |
+
conf.view(batch_size, -1, 1), pred_cls.view(batch_size, -1, self.num_classes)), -1)
|
110 |
+
outputs.append(output.data)
|
111 |
+
return outputs
|
112 |
+
|
    def yolo_correct_boxes(self, box_xy, box_wh, input_shape, image_shape, letterbox_image):
        #-----------------------------------------------------------------#
        #   Put the y axis first so the boxes can be multiplied directly
        #   by the (height, width) image shape.
        #-----------------------------------------------------------------#
        box_yx = box_xy[..., ::-1]
        box_hw = box_wh[..., ::-1]
        input_shape = np.array(input_shape)
        image_shape = np.array(image_shape)

        if letterbox_image:
            #-----------------------------------------------------------------#
            #   offset is the shift of the valid image area relative to the
            #   top-left corner of the letterboxed input.
            #   new_shape is the image shape after resizing.
            #-----------------------------------------------------------------#
            new_shape = np.round(image_shape * np.min(input_shape / image_shape))
            offset  = (input_shape - new_shape) / 2. / input_shape
            scale   = input_shape / new_shape

            box_yx  = (box_yx - offset) * scale
            box_hw *= scale

        box_mins    = box_yx - (box_hw / 2.)
        box_maxes   = box_yx + (box_hw / 2.)
        boxes   = np.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]], axis=-1)
        boxes  *= np.concatenate([image_shape, image_shape], axis=-1)
        return boxes

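    #--------------------------------------------------------------------------------------#
    #   A worked example of the letterbox correction (the 416x416 input and 480x640 image
    #   are illustrative, not values from this project):
    #       new_shape = round([480, 640] * min(416/480, 416/640)) = [312, 416]
    #       offset    = ([416, 416] - [312, 416]) / 2 / [416, 416] = [0.125, 0.0]
    #       scale     = [416/312, 416/416]                         = [1.333, 1.0]
    #   so a box center at (0.5, 0.5) of the padded input maps back to
    #   ((0.5 - 0.125) * 1.333, (0.5 - 0.0) * 1.0) = (0.5, 0.5) of the original image.
    #--------------------------------------------------------------------------------------#
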
    def non_max_suppression(self, prediction, num_classes, input_shape, image_shape, letterbox_image, conf_thres=0.5, nms_thres=0.4):
        #----------------------------------------------------------#
        #   Convert the predictions from center/width-height format
        #   to top-left/bottom-right corner format.
        #   prediction  [batch_size, num_anchors, 85]
        #----------------------------------------------------------#
        box_corner          = prediction.new(prediction.shape)
        box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
        box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
        box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
        box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
        prediction[:, :, :4] = box_corner[:, :, :4]

        output = [None for _ in range(len(prediction))]
        for i, image_pred in enumerate(prediction):
            #----------------------------------------------------------#
            #   Take the max over the class predictions.
            #   class_conf  [num_anchors, 1]    class confidence
            #   class_pred  [num_anchors, 1]    class index
            #----------------------------------------------------------#
            class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True)

            #----------------------------------------------------------#
            #   First round of filtering by confidence.
            #----------------------------------------------------------#
            conf_mask = (image_pred[:, 4] * class_conf[:, 0] >= conf_thres).squeeze()

            #----------------------------------------------------------#
            #   Keep only the predictions above the threshold.
            #----------------------------------------------------------#
            image_pred = image_pred[conf_mask]
            class_conf = class_conf[conf_mask]
            class_pred = class_pred[conf_mask]
            if not image_pred.size(0):
                continue
            #-------------------------------------------------------------------------#
            #   detections  [num_anchors, 7]
            #   the 7 columns are: x1, y1, x2, y2, obj_conf, class_conf, class_pred
            #-------------------------------------------------------------------------#
            detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)

            #------------------------------------------#
            #   Collect all classes present in the predictions.
            #------------------------------------------#
            unique_labels = detections[:, -1].cpu().unique()

            if prediction.is_cuda:
                unique_labels = unique_labels.cuda()
                detections = detections.cuda()

            for c in unique_labels:
                #------------------------------------------#
                #   Take all score-filtered detections of this class.
                #------------------------------------------#
                detections_class = detections[detections[:, -1] == c]

                #------------------------------------------#
                #   torchvision's built-in non-max suppression is faster!
                #------------------------------------------#
                keep = nms(
                    detections_class[:, :4],
                    detections_class[:, 4] * detections_class[:, 5],
                    nms_thres
                )
                max_detections = detections_class[keep]

                # # Sort by objectness * class confidence.
                # _, conf_sort_index = torch.sort(detections_class[:, 4]*detections_class[:, 5], descending=True)
                # detections_class = detections_class[conf_sort_index]
                # # Perform non-max suppression manually.
                # max_detections = []
                # while detections_class.size(0):
                #     # Keep the highest-confidence box, then drop the remaining boxes whose IoU with it exceeds nms_thres.
                #     max_detections.append(detections_class[0].unsqueeze(0))
                #     if len(detections_class) == 1:
                #         break
                #     ious = bbox_iou(max_detections[-1], detections_class[1:])
                #     detections_class = detections_class[1:][ious < nms_thres]
                # # Stack the kept boxes.
                # max_detections = torch.cat(max_detections).data

                # Add max detections to outputs
                output[i] = max_detections if output[i] is None else torch.cat((output[i], max_detections))

            if output[i] is not None:
                output[i] = output[i].cpu().numpy()
                box_xy, box_wh = (output[i][:, 0:2] + output[i][:, 2:4]) / 2, output[i][:, 2:4] - output[i][:, 0:2]
                output[i][:, :4] = self.yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image)
        return output
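The per-class loop above delegates the actual suppression to torchvision. A minimal, self-contained check of that call, with made-up boxes and scores (not data from this project):

import torch
from torchvision.ops import nms

boxes = torch.tensor([[ 0.,  0., 10., 10.],
                      [ 1.,  1., 11., 11.],    # IoU ~0.68 with the first box
                      [20., 20., 30., 30.]])   # disjoint, always kept
scores = torch.tensor([0.9, 0.8, 0.7])
keep = nms(boxes, scores, iou_threshold=0.4)
print(keep)                                    # tensor([0, 2]); the overlapping box is suppressed
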
utils/utils_fit.py
ADDED
@@ -0,0 +1,128 @@
import os

import torch
from tqdm import tqdm

from utils.utils import get_lr


def fit_one_epoch(model_train, model, yolo_loss, loss_history, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, Epoch, cuda, fp16, scaler, save_period, save_dir, local_rank=0):
    loss        = 0
    val_loss    = 0

    if local_rank == 0:
        print('Start Train')
        pbar = tqdm(total=epoch_step, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3)
    model_train.train()
    for iteration, batch in enumerate(gen):
        if iteration >= epoch_step:
            break

        images, targets = batch[0], batch[1]
        with torch.no_grad():
            if cuda:
                images  = images.cuda()
                targets = [ann.cuda() for ann in targets]
        #----------------------#
        #   Zero the gradients.
        #----------------------#
        optimizer.zero_grad()
        if not fp16:
            #----------------------#
            #   Forward pass.
            #----------------------#
            outputs = model_train(images)

            loss_value_all = 0
            #----------------------#
            #   Compute the loss.
            #----------------------#
            for l in range(len(outputs)):
                loss_item = yolo_loss(l, outputs[l], targets)
                loss_value_all += loss_item
            loss_value = loss_value_all

            #----------------------#
            #   Backward pass.
            #----------------------#
            loss_value.backward()
            optimizer.step()
        else:
            from torch.cuda.amp import autocast
            with autocast():
                #----------------------#
                #   Forward pass.
                #----------------------#
                outputs = model_train(images)

                loss_value_all = 0
                #----------------------#
                #   Compute the loss.
                #----------------------#
                for l in range(len(outputs)):
                    loss_item = yolo_loss(l, outputs[l], targets)
                    loss_value_all += loss_item
                loss_value = loss_value_all

            #----------------------#
            #   Backward pass with gradient scaling.
            #----------------------#
            scaler.scale(loss_value).backward()
            scaler.step(optimizer)
            scaler.update()

        loss += loss_value.item()

        if local_rank == 0:
            pbar.set_postfix(**{'loss'  : loss / (iteration + 1),
                                'lr'    : get_lr(optimizer)})
            pbar.update(1)

    if local_rank == 0:
        pbar.close()
        print('Finish Train')
        print('Start Validation')
        pbar = tqdm(total=epoch_step_val, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3)

    model_train.eval()
    for iteration, batch in enumerate(gen_val):
        if iteration >= epoch_step_val:
            break
        images, targets = batch[0], batch[1]
        with torch.no_grad():
            if cuda:
                images  = images.cuda()
                targets = [ann.cuda() for ann in targets]
            #----------------------#
            #   Zero the gradients.
            #----------------------#
            optimizer.zero_grad()
            #----------------------#
            #   Forward pass.
            #----------------------#
            outputs = model_train(images)

            loss_value_all = 0
            #----------------------#
            #   Compute the loss.
            #----------------------#
            for l in range(len(outputs)):
                loss_item = yolo_loss(l, outputs[l], targets)
                loss_value_all += loss_item
            loss_value = loss_value_all

        val_loss += loss_value.item()
        if local_rank == 0:
            pbar.set_postfix(**{'val_loss': val_loss / (iteration + 1)})
            pbar.update(1)

    if local_rank == 0:
        pbar.close()
        print('Finish Validation')
        loss_history.append_loss(epoch + 1, loss / epoch_step, val_loss / epoch_step_val)
        print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
        print('Total Loss: %.3f || Val Loss: %.3f ' % (loss / epoch_step, val_loss / epoch_step_val))
        if (epoch + 1) % save_period == 0 or epoch + 1 == Epoch:
            torch.save(model.state_dict(), os.path.join(save_dir, "ep%03d-loss%.3f-val_loss%.3f.pth" % (epoch + 1, loss / epoch_step, val_loss / epoch_step_val)))
        #   Always save the latest weights as well.
        torch.save(model.state_dict(), os.path.join(save_dir, "last.pth"))
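The fp16 branch follows the standard torch.cuda.amp recipe: scale the loss, step through the scaler, then update it. A minimal, self-contained sketch of the same pattern with a toy model; it needs a CUDA device, and the model and data are made up:

import torch
from torch.cuda.amp import GradScaler, autocast

model     = torch.nn.Linear(4, 2).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
scaler    = GradScaler()

x, y = torch.randn(8, 4).cuda(), torch.randn(8, 2).cuda()
optimizer.zero_grad()
with autocast():
    loss = torch.nn.functional.mse_loss(model(x), y)   # forward pass in mixed precision
scaler.scale(loss).backward()   # scale the loss so fp16 gradients do not underflow
scaler.step(optimizer)          # unscales the gradients, then calls optimizer.step()
scaler.update()                 # adapt the scale factor for the next iteration
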
utils/utils_map.py
ADDED
@@ -0,0 +1,901 @@
import glob
import json
import math
import operator
import os
import shutil
import sys

import cv2
import matplotlib.pyplot as plt
import numpy as np

'''
    0,0 ------> x (width)
     |
     |  (Left,Top)
     |      *_________
     |      |         |
     |      |         |
     y      |_________|
  (height)            *
                (Right,Bottom)
'''

def log_average_miss_rate(precision, fp_cumsum, num_images):
    """
        log-average miss rate:
            Calculated by averaging miss rates at 9 evenly spaced FPPI points
            between 1e-2 and 1e0, in log-space.

        output:
                lamr | log-average miss rate
                mr   | miss rate
                fppi | false positives per image

        references:
            [1] Dollar, Piotr, et al. "Pedestrian Detection: An Evaluation of the
               State of the Art." Pattern Analysis and Machine Intelligence, IEEE
               Transactions on 34.4 (2012): 743 - 761.
    """

    if precision.size == 0:
        lamr = 0
        mr = 1
        fppi = 0
        return lamr, mr, fppi

    fppi = fp_cumsum / float(num_images)
    mr = (1 - precision)

    fppi_tmp = np.insert(fppi, 0, -1.0)
    mr_tmp = np.insert(mr, 0, 1.0)

    ref = np.logspace(-2.0, 0.0, num = 9)
    for i, ref_i in enumerate(ref):
        j = np.where(fppi_tmp <= ref_i)[-1][-1]
        ref[i] = mr_tmp[j]

    lamr = math.exp(np.mean(np.log(np.maximum(1e-10, ref))))

    return lamr, mr, fppi

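# A tiny numeric walk-through of the function above (made-up cumulative values for
# 4 detections over 10 images; assumes log_average_miss_rate is in scope):
#     precision = np.array([0.5, 0.67, 0.75, 0.6]); fp_cumsum = np.array([1, 1, 1, 2])
#     fppi = [0.1, 0.1, 0.1, 0.2], mr = [0.5, 0.33, 0.25, 0.4]
#     sampling mr at the 9 log-spaced FPPI points gives [1, 1, 1, 1, 0.25, 0.25, 0.4, 0.4, 0.4],
#     whose geometric mean is lamr ~= 0.54
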
"""
 Print an error message and exit.
"""
def error(msg):
    print(msg)
    sys.exit(1)     # non-zero exit status to signal failure

"""
 Check if the value is a float strictly between 0.0 and 1.0.
"""
def is_float_between_0_and_1(value):
    try:
        val = float(value)
        return 0.0 < val < 1.0
    except ValueError:
        return False

"""
 Calculate the AP given the recall and precision arrays.
    1st) We compute a version of the measured precision/recall curve with
         precision monotonically decreasing.
    2nd) We compute the AP as the area under this curve by numerical integration.
"""
def voc_ap(rec, prec):
    """
    --- Official matlab code VOC2012---
    mrec=[0 ; rec ; 1];
    mpre=[0 ; prec ; 0];
    for i=numel(mpre)-1:-1:1
            mpre(i)=max(mpre(i),mpre(i+1));
    end
    i=find(mrec(2:end)~=mrec(1:end-1))+1;
    ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
    """
    rec.insert(0, 0.0)      # insert 0.0 at beginning of list
    rec.append(1.0)         # insert 1.0 at end of list
    mrec = rec[:]
    prec.insert(0, 0.0)     # insert 0.0 at beginning of list
    prec.append(0.0)        # insert 0.0 at end of list
    mpre = prec[:]
    """
     This part makes the precision monotonically decreasing
        (goes from the end to the beginning)
        matlab: for i=numel(mpre)-1:-1:1
                    mpre(i)=max(mpre(i),mpre(i+1));
    """
    for i in range(len(mpre)-2, -1, -1):
        mpre[i] = max(mpre[i], mpre[i+1])
    """
     This part creates a list of indexes where the recall changes
        matlab: i=find(mrec(2:end)~=mrec(1:end-1))+1;
    """
    i_list = []
    for i in range(1, len(mrec)):
        if mrec[i] != mrec[i-1]:
            i_list.append(i)    # if it was matlab it would be i + 1
    """
     The Average Precision (AP) is the area under the curve
        (numerical integration)
        matlab: ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
    """
    ap = 0.0
    for i in i_list:
        ap += ((mrec[i]-mrec[i-1])*mpre[i])
    return ap, mrec, mpre

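# A worked example with toy values (not project data):
#     rec = [0.5, 0.5, 1.0], prec = [1.0, 0.5, 0.667]
#     after padding:            mrec = [0, 0.5, 0.5, 1.0, 1.0], mpre = [0, 1.0, 0.5, 0.667, 0]
#     after the monotonic pass: mpre = [1.0, 1.0, 0.667, 0.667, 0]
#     recall changes at indices 1 and 3, so
#     ap = (0.5 - 0) * 1.0 + (1.0 - 0.5) * 0.667 ~= 0.83
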
"""
 Convert the lines of a file to a list
"""
def file_lines_to_list(path):
    # open txt file lines to a list
    with open(path) as f:
        content = f.readlines()
    # remove whitespace characters like `\n` at the end of each line
    content = [x.strip() for x in content]
    return content

"""
 Draws text in image
"""
def draw_text_in_image(img, text, pos, color, line_width):
    font = cv2.FONT_HERSHEY_PLAIN
    fontScale = 1
    lineType = 1
    bottomLeftCornerOfText = pos
    cv2.putText(img, text,
            bottomLeftCornerOfText,
            font,
            fontScale,
            color,
            lineType)
    text_width, _ = cv2.getTextSize(text, font, fontScale, lineType)[0]
    return img, (line_width + text_width)

"""
 Plot - adjust axes
"""
def adjust_axes(r, t, fig, axes):
    # get text width for re-scaling
    bb = t.get_window_extent(renderer=r)
    text_width_inches = bb.width / fig.dpi
    # get axis width in inches
    current_fig_width = fig.get_figwidth()
    new_fig_width = current_fig_width + text_width_inches
    proportion = new_fig_width / current_fig_width
    # get axis limit
    x_lim = axes.get_xlim()
    axes.set_xlim([x_lim[0], x_lim[1]*proportion])

"""
 Draw plot using Matplotlib
"""
def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label, output_path, to_show, plot_color, true_p_bar):
    # sort the dictionary by increasing value, into a list of tuples
    sorted_dic_by_value = sorted(dictionary.items(), key=operator.itemgetter(1))
    # unpacking the list of tuples into two lists
    sorted_keys, sorted_values = zip(*sorted_dic_by_value)

    if true_p_bar != "":
        """
         Special case to draw in:
            - green -> TP: True Positives (object detected and matches ground-truth)
            - red -> FP: False Positives (object detected but does not match ground-truth)
            - orange -> FN: False Negatives (object not detected but present in the ground-truth)
        """
        fp_sorted = []
        tp_sorted = []
        for key in sorted_keys:
            fp_sorted.append(dictionary[key] - true_p_bar[key])
            tp_sorted.append(true_p_bar[key])
        plt.barh(range(n_classes), fp_sorted, align='center', color='crimson', label='False Positive')
        plt.barh(range(n_classes), tp_sorted, align='center', color='forestgreen', label='True Positive', left=fp_sorted)
        # add legend
        plt.legend(loc='lower right')
        """
         Write number on side of bar
        """
        fig = plt.gcf()     # gcf - get current figure
        axes = plt.gca()
        r = fig.canvas.get_renderer()
        for i, val in enumerate(sorted_values):
            fp_val = fp_sorted[i]
            tp_val = tp_sorted[i]
            fp_str_val = " " + str(fp_val)
            tp_str_val = fp_str_val + " " + str(tp_val)
            # trick to paint multicolor with offset:
            # first paint everything and then repaint the first number
            t = plt.text(val, i, tp_str_val, color='forestgreen', va='center', fontweight='bold')
            plt.text(val, i, fp_str_val, color='crimson', va='center', fontweight='bold')
            if i == (len(sorted_values)-1):     # largest bar
                adjust_axes(r, t, fig, axes)
    else:
        plt.barh(range(n_classes), sorted_values, color=plot_color)
        """
         Write number on side of bar
        """
        fig = plt.gcf()     # gcf - get current figure
        axes = plt.gca()
        r = fig.canvas.get_renderer()
        for i, val in enumerate(sorted_values):
            str_val = " " + str(val)    # add a space before
            if val < 1.0:
                str_val = " {0:.2f}".format(val)
            t = plt.text(val, i, str_val, color=plot_color, va='center', fontweight='bold')
            # re-set axes to show number inside the figure
            if i == (len(sorted_values)-1):     # largest bar
                adjust_axes(r, t, fig, axes)
    # set window title via the figure manager (canvas.set_window_title was removed in Matplotlib 3.6)
    fig.canvas.manager.set_window_title(window_title)
    # write classes in y axis
    tick_font_size = 12
    plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size)
    """
     Re-scale height accordingly
    """
    init_height = fig.get_figheight()
    # compute the matrix height in points and inches
    dpi = fig.dpi
    height_pt = n_classes * (tick_font_size * 1.4)      # 1.4 (some spacing)
    height_in = height_pt / dpi
    # compute the required figure height
    top_margin = 0.15       # in percentage of the figure height
    bottom_margin = 0.05    # in percentage of the figure height
    figure_height = height_in / (1 - top_margin - bottom_margin)
    # set new height
    if figure_height > init_height:
        fig.set_figheight(figure_height)

    # set plot title
    plt.title(plot_title, fontsize=14)
    # set axis titles
    # plt.xlabel('classes')
    plt.xlabel(x_label, fontsize='large')
    # adjust size of window
    fig.tight_layout()
    # save the plot
    fig.savefig(output_path)
    # show image
    if to_show:
        plt.show()
    # close the plot
    plt.close()

def get_map(MINOVERLAP, draw_plot, path = './map_out'):
    GT_PATH             = os.path.join(path, 'ground-truth')
    DR_PATH             = os.path.join(path, 'detection-results')
    IMG_PATH            = os.path.join(path, 'images-optional')
    TEMP_FILES_PATH     = os.path.join(path, '.temp_files')
    RESULTS_FILES_PATH  = os.path.join(path, 'results')

    show_animation = True
    if os.path.exists(IMG_PATH):
        for dirpath, dirnames, files in os.walk(IMG_PATH):
            if not files:
                show_animation = False
    else:
        show_animation = False

    if not os.path.exists(TEMP_FILES_PATH):
        os.makedirs(TEMP_FILES_PATH)

    if os.path.exists(RESULTS_FILES_PATH):
        shutil.rmtree(RESULTS_FILES_PATH)
    if draw_plot:
        os.makedirs(os.path.join(RESULTS_FILES_PATH, "AP"))
        os.makedirs(os.path.join(RESULTS_FILES_PATH, "F1"))
        os.makedirs(os.path.join(RESULTS_FILES_PATH, "Recall"))
        os.makedirs(os.path.join(RESULTS_FILES_PATH, "Precision"))
    if show_animation:
        os.makedirs(os.path.join(RESULTS_FILES_PATH, "images", "detections_one_by_one"))

    ground_truth_files_list = glob.glob(GT_PATH + '/*.txt')
    if len(ground_truth_files_list) == 0:
        error("Error: No ground-truth files found!")
    ground_truth_files_list.sort()
    gt_counter_per_class     = {}
    counter_images_per_class = {}

    for txt_file in ground_truth_files_list:
        file_id     = txt_file.split(".txt", 1)[0]
        file_id     = os.path.basename(os.path.normpath(file_id))
        temp_path   = os.path.join(DR_PATH, (file_id + ".txt"))
        if not os.path.exists(temp_path):
            error_msg = "Error. File not found: {}\n".format(temp_path)
            error(error_msg)
        lines_list      = file_lines_to_list(txt_file)
        bounding_boxes  = []
        is_difficult    = False
        already_seen_classes = []
        for line in lines_list:
            try:
                if "difficult" in line:
                    class_name, left, top, right, bottom, _difficult = line.split()
                    is_difficult = True
                else:
                    class_name, left, top, right, bottom = line.split()
            except ValueError:
                # the class name contains spaces; rebuild it from the remaining tokens
                if "difficult" in line:
                    line_split  = line.split()
                    _difficult  = line_split[-1]
                    bottom      = line_split[-2]
                    right       = line_split[-3]
                    top         = line_split[-4]
                    left        = line_split[-5]
                    class_name  = ""
                    for name in line_split[:-5]:
                        class_name += name + " "
                    class_name  = class_name[:-1]
                    is_difficult = True
                else:
                    line_split  = line.split()
                    bottom      = line_split[-1]
                    right       = line_split[-2]
                    top         = line_split[-3]
                    left        = line_split[-4]
                    class_name  = ""
                    for name in line_split[:-4]:
                        class_name += name + " "
                    class_name = class_name[:-1]

            bbox = left + " " + top + " " + right + " " + bottom
            if is_difficult:
                bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False, "difficult":True})
                is_difficult = False
            else:
                bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False})
                if class_name in gt_counter_per_class:
                    gt_counter_per_class[class_name] += 1
                else:
                    gt_counter_per_class[class_name] = 1

                if class_name not in already_seen_classes:
                    if class_name in counter_images_per_class:
                        counter_images_per_class[class_name] += 1
                    else:
                        counter_images_per_class[class_name] = 1
                    already_seen_classes.append(class_name)

        with open(TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json", 'w') as outfile:
            json.dump(bounding_boxes, outfile)

    gt_classes  = list(gt_counter_per_class.keys())
    gt_classes  = sorted(gt_classes)
    n_classes   = len(gt_classes)

    dr_files_list = glob.glob(DR_PATH + '/*.txt')
    dr_files_list.sort()
    for class_index, class_name in enumerate(gt_classes):
        bounding_boxes = []
        for txt_file in dr_files_list:
            file_id = txt_file.split(".txt",1)[0]
            file_id = os.path.basename(os.path.normpath(file_id))
            temp_path = os.path.join(GT_PATH, (file_id + ".txt"))
            if class_index == 0:
                if not os.path.exists(temp_path):
                    error_msg = "Error. File not found: {}\n".format(temp_path)
                    error(error_msg)
            lines = file_lines_to_list(txt_file)
            for line in lines:
                try:
                    tmp_class_name, confidence, left, top, right, bottom = line.split()
                except ValueError:
                    # the class name contains spaces; rebuild it from the remaining tokens
                    line_split      = line.split()
                    bottom          = line_split[-1]
                    right           = line_split[-2]
                    top             = line_split[-3]
                    left            = line_split[-4]
                    confidence      = line_split[-5]
                    tmp_class_name  = ""
                    for name in line_split[:-5]:
                        tmp_class_name += name + " "
                    tmp_class_name  = tmp_class_name[:-1]

                if tmp_class_name == class_name:
                    bbox = left + " " + top + " " + right + " " + bottom
                    bounding_boxes.append({"confidence":confidence, "file_id":file_id, "bbox":bbox})

        bounding_boxes.sort(key=lambda x:float(x['confidence']), reverse=True)
        with open(TEMP_FILES_PATH + "/" + class_name + "_dr.json", 'w') as outfile:
            json.dump(bounding_boxes, outfile)

    sum_AP = 0.0
    ap_dictionary = {}
    lamr_dictionary = {}
    with open(RESULTS_FILES_PATH + "/results.txt", 'w') as results_file:
        results_file.write("# AP and precision/recall per class\n")
        count_true_positives = {}

        for class_index, class_name in enumerate(gt_classes):
            count_true_positives[class_name] = 0
            dr_file = TEMP_FILES_PATH + "/" + class_name + "_dr.json"
            dr_data = json.load(open(dr_file))

            nd          = len(dr_data)
            tp          = [0] * nd
            fp          = [0] * nd
            score       = [0] * nd
            score05_idx = 0
            for idx, detection in enumerate(dr_data):
                file_id     = detection["file_id"]
                score[idx]  = float(detection["confidence"])
                if score[idx] > 0.5:
                    score05_idx = idx

                if show_animation:
                    ground_truth_img = glob.glob1(IMG_PATH, file_id + ".*")
                    if len(ground_truth_img) == 0:
                        error("Error. Image not found with id: " + file_id)
                    elif len(ground_truth_img) > 1:
                        error("Error. Multiple images with id: " + file_id)
                    else:
                        img = cv2.imread(IMG_PATH + "/" + ground_truth_img[0])
                        img_cumulative_path = RESULTS_FILES_PATH + "/images/" + ground_truth_img[0]
                        if os.path.isfile(img_cumulative_path):
                            img_cumulative = cv2.imread(img_cumulative_path)
                        else:
                            img_cumulative = img.copy()
                        bottom_border = 60
                        BLACK = [0, 0, 0]
                        img = cv2.copyMakeBorder(img, 0, bottom_border, 0, 0, cv2.BORDER_CONSTANT, value=BLACK)

                gt_file             = TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json"
                ground_truth_data   = json.load(open(gt_file))
                ovmax       = -1
                gt_match    = -1
                bb          = [float(x) for x in detection["bbox"].split()]
                for obj in ground_truth_data:
                    if obj["class_name"] == class_name:
                        bbgt = [ float(x) for x in obj["bbox"].split() ]
                        bi = [max(bb[0],bbgt[0]), max(bb[1],bbgt[1]), min(bb[2],bbgt[2]), min(bb[3],bbgt[3])]
                        iw = bi[2] - bi[0] + 1
                        ih = bi[3] - bi[1] + 1
                        if iw > 0 and ih > 0:
                            ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (bbgt[2] - bbgt[0]
                                            + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih
                            ov = iw * ih / ua
                            if ov > ovmax:
                                ovmax = ov
                                gt_match = obj

                if show_animation:
                    status = "NO MATCH FOUND!"

                min_overlap = MINOVERLAP
                if ovmax >= min_overlap:
                    if "difficult" not in gt_match:
                        if not bool(gt_match["used"]):
                            tp[idx] = 1
                            gt_match["used"] = True
                            count_true_positives[class_name] += 1
                            with open(gt_file, 'w') as f:
                                f.write(json.dumps(ground_truth_data))
                            if show_animation:
                                status = "MATCH!"
                        else:
                            fp[idx] = 1
                            if show_animation:
                                status = "REPEATED MATCH!"
                else:
                    fp[idx] = 1
                    if ovmax > 0:
                        status = "INSUFFICIENT OVERLAP"

                """
                 Draw image to show animation
                """
                if show_animation:
                    height, width = img.shape[:2]
                    white       = (255,255,255)
                    light_blue  = (255,200,100)
                    green       = (0,255,0)
                    light_red   = (30,30,255)
                    margin      = 10
                    # 1st line
                    v_pos = int(height - margin - (bottom_border / 2.0))
                    text = "Image: " + ground_truth_img[0] + " "
                    img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0)
                    text = "Class [" + str(class_index) + "/" + str(n_classes) + "]: " + class_name + " "
                    img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), light_blue, line_width)
                    if ovmax != -1:
                        color = light_red
                        if status == "INSUFFICIENT OVERLAP":
                            text = "IoU: {0:.2f}% ".format(ovmax*100) + "< {0:.2f}% ".format(min_overlap*100)
                        else:
                            text = "IoU: {0:.2f}% ".format(ovmax*100) + ">= {0:.2f}% ".format(min_overlap*100)
                            color = green
                        img, _ = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width)
                    # 2nd line
                    v_pos += int(bottom_border / 2.0)
                    rank_pos = str(idx+1)
                    text = "Detection #rank: " + rank_pos + " confidence: {0:.2f}% ".format(float(detection["confidence"])*100)
                    img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0)
                    color = light_red
                    if status == "MATCH!":
                        color = green
                    text = "Result: " + status + " "
                    img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width)

                    font = cv2.FONT_HERSHEY_SIMPLEX
                    if ovmax > 0:
                        bbgt = [ int(round(float(x))) for x in gt_match["bbox"].split() ]
                        cv2.rectangle(img,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),light_blue,2)
                        cv2.rectangle(img_cumulative,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),light_blue,2)
                        cv2.putText(img_cumulative, class_name, (bbgt[0],bbgt[1] - 5), font, 0.6, light_blue, 1, cv2.LINE_AA)
                    bb = [int(i) for i in bb]
                    cv2.rectangle(img,(bb[0],bb[1]),(bb[2],bb[3]),color,2)
                    cv2.rectangle(img_cumulative,(bb[0],bb[1]),(bb[2],bb[3]),color,2)
                    cv2.putText(img_cumulative, class_name, (bb[0],bb[1] - 5), font, 0.6, color, 1, cv2.LINE_AA)

                    cv2.imshow("Animation", img)
                    cv2.waitKey(20)
                    output_img_path = RESULTS_FILES_PATH + "/images/detections_one_by_one/" + class_name + "_detection" + str(idx) + ".jpg"
                    cv2.imwrite(output_img_path, img)
                    cv2.imwrite(img_cumulative_path, img_cumulative)

            cumsum = 0
            for idx, val in enumerate(fp):
                fp[idx] += cumsum
                cumsum += val

            cumsum = 0
            for idx, val in enumerate(tp):
                tp[idx] += cumsum
                cumsum += val

            rec = tp[:]
            for idx, val in enumerate(tp):
                rec[idx] = float(tp[idx]) / np.maximum(gt_counter_per_class[class_name], 1)

            prec = tp[:]
            for idx, val in enumerate(tp):
                prec[idx] = float(tp[idx]) / np.maximum((fp[idx] + tp[idx]), 1)

            ap, mrec, mprec = voc_ap(rec[:], prec[:])
            F1 = np.array(rec)*np.array(prec)*2 / np.where((np.array(prec)+np.array(rec))==0, 1, (np.array(prec)+np.array(rec)))

            sum_AP  += ap
            text    = "{0:.2f}%".format(ap*100) + " = " + class_name + " AP "   # class_name + " AP = {0:.2f}%".format(ap*100)

            if len(prec)>0:
                F1_text         = "{0:.2f}".format(F1[score05_idx]) + " = " + class_name + " F1 "
                Recall_text     = "{0:.2f}%".format(rec[score05_idx]*100) + " = " + class_name + " Recall "
                Precision_text  = "{0:.2f}%".format(prec[score05_idx]*100) + " = " + class_name + " Precision "
            else:
                F1_text         = "0.00" + " = " + class_name + " F1 "
                Recall_text     = "0.00%" + " = " + class_name + " Recall "
                Precision_text  = "0.00%" + " = " + class_name + " Precision "

            rounded_prec    = [ '%.2f' % elem for elem in prec ]
            rounded_rec     = [ '%.2f' % elem for elem in rec ]
            results_file.write(text + "\n Precision: " + str(rounded_prec) + "\n Recall :" + str(rounded_rec) + "\n\n")
            if len(prec)>0:
                print(text + "\t||\tscore_threshold=0.5 : " + "F1=" + "{0:.2f}".format(F1[score05_idx])\
                    + " ; Recall=" + "{0:.2f}%".format(rec[score05_idx]*100) + " ; Precision=" + "{0:.2f}%".format(prec[score05_idx]*100))
            else:
                print(text + "\t||\tscore_threshold=0.5 : F1=0.00 ; Recall=0.00% ; Precision=0.00%")
            ap_dictionary[class_name] = ap

            n_images = counter_images_per_class[class_name]
            lamr, mr, fppi = log_average_miss_rate(np.array(rec), np.array(fp), n_images)
            lamr_dictionary[class_name] = lamr

            if draw_plot:
                plt.plot(rec, prec, '-o')
                area_under_curve_x = mrec[:-1] + [mrec[-2]] + [mrec[-1]]
                area_under_curve_y = mprec[:-1] + [0.0] + [mprec[-1]]
                plt.fill_between(area_under_curve_x, 0, area_under_curve_y, alpha=0.2, edgecolor='r')

                fig = plt.gcf()
                # use the figure manager (canvas.set_window_title was removed in Matplotlib 3.6)
                fig.canvas.manager.set_window_title('AP ' + class_name)

                plt.title('class: ' + text)
                plt.xlabel('Recall')
                plt.ylabel('Precision')
                axes = plt.gca()
                axes.set_xlim([0.0,1.0])
                axes.set_ylim([0.0,1.05])
                fig.savefig(RESULTS_FILES_PATH + "/AP/" + class_name + ".png")
                plt.cla()

                plt.plot(score, F1, "-", color='orangered')
                plt.title('class: ' + F1_text + "\nscore_threshold=0.5")
                plt.xlabel('Score_Threshold')
                plt.ylabel('F1')
                axes = plt.gca()
                axes.set_xlim([0.0,1.0])
                axes.set_ylim([0.0,1.05])
                fig.savefig(RESULTS_FILES_PATH + "/F1/" + class_name + ".png")
                plt.cla()

                plt.plot(score, rec, "-H", color='gold')
                plt.title('class: ' + Recall_text + "\nscore_threshold=0.5")
                plt.xlabel('Score_Threshold')
                plt.ylabel('Recall')
                axes = plt.gca()
                axes.set_xlim([0.0,1.0])
                axes.set_ylim([0.0,1.05])
                fig.savefig(RESULTS_FILES_PATH + "/Recall/" + class_name + ".png")
                plt.cla()

                plt.plot(score, prec, "-s", color='palevioletred')
                plt.title('class: ' + Precision_text + "\nscore_threshold=0.5")
                plt.xlabel('Score_Threshold')
                plt.ylabel('Precision')
                axes = plt.gca()
                axes.set_xlim([0.0,1.0])
                axes.set_ylim([0.0,1.05])
                fig.savefig(RESULTS_FILES_PATH + "/Precision/" + class_name + ".png")
                plt.cla()

        if show_animation:
            cv2.destroyAllWindows()

        results_file.write("\n# mAP of all classes\n")
        mAP     = sum_AP / n_classes
        text    = "mAP = {0:.2f}%".format(mAP*100)
        results_file.write(text + "\n")
        print(text)

    shutil.rmtree(TEMP_FILES_PATH)

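    # For intuition, the cumulative bookkeeping above turns per-detection flags into a PR
    # curve; with toy flags tp = [1, 0, 1, 1], fp = [0, 1, 0, 0] and 3 ground-truth boxes:
    #     tp_cum = [1, 1, 2, 3], fp_cum = [0, 1, 1, 1]
    #     rec  = tp_cum / 3                  = [0.33, 0.33, 0.67, 1.0]
    #     prec = tp_cum / (tp_cum + fp_cum)  = [1.0, 0.5, 0.67, 0.75]
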
    """
     Count total of detection-results
    """
    det_counter_per_class = {}
    for txt_file in dr_files_list:
        lines_list = file_lines_to_list(txt_file)
        for line in lines_list:
            class_name = line.split()[0]
            if class_name in det_counter_per_class:
                det_counter_per_class[class_name] += 1
            else:
                det_counter_per_class[class_name] = 1
    dr_classes = list(det_counter_per_class.keys())

    """
     Write number of ground-truth objects per class to results.txt
    """
    with open(RESULTS_FILES_PATH + "/results.txt", 'a') as results_file:
        results_file.write("\n# Number of ground-truth objects per class\n")
        for class_name in sorted(gt_counter_per_class):
            results_file.write(class_name + ": " + str(gt_counter_per_class[class_name]) + "\n")

    """
     Finish counting true positives
    """
    for class_name in dr_classes:
        if class_name not in gt_classes:
            count_true_positives[class_name] = 0

    """
     Write number of detected objects per class to results.txt
    """
    with open(RESULTS_FILES_PATH + "/results.txt", 'a') as results_file:
        results_file.write("\n# Number of detected objects per class\n")
        for class_name in sorted(dr_classes):
            n_det = det_counter_per_class[class_name]
            text = class_name + ": " + str(n_det)
            text += " (tp:" + str(count_true_positives[class_name]) + ""
            text += ", fp:" + str(n_det - count_true_positives[class_name]) + ")\n"
            results_file.write(text)

    """
     Plot the total number of occurrences of each class in the ground-truth
    """
    if draw_plot:
        window_title = "ground-truth-info"
        plot_title = "ground-truth\n"
        plot_title += "(" + str(len(ground_truth_files_list)) + " files and " + str(n_classes) + " classes)"
        x_label = "Number of objects per class"
        output_path = RESULTS_FILES_PATH + "/ground-truth-info.png"
        to_show = False
        plot_color = 'forestgreen'
        draw_plot_func(
            gt_counter_per_class,
            n_classes,
            window_title,
            plot_title,
            x_label,
            output_path,
            to_show,
            plot_color,
            '',
            )

    # """
    #  Plot the total number of occurrences of each class in the "detection-results" folder
    # """
    # if draw_plot:
    #     window_title = "detection-results-info"
    #     # Plot title
    #     plot_title = "detection-results\n"
    #     plot_title += "(" + str(len(dr_files_list)) + " files and "
    #     count_non_zero_values_in_dictionary = sum(int(x) > 0 for x in list(det_counter_per_class.values()))
    #     plot_title += str(count_non_zero_values_in_dictionary) + " detected classes)"
    #     # end Plot title
    #     x_label = "Number of objects per class"
    #     output_path = RESULTS_FILES_PATH + "/detection-results-info.png"
    #     to_show = False
    #     plot_color = 'forestgreen'
    #     true_p_bar = count_true_positives
    #     draw_plot_func(
    #         det_counter_per_class,
    #         len(det_counter_per_class),
    #         window_title,
    #         plot_title,
    #         x_label,
    #         output_path,
    #         to_show,
    #         plot_color,
    #         true_p_bar
    #         )

    """
     Draw log-average miss rate plot (Show lamr of all classes in decreasing order)
    """
    if draw_plot:
        window_title = "lamr"
        plot_title = "log-average miss rate"
        x_label = "log-average miss rate"
        output_path = RESULTS_FILES_PATH + "/lamr.png"
        to_show = False
        plot_color = 'royalblue'
        draw_plot_func(
            lamr_dictionary,
            n_classes,
            window_title,
            plot_title,
            x_label,
            output_path,
            to_show,
            plot_color,
            ""
            )

    """
     Draw mAP plot (Show AP's of all classes in decreasing order)
    """
    if draw_plot:
        window_title = "mAP"
        plot_title = "mAP = {0:.2f}%".format(mAP*100)
        x_label = "Average Precision"
        output_path = RESULTS_FILES_PATH + "/mAP.png"
        to_show = True
        plot_color = 'royalblue'
        draw_plot_func(
            ap_dictionary,
            n_classes,
            window_title,
            plot_title,
            x_label,
            output_path,
            to_show,
            plot_color,
            ""
            )

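# Typical invocation, assuming the map_out layout described above (one txt per image
# under ground-truth/ and detection-results/):
#     get_map(MINOVERLAP=0.5, draw_plot=True, path='./map_out')
# writes results.txt plus per-class AP/F1/Recall/Precision plots under map_out/results.
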
def preprocess_gt(gt_path, class_names):
    image_ids = os.listdir(gt_path)
    results = {}

    images = []
    bboxes = []
    for i, image_id in enumerate(image_ids):
        lines_list      = file_lines_to_list(os.path.join(gt_path, image_id))
        boxes_per_image = []
        image           = {}
        image_id        = os.path.splitext(image_id)[0]
        image['file_name'] = image_id + '.jpg'
        image['width']     = 1
        image['height']    = 1
        #-----------------------------------------------------------------#
        #   Thanks to 多学学英语吧 for the tip, which fixed the
        #   'Results do not correspond to current coco set' error.
        #-----------------------------------------------------------------#
        image['id'] = str(image_id)

        for line in lines_list:
            difficult = 0
            if "difficult" in line:
                line_split  = line.split()
                left, top, right, bottom, _difficult = line_split[-5:]
                class_name  = ""
                for name in line_split[:-5]:
                    class_name += name + " "
                class_name  = class_name[:-1]
                difficult = 1
            else:
                line_split  = line.split()
                left, top, right, bottom = line_split[-4:]
                class_name  = ""
                for name in line_split[:-4]:
                    class_name += name + " "
                class_name  = class_name[:-1]

            left, top, right, bottom = float(left), float(top), float(right), float(bottom)
            cls_id  = class_names.index(class_name) + 1
            bbox    = [left, top, right - left, bottom - top, difficult, str(image_id), cls_id, (right - left) * (bottom - top) - 10.0]
            boxes_per_image.append(bbox)
        images.append(image)
        bboxes.extend(boxes_per_image)
    results['images'] = images

    categories = []
    for i, cls in enumerate(class_names):
        category = {}
        category['supercategory']   = cls
        category['name']            = cls
        category['id']              = i + 1
        categories.append(category)
    results['categories'] = categories

    annotations = []
    for i, box in enumerate(bboxes):
        annotation = {}
        annotation['area']        = box[-1]
        annotation['category_id'] = box[-2]
        annotation['image_id']    = box[-3]
        annotation['iscrowd']     = box[-4]
        annotation['bbox']        = box[:4]
        annotation['id']          = i
        annotations.append(annotation)
    results['annotations'] = annotations
    return results

def preprocess_dr(dr_path, class_names):
    image_ids = os.listdir(dr_path)
    results = []
    for image_id in image_ids:
        lines_list = file_lines_to_list(os.path.join(dr_path, image_id))
        image_id = os.path.splitext(image_id)[0]
        for line in lines_list:
            line_split = line.split()
            confidence, left, top, right, bottom = line_split[-5:]
            class_name = ""
            for name in line_split[:-5]:
                class_name += name + " "
            class_name = class_name[:-1]
            left, top, right, bottom = float(left), float(top), float(right), float(bottom)
            result = {}
            result["image_id"]      = str(image_id)
            result["category_id"]   = class_names.index(class_name) + 1
            result["bbox"]          = [left, top, right - left, bottom - top]
            result["score"]         = float(confidence)
            results.append(result)
    return results

def get_coco_map(class_names, path):
    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval

    GT_PATH     = os.path.join(path, 'ground-truth')
    DR_PATH     = os.path.join(path, 'detection-results')
    COCO_PATH   = os.path.join(path, 'coco_eval')

    if not os.path.exists(COCO_PATH):
        os.makedirs(COCO_PATH)

    GT_JSON_PATH = os.path.join(COCO_PATH, 'instances_gt.json')
    DR_JSON_PATH = os.path.join(COCO_PATH, 'instances_dr.json')

    with open(GT_JSON_PATH, "w") as f:
        results_gt = preprocess_gt(GT_PATH, class_names)
        json.dump(results_gt, f, indent=4)

    with open(DR_JSON_PATH, "w") as f:
        results_dr = preprocess_dr(DR_PATH, class_names)
        json.dump(results_dr, f, indent=4)

    cocoGt      = COCO(GT_JSON_PATH)
    cocoDt      = cocoGt.loadRes(DR_JSON_PATH)
    cocoEval    = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
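
For reference, a minimal driver for the COCO-style evaluation above; it assumes pycocotools is installed, the same map_out layout, and that the class list matches the one used to write the detection files:

if __name__ == "__main__":
    # e.g. read the project's class list from model_data/gesture_classes.txt
    with open('model_data/gesture_classes.txt') as f:
        class_names = [c.strip() for c in f.readlines()]
    get_coco_map(class_names, path='./map_out')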