Shad0ws committed on
Commit
68b4f36
1 Parent(s): 5d0590c

Upload 7 files

Files changed (8)
  1. .gitattributes +1 -0
  2. Example_1.jpg +0 -0
  3. Example_2.jpg +0 -0
  4. Example_3.jpg +0 -0
  5. app.py +49 -0
  6. model.py +81 -0
  7. part_B_pre.pth.tar +3 -0
  8. requirements.txt +5 -0
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ part_B_pre.pth.tar filter=lfs diff=lfs merge=lfs -text
Example_1.jpg ADDED
Example_2.jpg ADDED
Example_3.jpg ADDED
app.py ADDED
@@ -0,0 +1,50 @@
+ import torch
+ import gradio as gr
+ from einops import rearrange
+ from torchvision import transforms
+
+ from model import CANNet
+
+ ## Loading the pretrained checkpoint on CPU and switching to inference mode
+ model = CANNet()
+ checkpoint = torch.load('part_B_pre.pth.tar', map_location=torch.device('cpu'))
+ model.load_state_dict(checkpoint['state_dict'])
+ model.eval()
+
+ ## Defining the transform function (ImageNet normalization, matching the VGG-16 frontend)
+ transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
+
+
+ def crowd(img):
+     ## Transforming the image
+     img = transform(img)
+
+     ## Adding a batch dimension
+     img = rearrange(img, "c h w -> 1 c h w")
+
+     ## Slicing the image into four quadrants
+     h = img.shape[2]
+     w = img.shape[3]
+     h_d = h // 2
+     w_d = w // 2
+     img_1 = img[:, :, :h_d, :w_d]
+     img_2 = img[:, :, :h_d, w_d:]
+     img_3 = img[:, :, h_d:, :w_d]
+     img_4 = img[:, :, h_d:, w_d:]
+
+     ## Feeding the four quadrants to the model and summing each density map
+     with torch.no_grad():
+         density_1 = model(img_1).numpy().sum()
+         density_2 = model(img_2).numpy().sum()
+         density_3 = model(img_3).numpy().sum()
+         density_4 = model(img_4).numpy().sum()
+
+     ## Summing the four estimates and rounding to an integer count
+     pred = density_1 + density_2 + density_3 + density_4
+     pred = int(pred.round())
+     return pred
+
+ outputs = gr.outputs.Textbox(type="auto", label="Estimated crowd count:")
+ inputs = gr.inputs.Image(type="numpy", label="Input the image here:")
+
+ gr.Interface(fn=crowd, inputs=inputs, outputs=outputs, allow_flagging="never", examples=["Example_1.jpg", "Example_3.jpg", "Example_2.jpg"], title="Crowd Counting Model", description="Estimates the number of people in a crowd image.").launch(inbrowser=True)
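For a quick check outside the UI, crowd() can also be called directly on an array. A minimal sketch, assuming Pillow is available and that the snippet is pasted into app.py just above the gr.Interface(...) line (importing app instead would immediately call launch()):

    import numpy
    from PIL import Image

    ## Build the HWC uint8 RGB array that Gradio's numpy image input passes to crowd()
    img = numpy.asarray(Image.open("Example_1.jpg").convert("RGB"))
    print(crowd(img))  # prints the rounded head-count estimate as an integer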
model.py ADDED
@@ -0,0 +1,86 @@
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+ from torchvision import models
+
+ ## Multi-scale context module: pools the features at several scales and fuses
+ ## them with learned contrast weights (as in CAN, "Context-Aware Crowd Counting").
+ class ContextualModule(nn.Module):
+     def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)):
+         super(ContextualModule, self).__init__()
+         self.scales = nn.ModuleList([self._make_scale(features, size) for size in sizes])
+         self.bottleneck = nn.Conv2d(features * 2, out_features, kernel_size=1)
+         self.relu = nn.ReLU()
+         self.weight_net = nn.Conv2d(features, features, kernel_size=1)
+
+     def __make_weight(self, feature, scale_feature):
+         ## Contrast feature: difference between local and pooled context features
+         weight_feature = feature - scale_feature
+         return torch.sigmoid(self.weight_net(weight_feature))
+
+     def _make_scale(self, features, size):
+         prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
+         conv = nn.Conv2d(features, features, kernel_size=1, bias=False)
+         return nn.Sequential(prior, conv)
+
+     def forward(self, feats):
+         h, w = feats.size(2), feats.size(3)
+         multi_scales = [F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=False) for stage in self.scales]
+         weights = [self.__make_weight(feats, scale_feature) for scale_feature in multi_scales]
+         ## Weighted average of the scale features, concatenated with the input features
+         overall_features = [(multi_scales[0] * weights[0] + multi_scales[1] * weights[1] + multi_scales[2] * weights[2] + multi_scales[3] * weights[3]) / (weights[0] + weights[1] + weights[2] + weights[3])] + [feats]
+         bottle = self.bottleneck(torch.cat(overall_features, 1))
+         return self.relu(bottle)
+
+
+ ## CAN: VGG-16 frontend, context module, dilated backend, 1x1 density head
+ class CANNet(nn.Module):
+     def __init__(self, load_weights=False):
+         super(CANNet, self).__init__()
+         self.seen = 0
+         self.context = ContextualModule(512, 512)
+         self.frontend_feat = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512]
+         self.backend_feat = [512, 512, 512, 256, 128, 64]
+         self.frontend = make_layers(self.frontend_feat)
+         self.backend = make_layers(self.backend_feat, in_channels=512, batch_norm=True, dilation=True)
+         self.output_layer = nn.Conv2d(64, 1, kernel_size=1)
+         if not load_weights:
+             ## Initialize all layers, then overwrite the frontend with VGG-16 ImageNet weights
+             mod = models.vgg16(pretrained=True)
+             self._initialize_weights()
+             for (_, own), (_, pre) in zip(self.frontend.state_dict().items(), mod.state_dict().items()):
+                 own.data[:] = pre.data[:]
+
+     def forward(self, x):
+         x = self.frontend(x)
+         x = self.context(x)
+         x = self.backend(x)
+         x = self.output_layer(x)
+         return x
+
+     def _initialize_weights(self):
+         for m in self.modules():
+             if isinstance(m, nn.Conv2d):
+                 nn.init.normal_(m.weight, std=0.01)
+                 if m.bias is not None:
+                     nn.init.constant_(m.bias, 0)
+             elif isinstance(m, nn.BatchNorm2d):
+                 nn.init.constant_(m.weight, 1)
+                 nn.init.constant_(m.bias, 0)
+
+
+ def make_layers(cfg, in_channels=3, batch_norm=False, dilation=False):
+     ## Dilated convolutions widen the receptive field without further downsampling
+     d_rate = 2 if dilation else 1
+     layers = []
+     for v in cfg:
+         if v == 'M':
+             layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
+         else:
+             conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=d_rate, dilation=d_rate)
+             if batch_norm:
+                 layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
+             else:
+                 layers += [conv2d, nn.ReLU(inplace=True)]
+             in_channels = v
+     return nn.Sequential(*layers)
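Because the VGG-16 frontend contains three 2x2 max-pools and the dilated backend adds no further downsampling, the density map comes out at 1/8 of the input resolution in each dimension, which is why app.py sums the map rather than reading per-pixel counts. A minimal shape check, assuming torch and torchvision are installed (constructing CANNet downloads the VGG-16 ImageNet weights):

    import torch
    from model import CANNet

    net = CANNet()
    net.eval()
    x = torch.randn(1, 3, 256, 384)  # arbitrary RGB input tensor
    with torch.no_grad():
        y = net(x)
    print(y.shape)  # torch.Size([1, 1, 32, 48]): one density channel at H/8 x W/8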
part_B_pre.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f76422976fabd5524c5d243578b516b8981b8e43e99495ea149f64bac68b779f
+ size 72441427
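The LFS pointer records only the checkpoint's SHA-256 and byte size, not its contents; after fetching the real file (e.g. via git lfs pull), both can be verified with a short sketch like:

    import hashlib
    from pathlib import Path

    data = Path("part_B_pre.pth.tar").read_bytes()
    print(len(data))                         # should be 72441427
    print(hashlib.sha256(data).hexdigest())  # should match the oid above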
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ numpy
+ torch
+ torchvision
+ einops
+ gradio
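None of the dependencies are pinned. Since app.py relies on the legacy gr.inputs / gr.outputs namespaces, which newer Gradio releases dropped, a pinned variant along these lines (the version bound is an assumption, not tested here) may be safer:

    numpy
    torch
    torchvision
    einops
    gradio<4  # assumed bound: legacy gr.inputs/gr.outputs were removed in Gradio 4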