|
import numpy as np |
|
from tqdm import tqdm |
|
from pathlib import Path |
|
import json |
|
from collections import defaultdict |
|
import sys |
|
import pathlib |
|
|
|
# Directory that contains this file. It is appended to the module search
# path below (presumably so sibling modules can be imported - confirm with
# the rest of the project).
CURRENT_DIR = pathlib.Path(__file__).parent
sys.path.append(str(CURRENT_DIR))
|
|
|
|
|
def make_dirs(path='coco'):
    """Create the output directory layout and return its root.

    Args:
        path: Root output directory (str or path-like).

    Returns:
        ``Path(path)``. The root and its ``labels`` sub-directory exist when
        this returns; directories that already exist are left untouched.
    """
    root = Path(path)
    labels_dir = root / 'labels'
    # parents=True also creates the root itself when it is missing.
    labels_dir.mkdir(parents=True, exist_ok=True)
    return root
|
|
|
|
|
def coco91_to_coco80_class():
    """Return the lookup table from 91-class COCO ids to 80-class indices.

    Returns:
        A list of 91 entries. Entry ``category_id - 1`` holds the 0-based
        index in the 80-class set, or ``None`` for the ids that have no
        counterpart in the 80-class set.
    """
    # 1-based category ids of the 91-id scheme that map to None.
    unused_ids = {12, 26, 29, 30, 45, 66, 68, 69, 71, 83, 91}

    mapping = []
    next_index = 0
    for category_id in range(1, 92):
        if category_id in unused_ids:
            mapping.append(None)
            continue
        mapping.append(next_index)
        next_index += 1
    return mapping
|
|
|
|
|
def convert_coco_json(
        json_dir='coco/annotations/',
        use_segments=False,
        cls91to80=False):
    """Convert raw COCO dataset annotations to YOLO style label files.

    Only ``instances_val2017.json`` inside ``json_dir`` is processed. Output
    goes below the directory returned by ``make_dirs()`` (``coco``):

    * ``labels/val2017/<image file stem>.txt`` - one line per unique
      non-crowd annotation. Without ``use_segments`` a line is
      ``class cx cy w h`` (box centre and size divided by the image width
      and height); with ``use_segments`` it is ``class x1 y1 x2 y2 ...``
      with polygon coordinates normalised the same way.
    * ``val2017.txt`` - one ``./images/<dir>/<file>`` line per annotated
      image, built from the last two components of the image's ``coco_url``.

    Both kinds of file are opened in append mode.

    Args:
        json_dir: Directory that contains the COCO annotation JSON file.
        use_segments: Write polygon segments instead of bounding boxes.
        cls91to80: Map ``category_id`` through ``coco91_to_coco80_class()``
            instead of using ``category_id - 1`` directly.

    Raises:
        FileExistsError: If ``labels/val2017`` already exists.
    """
    save_dir = make_dirs()
    coco80 = coco91_to_coco80_class()

    for json_file in sorted(Path(json_dir).resolve().glob('instances_val2017.json')):
        fn = Path(save_dir) / 'labels' / json_file.stem.replace('instances_', '')
        # Intentionally strict: the label files are written in append mode,
        # so converting into an existing directory would duplicate lines.
        fn.mkdir()
        with open(json_file) as f:
            data = json.load(f)

        # Key by the raw id. Formatting ids with '%g' keeps only six
        # significant digits, so ids >= 1_000_000 would collide.
        images = {x['id']: x for x in data['images']}

        # Group annotations by the image they belong to.
        imgToAnns = defaultdict(list)
        for ann in data['annotations']:
            imgToAnns[ann['image_id']].append(ann)

        list_path = Path(save_dir / 'val2017').with_suffix('.txt')
        # The context manager closes the image list even if a record fails.
        with open(list_path, 'a') as txt_file:
            for img_id, anns in tqdm(
                    imgToAnns.items(), desc=f'Annotations {json_file}'):
                img = images[img_id]
                h, w, file_name = img['height'], img['width'], img['file_name']
                bboxes = []
                segments = []

                txt_file.write(
                    './images/' + '/'.
                    join(img['coco_url'].split('/')[-2:]) + '\n')

                for ann in anns:
                    if ann['iscrowd']:
                        continue

                    # COCO boxes are [x_min, y_min, width, height]; convert
                    # to a normalised [x_center, y_center, width, height].
                    box = np.array(ann['bbox'], dtype=np.float64)
                    box[:2] += box[2:] / 2
                    box[[0, 2]] /= w
                    box[[1, 3]] /= h
                    if box[2] <= 0 or box[3] <= 0:
                        # Degenerate box: skip the whole annotation.
                        continue

                    if cls91to80:
                        cls = coco80[ann['category_id'] - 1]
                    else:
                        cls = ann['category_id'] - 1

                    box = [cls] + box.tolist()
                    if box not in bboxes:
                        bboxes.append(box)

                    if use_segments:
                        if len(ann['segmentation']) > 1:
                            # Several polygons: join them into one outline.
                            s = merge_multi_segment(ann['segmentation'])
                            s = (np.concatenate(s, axis=0) /
                                 np.array([w, h])).reshape(-1).tolist()
                        else:
                            s = [j for i in ann['segmentation'] for j in i]
                            s = (np.array(s).reshape(-1, 2) /
                                 np.array([w, h])).reshape(-1).tolist()
                        s = [cls] + s
                        if s not in segments:
                            segments.append(s)

                # Boxes and segments are de-duplicated independently, so the
                # two lists may differ in length; iterate the one that is
                # actually written instead of indexing one by the other.
                rows = segments if use_segments else bboxes
                with open((fn / file_name).with_suffix('.txt'), 'a') as file:
                    for row in rows:
                        file.write(' '.join('%g' % v for v in row) + '\n')
|
|
|
|
|
def min_index(arr1, arr2):
    """Locate the closest pair of points between two point sets.

    Args:
        arr1: (N, 2).
        arr2: (M, 2).

    Return:
        a pair of indexes (tuple): the row in ``arr1`` and the row in
        ``arr2`` whose squared Euclidean distance is smallest.
    """
    # Broadcast to an (N, M, 2) array of coordinate differences.
    diff = arr1[:, None, :] - arr2[None, :, :]
    sq_dist = (diff ** 2).sum(-1)
    flat_pos = np.argmin(sq_dist, axis=None)
    return np.unravel_index(flat_pos, sq_dist.shape)
|
|
|
|
|
def merge_multi_segment(segments):
    """Merge multiple segments into one list.

    Find the coordinates with the minimum distance between each pair of
    consecutive segments, then connect these coordinates with one thin line
    to merge all segments into one.

    Args:
        segments(List(List)): original segmentations in coco's json file,
            like [segmentation1, segmentation2,...]; each segmentation is
            a flat list of coordinates.

    Returns:
        A list of (K, 2) arrays. With fewer than two segments there is
        nothing to connect and the reshaped input is returned unchanged.
    """
    segments = [np.array(i).reshape(-1, 2) for i in segments]
    # A lone segment has no neighbour, so its index list would stay empty
    # and the forward pass below would fail on idx[0].
    if len(segments) < 2:
        return segments

    s = []
    idx_list = [[] for _ in range(len(segments))]

    # Record, for each pair of consecutive segments, the indexes of their
    # two closest points. Middle segments therefore end up with two indexes.
    for i in range(1, len(segments)):
        idx1, idx2 = min_index(segments[i - 1], segments[i])
        idx_list[i - 1].append(idx1)
        idx_list[i].append(idx2)

    last = len(idx_list) - 1

    # Forward pass: walk the segments first to last.
    for i, idx in enumerate(idx_list):
        # Reverse a middle segment whose two indexes are out of order.
        # Only the local copy of idx is swapped; idx_list is untouched.
        if len(idx) == 2 and idx[0] > idx[1]:
            idx = idx[::-1]
            segments[i] = segments[i][::-1, :]

        # Rotate so the segment starts at its first connection point, then
        # close it by repeating that point at the end.
        segments[i] = np.roll(segments[i], -idx[0], axis=0)
        segments[i] = np.concatenate([segments[i], segments[i][:1]])

        if i in (0, last):
            # The first and last segments are emitted whole.
            s.append(segments[i])
        else:
            # Middle segments contribute only the part between their two
            # connection points in this pass.
            s.append(segments[i][:idx[1] - idx[0] + 1])

    # Backward pass: walk the middle segments last to first and append the
    # remainder of each.
    for i in range(last - 1, 0, -1):
        idx = idx_list[i]
        nidx = abs(idx[1] - idx[0])
        s.append(segments[i][nidx:])
    return s
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: convert the annotations under coco/annotations to
    # box labels, remapping the category ids to the 80-class indices.
    convert_coco_json(
        json_dir='coco/annotations',
        use_segments=False,
        cls91to80=True,
    )
|
|