yolov3 / general_json2yolo.py

init model

32865f3 11 months ago

6.7 kB

	import numpy as np
	from tqdm import tqdm
	from pathlib import Path
	import json
	from collections import defaultdict
	import sys
	import pathlib

	# Directory that contains this script.
	CURRENT_DIR = pathlib.Path(__file__).parent
	# Append that directory to the module search path so modules located next
	# to this script can be imported.
	# NOTE(review): no such import is visible in this part of the file --
	# confirm the path tweak is still needed.
	sys.path.append(str(CURRENT_DIR))


	def make_dirs(path='coco'):
	    """Create the output directory layout and return its root.

	    Ensures that ``<path>/labels`` exists (missing parents are created,
	    already existing directories are left untouched).

	    Args:
	        path: root of the output directory, as a string or ``Path``.

	    Returns:
	        ``path`` converted to a ``pathlib.Path``.
	    """
	    root = Path(path)
	    labels_dir = root / 'labels'
	    labels_dir.mkdir(parents=True, exist_ok=True)
	    return root


	def coco91_to_coco80_class():
	    """Return the lookup table from 91-slot COCO ids to 80-class indexes.

	    The table has 91 entries.  Entry ``i`` (looked up by callers as
	    ``category_id - 1``) holds the contiguous 0-79 class index, or
	    ``None`` when that slot has no counterpart among the 80 classes.

	    Category list reference:
	    https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
	    """
	    # 1-based slots of the 91-entry numbering that map to no class.
	    unused_ids = {12, 26, 29, 30, 45, 66, 68, 69, 71, 83, 91}

	    table = []
	    next_index = 0
	    for category_id in range(1, 92):
	        if category_id in unused_ids:
	            table.append(None)
	        else:
	            table.append(next_index)
	            next_index += 1
	    return table


	def convert_coco_json(
	        json_dir='coco/annotations/',
	        use_segments=False,
	        cls91to80=False):
	    """Convert COCO instance annotations into YOLO-style label files.

	    Reads ``instances_val2017.json`` from ``json_dir`` and writes, below
	    the output directory returned by ``make_dirs()``:

	    * ``labels/val2017/<image file name>.txt`` -- one line per kept
	      annotation: the class followed by either the normalized box
	      (``x_center y_center width height``) or, with ``use_segments``,
	      the normalized polygon coordinates.
	    * ``val2017.txt`` -- one ``./images/<dir>/<file>`` line per annotated
	      image, built from the last two components of its ``coco_url``.

	    Crowd annotations, boxes with a non-positive width or height and
	    boxes duplicated within one image are skipped.  Both kinds of output
	    file are opened in append mode.

	    Args:
	        json_dir: directory that holds the COCO annotation JSON file.
	        use_segments: write polygons instead of boxes.  An annotation
	            that provides no polygon points falls back to its box.
	        cls91to80: translate ``category_id`` through
	            ``coco91_to_coco80_class()`` instead of only subtracting 1.

	    Raises:
	        FileExistsError: if the label folder already exists.  This is
	            kept on purpose: the outputs are appended to, so converting
	            into an existing folder would duplicate every label line.
	    """
	    save_dir = make_dirs()  # output directory
	    coco80 = coco91_to_coco80_class()

	    # Import json
	    for json_file in sorted(
	            Path(json_dir).resolve().glob('instances_val2017.json')):
	        # Label folder named after the annotation file, e.g. 'val2017'.
	        fn = Path(save_dir) / 'labels' / \
	            json_file.stem.replace('instances_', '')
	        fn.mkdir()
	        with open(json_file) as json_fp:
	            data = json.load(json_fp)

	        # Image lookup keyed by the raw id.  Formatting the id with '%g'
	        # keeps only six significant digits, which makes ids of one
	        # million and above collide.
	        images = {x['id']: x for x in data['images']}
	        # Group the annotations by the image they belong to.
	        anns_by_image = defaultdict(list)
	        for ann in data['annotations']:
	            anns_by_image[ann['image_id']].append(ann)

	        list_path = Path(save_dir / 'val2017').with_suffix('.txt')
	        # The context manager closes the image list even when a
	        # malformed annotation raises half-way through.
	        with open(list_path, 'a') as txt_file:
	            for img_id, anns in tqdm(
	                    anns_by_image.items(),
	                    desc=f'Annotations {json_file}'):
	                img = images[img_id]
	                h, w = img['height'], img['width']
	                file_name = img['file_name']

	                txt_file.write(
	                    './images/'
	                    + '/'.join(img['coco_url'].split('/')[-2:]) + '\n')

	                bboxes = []  # boxes kept so far, used for de-duplication
	                labels = []  # output rows, always aligned with bboxes
	                for ann in anns:
	                    if ann['iscrowd']:
	                        continue
	                    # The COCO box format is
	                    # [top left x, top left y, width, height]
	                    box = np.array(ann['bbox'], dtype=np.float64)
	                    box[:2] += box[2:] / 2  # top-left corner to center
	                    box[[0, 2]] /= w  # normalize x
	                    box[[1, 3]] /= h  # normalize y
	                    if box[2] <= 0 or box[3] <= 0:  # width or height <= 0
	                        continue
	                    if cls91to80:
	                        cls = coco80[ann['category_id'] - 1]
	                    else:
	                        cls = ann['category_id'] - 1
	                    box = [cls] + box.tolist()
	                    if box in bboxes:
	                        continue
	                    bboxes.append(box)

	                    # The polygon is only built for a box that was kept,
	                    # so boxes and polygons can never get out of step.
	                    row = box
	                    polygons = ann.get('segmentation') \
	                        if use_segments else None
	                    if polygons:
	                        if len(polygons) > 1:
	                            points = np.concatenate(
	                                merge_multi_segment(polygons), axis=0)
	                        else:
	                            flat = [v for poly in polygons for v in poly]
	                            points = np.array(flat).reshape(-1, 2)
	                        coords = (points / np.array([w, h])) \
	                            .reshape(-1).tolist()
	                        if coords:
	                            row = [cls] + coords
	                    labels.append(row)

	                # Write: cls followed by the box or polygon values
	                label_path = (fn / file_name).with_suffix('.txt')
	                with open(label_path, 'a') as file:
	                    for row in labels:
	                        file.write(
	                            ' '.join('%g' % v for v in row) + '\n')


	def min_index(arr1, arr2):
	    """Locate the closest pair of points between two point arrays.

	    Args:
	        arr1: array of shape (N, 2).
	        arr2: array of shape (M, 2).

	    Returns:
	        A tuple ``(i, j)`` such that ``arr1[i]`` and ``arr2[j]`` have the
	        smallest squared Euclidean distance of all pairs.
	    """
	    # Pairwise differences via broadcasting: (N, 1, 2) - (1, M, 2).
	    deltas = arr1[:, None, :] - arr2[None, :, :]
	    sq_dist = (deltas ** 2).sum(-1)
	    flat_pos = np.argmin(sq_dist, axis=None)
	    return np.unravel_index(flat_pos, sq_dist.shape)


	def merge_multi_segment(segments):
	    """Merge several polygons of one object into a single point sequence.

	    For every pair of consecutive segments the two closest points are
	    looked up with ``min_index``.  The segments are then walked forward
	    and afterwards backward so that joining the returned pieces yields
	    one outline connected through those points by thin lines.

	    Args:
	        segments(List(List)): original segmentations in coco's json
	            file, like [segmentation1, segmentation2,...]; each
	            segmentation is a flat list of coordinates.

	    Returns:
	        A list of arrays of shape (k, 2); concatenating them along
	        axis 0 gives the merged outline.  With fewer than two segments
	        there is nothing to connect and the reshaped input is returned
	        as is.
	    """
	    segments = [np.array(i).reshape(-1, 2) for i in segments]
	    if len(segments) < 2:
	        # A lone segment has no connection index, so the merge loop
	        # below would fail on idx[0]; hand it back unchanged instead.
	        return segments

	    s = []
	    idx_list = [[] for _ in range(len(segments))]

	    # record the indexes with min distance between each segment
	    for i in range(1, len(segments)):
	        idx1, idx2 = min_index(segments[i - 1], segments[i])
	        idx_list[i - 1].append(idx1)
	        idx_list[i].append(idx2)

	    last = len(idx_list) - 1

	    # first round: forward connection
	    for i, idx in enumerate(idx_list):
	        # middle segments have two indexes
	        # reverse the index of middle segments
	        # NOTE(review): the rows are reversed but the indexes keep their
	        # pre-reversal values -- confirm this is intended.
	        if len(idx) == 2 and idx[0] > idx[1]:
	            idx = idx[::-1]
	            segments[i] = segments[i][::-1, :]

	        # start the segment at its connection point and close the loop
	        segments[i] = np.roll(segments[i], -idx[0], axis=0)
	        segments[i] = np.concatenate([segments[i], segments[i][:1]])
	        # deal with the first segment and the last one
	        if i in [0, last]:
	            s.append(segments[i])
	        else:
	            idx = [0, idx[1] - idx[0]]
	            s.append(segments[i][idx[0]:idx[1] + 1])

	    # second round: backward connection, adds the remaining part of
	    # every middle segment
	    for i in range(last, -1, -1):
	        if i not in [0, last]:
	            idx = idx_list[i]
	            nidx = abs(idx[1] - idx[0])
	            s.append(segments[i][nidx:])
	    return s


	if __name__ == '__main__':
	    # Script entry point: convert the annotations found in
	    # 'coco/annotations' to box labels using the 80-class mapping.
	    convert_coco_json(
	        json_dir='coco/annotations',
	        use_segments=False,
	        cls91to80=True,
	    )