% Conference paper "PoseTED" in the ISVC 2021 proceedings (Springer LNCS series).
% Cleaned from an automatic export: percent signs in the abstract are escaped so
% the field is safe if a style prints it, case-sensitive words are brace-protected,
% and all names use the unambiguous "Last, First" form.
% NOTE(review): the LNCS volume number is not present in the export; add a
% volume field once it has been confirmed against the publisher record.
@inproceedings{9f8576662c3840a0b89ddba1ac78132b,
  author    = {Jeny, Afsana Ahsan and Junayed, Masum Shah and Islam, Md Baharul},
  title     = {{PoseTED}: A Novel Regression-Based Technique for Recognizing Multiple Pose Instances},
  booktitle = {Advances in Visual Computing -- 16th International Symposium, {ISVC} 2021, Proceedings},
  editor    = {Bebis, George and Athitsos, Vassilis and Yan, Tong and Lau, Manfred and Li, Frederick and Shi, Conglei and Yuan, Xiaoru and Mousas, Christos and Bruder, Gerd},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  year      = {2021},
  pages     = {573--585},
  doi       = {10.1007/978-3-030-90439-5_45},
  isbn      = {9783030904388},
  language  = {English},
  keywords  = {FFN, Keypoints estimation, Person detection, Pose recognition, STN, Transformer encoder-decoder},
  abstract  = {Pose estimation for multiple people can be viewed as a hierarchical set predicting challenge. Algorithms are needed to classify all persons according to their physical components appropriately. Pose estimation methods are divided into two categories: (1) heatmap-based, (2) regression-based. Heatmap-based techniques are susceptible to various heuristic designs and are not end-to-end trainable, while regression-based methods involve fewer intermediary non-differentiable stages. This paper presents a novel regression-based multi-instance human pose recognition network called PoseTED. It utilizes the well-known object detector YOLOv4 for person detection, and the spatial transformer network (STN) used as a cropping filter. After that, we used a CNN-based backbone that extracts deep features and positional encoding with an encoder-decoder transformer applied for keypoint detection, solving the heuristic design problem before regression-based techniques and increasing overall performance. A prediction-based feed-forward network (FFN) is used to predict several key locations{\textquoteright} posture as a group and display the body components as an output. Two available public datasets are tested in this experiment. Experimental results are shown on the COCO and MPII datasets, with an average precision (AP) of 73.7\% on the COCO val. dataset, 72.7\% on the COCO test dev. dataset, and 89.7\% on the MPII datasets, respectively. These results are comparable to the state-of-the-art methods.},
  note      = {Publisher Copyright: {\textcopyright} 2021, Springer Nature Switzerland AG.; 16th International Symposium on Visual Computing, ISVC 2021 ; Conference date: 04-10-2021 Through 06-10-2021},
}