
webcam_demo.py #1812

Closed · ChenZhenGui opened this issue Nov 17, 2022 · 4 comments
@ChenZhenGui (Contributor)

I used my trained model to run the webcam demo. When no one is in view, everything works normally, but as soon as a person is detected it raises the following error:

[screenshot: error traceback]

I tried printing the `texts` variable:

[screenshot: printed `texts` values]

@Ben-Louis (Collaborator)

Hi, thanks for using MMPose. It looks like a problem with the format of the dataset metainfo. Could you please share your configs (both the webcam config and the pose estimator config)? They will help us find where the problem lies.
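
In the meantime, it may help to inspect the metainfo your model actually carries. A minimal sketch, assuming MMPose 1.x where `init_model` attaches `dataset_meta` to the returned model (the paths below are placeholders for your own files):

from mmpose.apis import init_model

# Placeholders: point these at your own config and checkpoint.
pose_config = 'configs/body_2d_keypoint/topdown_heatmap/coco/mobilevit_coco-256x192.py'
pose_checkpoint = 'work_dirs/AP_epoch_300.pth'

model = init_model(pose_config, pose_checkpoint, device='cpu')

# A well-formed metainfo should expose entries such as the keypoint and
# skeleton definitions; a missing or oddly typed entry here is a common
# cause of visualization errors downstream.
for key, value in model.dataset_meta.items():
    print(key, type(value))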

@ChenZhenGui (Contributor, Author)

Yeah, this is my config:
_base_ = ['../../../_base_/default_runtime.py']

channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# runtime
train_cfg = dict(max_epochs=300, val_interval=50)

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='AdamW',
    lr=5e-3,
))

# resume = True
# load_from = None

log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])

# learning policy
param_scheduler = [
    dict(
        type='LinearLR', begin=0, end=500, start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=300,
        milestones=[170, 260],
        gamma=0.1,
        by_epoch=True)
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# hooks
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='MyMobileViT',
        model_cfg={
            'layer1': {
                'out_channels': 32, 'expand_ratio': 4, 'num_blocks': 1,
                'stride': 1, 'block_type': 'mv2', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0
            },
            'layer2': {
                'out_channels': 64, 'expand_ratio': 4, 'num_blocks': 3,
                'stride': 2, 'block_type': 'mv2', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0
            },
            'layer3': {
                'out_channels': 96, 'transformer_channels': 144,
                'ffn_dim': 288, 'transformer_blocks': 2, 'patch_h': 2,
                'patch_w': 2, 'stride': 2, 'mv_expand_ratio': 4,
                'num_heads': 4, 'block_type': 'mobilevit', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0
            },
            'layer4': {
                'out_channels': 128, 'transformer_channels': 192,
                'ffn_dim': 384, 'transformer_blocks': 4, 'patch_h': 2,
                'patch_w': 2, 'stride': 2, 'mv_expand_ratio': 4,
                'num_heads': 4, 'block_type': 'mobilevit', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0
            },
            'layer5': {
                'out_channels': 160, 'transformer_channels': 240,
                'ffn_dim': 480, 'transformer_blocks': 3, 'patch_h': 2,
                'patch_w': 2, 'stride': 2, 'mv_expand_ratio': 4,
                'num_heads': 4, 'block_type': 'mobilevit', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0
            },
            'last_layer_exp_factor': 4,
            'cls_dropout': 0.1
        },
    ),
    head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=640,
        out_channels=channel_cfg['num_output_channels'],
        loss_keypoint=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
        output_heatmaps=True
    ))

# base dataset settings
dataset_type = 'CocoDataset'
data_mode = 'topdown'
data_root = '/data/zgchen/ViTPose/tools/data/coco/'

# pipelines
train_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', target_type='heatmap', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(type='GetBBoxCenterScale', padding=1.5),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs'),
]

# data loaders
train_dataloader = dict(
    batch_size=64,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_train2017.json',
        data_prefix=dict(img='train2017/'),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=64,
    num_workers=4,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_val2017.json',
        bbox_file='/data/zgchen/ViTPose/tools/data/coco/person_detection_results/'
        'COCO_val2017_detections_AP_H_56_person.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# evaluators
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
test_evaluator = val_evaluator

@ChenZhenGui (Contributor, Author)

And this is the cam_cfg:

# Copyright (c) OpenMMLab. All rights reserved.
executor_cfg = dict(
    # Basic configurations of the executor
    name='Pose Estimation',
    camera_id=0,
    # Define nodes.
    # The configuration of a node usually includes:
    #   1. 'type': Node class name
    #   2. 'name': Node name
    #   3. I/O buffers (e.g. 'input_buffer', 'output_buffer'): specify the
    #      input and output buffer names. This may depend on the node class.
    #   4. 'enable_key': assign a hot-key to toggle enable/disable this node.
    #      This may depend on the node class.
    #   5. Other class-specific arguments
    nodes=[
        # 'DetectorNode':
        # This node performs object detection from the frame image using an
        # MMDetection model.
        dict(
            type='DetectorNode',
            name='detector',
            model_config='D://pythonProject//mmpose//demo//mmdetection_cfg//'
            'ssdlite_mobilenetv2-scratch_8xb24-600e_coco.py',
            model_checkpoint='https://download.openmmlab.com'
            '/mmdetection/v2.0/ssd/'
            'ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_'
            'scratch_600e_coco_20210629_110627-974d9307.pth',
            input_buffer='_input_',  # _input_ is an executor-reserved buffer
            output_buffer='det_result'),
        # 'TopDownPoseEstimatorNode':
        # This node performs keypoint detection from the frame image using an
        # MMPose top-down model. Detection results are needed.
        dict(
            type='TopDownPoseEstimatorNode',
            name='human pose estimator',
            model_config='D:/pythonProject/mmpose/configs/body_2d_keypoint/'
            'topdown_heatmap/coco/mobilevit_coco-256x192.py',
            model_checkpoint='D:/pythonProject/mmpose/work_dirs/'
            'AP_epoch_300.pth',
            labels=['person'],
            input_buffer='det_result',
            output_buffer='human_pose'),
        dict(
            type='TopDownPoseEstimatorNode',
            name='animal pose estimator',
            model_config='configs/animal_2d_keypoint/topdown_heatmap/'
            'animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py',
            model_checkpoint='https://download.openmmlab.com/mmpose/animal/'
            'hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth',
            labels=['cat', 'dog', 'horse', 'sheep', 'cow'],
            input_buffer='human_pose',
            output_buffer='animal_pose'),
        # 'ObjectAssignerNode':
        # This node binds the latest model inference result with the current
        # frame. (This means the frame image and inference result may be
        # asynchronous.)
        dict(
            type='ObjectAssignerNode',
            name='object assigner',
            frame_buffer='_frame_',  # _frame_ is an executor-reserved buffer
            object_buffer='animal_pose',
            output_buffer='frame'),
        # 'ObjectVisualizerNode':
        # This node draws the pose visualization result on the frame image.
        # Pose results are needed.
        dict(
            type='ObjectVisualizerNode',
            name='object visualizer',
            enable_key='v',
            enable=True,
            show_bbox=True,
            must_have_keypoint=False,
            show_keypoint=True,
            input_buffer='frame',
            output_buffer='vis'),
        # 'SunglassesEffectNode':
        # This node draws the sunglasses effect on the frame image.
        # Pose results are needed.
        dict(
            type='SunglassesEffectNode',
            name='sunglasses',
            enable_key='s',
            enable=False,
            input_buffer='vis',
            output_buffer='vis_sunglasses'),
        # 'BigeyeEffectNode':
        # This node draws the big-eye effect on the frame image.
        # Pose results are needed.
        dict(
            type='BigeyeEffectNode',
            name='big-eye',
            enable_key='b',
            enable=False,
            input_buffer='vis_sunglasses',
            output_buffer='vis_bigeye'),
        # 'NoticeBoardNode':
        # This node shows a notice board with given content, e.g. help
        # information.
        dict(
            type='NoticeBoardNode',
            name='instruction',
            enable_key='h',
            enable=True,
            input_buffer='vis_bigeye',
            output_buffer='vis_notice',
            content_lines=[
                'This is a demo for pose visualization and simple image '
                'effects. Have fun!', '', 'Hot-keys:',
                '"v": Pose estimation result visualization',
                '"s": Sunglasses effect B-)', '"b": Big-eye effect 0_0',
                '"h": Show help information',
                '"m": Show diagnostic information', '"q": Exit'
            ],
        ),
        # 'MonitorNode':
        # This node shows diagnostic information in the frame image. It can
        # be used for debugging or monitoring system resource status.
        dict(
            type='MonitorNode',
            name='monitor',
            enable_key='m',
            enable=False,
            input_buffer='vis_notice',
            output_buffer='display'),
        # 'RecorderNode':
        # This node saves the output video into a file.
        dict(
            type='RecorderNode',
            name='recorder',
            out_video_file='webcam_demo.mp4',
            input_buffer='display',
            # _display_ is an executor-reserved buffer
            output_buffer='_display_')
    ])
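
For reference, webcam_demo.py consumes this file by instantiating an executor from `executor_cfg`. A rough sketch of that entry point, assuming the MMPose 1.x webcam API (check your local demo/webcam_demo.py for the exact code):

from mmengine import Config

# WebcamExecutor's import path follows MMPose 1.x; verify it against
# the version you have installed.
from mmpose.apis.webcam import WebcamExecutor

# Hypothetical path: point this at the cam_cfg file shown above.
cfg = Config.fromfile('demo/webcam_cfg/pose_estimation.py')

# Build all nodes declared in `executor_cfg` and start the camera loop.
executor = WebcamExecutor(**cfg.executor_cfg)
executor.run()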

@Ben-Louis (Collaborator)

This bug is caused by the different metainfo formats used by MMPose and MMDetection. Thank you very much for pointing it out. You can modify your code as in #1813 to fix it.
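
Without the full traceback it is hard to say more here, but the general shape of such fixes is to normalize the detector's output labels to the form the pose node expects before the label check. A hedged sketch of that general idea only, not the actual patch in #1813 (`objects` and `det_classes` are hypothetical names, not MMPose internals):

# Sketch: map MMDetection's integer class indices to name strings before
# comparing them with the string labels (e.g. ['person']) configured on
# the pose-estimator node.
def filter_objects_by_label(objects, det_classes, wanted_labels):
    """Keep detected objects whose class name is in `wanted_labels`.

    objects: list of dicts, each with a 'label' from the detector.
    det_classes: sequence of class names from the detector's metainfo.
    wanted_labels: class-name strings accepted by the pose node.
    """
    kept = []
    for obj in objects:
        label = obj['label']
        # Integer index -> class name, e.g. 0 -> 'person' for COCO.
        name = det_classes[label] if isinstance(label, int) else label
        if name in wanted_labels:
            kept.append(obj)
    return kept

For example, with `det_classes=('person', 'bicycle', ...)` and `wanted_labels=['person']`, a detection carrying `label=0` is kept while all other classes are dropped.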

@Tau-J closed this as completed on Apr 13, 2023.