{"payload":{"pageCount":3,"repositories":[{"type":"Public","name":"Dynamic-MDETR","owner":"MCG-NJU","isFork":false,"description":"[TPAMI 2024] Dynamic MDETR: A Dynamic Multimodal Transformer Decoder for Visual Grounding","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":13,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-11T05:00:24.549Z"}},{"type":"Public","name":"PRVG","owner":"MCG-NJU","isFork":false,"description":"[CVIU 2024] End-to-end dense video grounding via parallel regression","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":6,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-11T04:58:59.719Z"}},{"type":"Public","name":"BIVDiff","owner":"MCG-NJU","isFork":false,"description":"[CVPR 2024] BIVDiff: A Training-free Framework for General-Purpose Video Synthesis via Bridging Image and Video Diffusion Models","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":49,"forksCount":1,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-11T04:58:05.656Z"}},{"type":"Public","name":"VFIMamba","owner":"MCG-NJU","isFork":false,"description":"VFIMamba: Video Frame Interpolation with State Space Models ","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":33,"forksCount":4,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-03T12:30:13.041Z"}},{"type":"Public","name":"SPLAM","owner":"MCG-NJU","isFork":false,"description":"[ECCV 2024 Oral] SPLAM: Accelerating Image Generation with Sub-path Linear Approximation Model","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":14,"forksCount":1,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-22T10:07:05.664Z"}},{"type":"Public","name":"CoMAE","owner":"MCG-NJU","isFork":false,"description":"[AAAI 2023 Oral] CoMAE: Single Model Hybrid Pre-training on Small-Scale RGB-D Datasets","allTopics":["rgb-d","pre-training","aaai2023"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":3,"starsCount":34,"forksCount":4,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-20T03:51:52.023Z"}},{"type":"Public","name":"SparseOcc","owner":"MCG-NJU","isFork":false,"description":"[ECCV 2024] Fully Sparse 3D Occupancy Prediction & RayIoU Evaluation Metric","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":16,"starsCount":224,"forksCount":16,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-15T03:00:34.547Z"}},{"type":"Public","name":"MOTIP","owner":"MCG-NJU","isFork":false,"description":"Multiple Object Tracking as ID Prediction","allTopics":["tracking","computer","multi-object-tracking","multiple-object-tracking"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":5,"starsCount":86,"forksCount":8,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-14T14:14:23.066Z"}},{"type":"Public","name":"ProVP","owner":"MCG-NJU","isFork":false,"description":"[IJCV] Progressive Visual Prompt Learning with Contrastive Feature Re-formation","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":3,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-10T07:07:59.166Z"}},{"type":"Public","name":"CamLiFlow","owner":"MCG-NJU","isFork":false,"description":"[CVPR 2022 Oral & TPAMI 2023] Learning Optical Flow and Scene Flow with Bidirectional Camera-LiDAR Fusion","allTopics":["point-cloud","optical-flow","multimodal","scene-flow","cvpr2022"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":221,"forksCount":21,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-29T16:01:09.276Z"}},{"type":"Public","name":"ZeroI2V","owner":"MCG-NJU","isFork":false,"description":"[ECCV 2024] ZeroI2V: Zero-Cost Adaptation of Pre-trained Transformers from Image to Video","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":12,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-29T15:48:33.619Z"}},{"type":"Public","name":"LinK","owner":"MCG-NJU","isFork":false,"description":"[CVPR 2023] LinK: Linear Kernel for LiDAR-based 3D Perception","allTopics":["detection","point-cloud","segmentation","autonomous-driving","large-kernels","nuscenes","semantickitti"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":3,"starsCount":83,"forksCount":6,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-27T13:57:22.652Z"}},{"type":"Public","name":"VideoEval","owner":"MCG-NJU","isFork":false,"description":"VideoEval: Comprehensive Benchmark Suite for Low-Cost Evaluation of Video Foundation Model","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":6,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-10T08:53:39.120Z"}},{"type":"Public","name":"SGM-VFI","owner":"MCG-NJU","isFork":false,"description":"[CVPR 2024] Sparse Global Matching for Video Frame Interpolation with Large Motion","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":59,"forksCount":5,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-04T06:56:04.546Z"}},{"type":"Public","name":"ViT-TAD","owner":"MCG-NJU","isFork":false,"description":"[CVPR 2024] Adapting Short-Term Transformers for Action Detection in Untrimmed Videos","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":7,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-11T06:59:33.052Z"}},{"type":"Public","name":"AMD","owner":"MCG-NJU","isFork":false,"description":"[CVPR 2024] Asymmetric Masked Distillation for Pre-Training Small Foundation Models","allTopics":["action-recognition","video-understanding","distillation","self-supervised-learning","temporal-action-detection","foundation-models","small-models","cvpr2024"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":11,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-04T09:36:51.336Z"}},{"type":"Public","name":"SportsHHI","owner":"MCG-NJU","isFork":false,"description":"[CVPR 2024] SportsHHI: A Dataset for Human-Human Interaction Detection in Sports Videos","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":11,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-21T10:20:58.900Z"}},{"type":"Public","name":"MeMOTR","owner":"MCG-NJU","isFork":false,"description":"[ICCV 2023] MeMOTR: Long-Term Memory-Augmented Transformer for Multi-Object Tracking","allTopics":["tracking","computer-vision","deep-learning","multi-object-tracking"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":5,"starsCount":142,"forksCount":9,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-09T09:05:12.410Z"}},{"type":"Public","name":"MixFormerV2","owner":"MCG-NJU","isFork":false,"description":"[NeurIPS 2023] MixFormerV2: Efficient Fully Transformer Tracking","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":24,"starsCount":136,"forksCount":18,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-20T11:16:56.913Z"}},{"type":"Public","name":"LogN","owner":"MCG-NJU","isFork":false,"description":"[IJCV 2024] Logit Normalization for Long-Tail Object Detection","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":4,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-14T06:32:46.298Z"}},{"type":"Public","name":"SparseBEV","owner":"MCG-NJU","isFork":false,"description":"[ICCV 2023] SparseBEV: High-Performance Sparse 3D Object Detection from Multi-Camera Videos","allTopics":["transformer","autonomous-driving","3d-object-detection","bev-perception"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":11,"starsCount":337,"forksCount":24,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-31T16:01:21.015Z"}},{"type":"Public","name":"MixFormer","owner":"MCG-NJU","isFork":false,"description":"[CVPR 2022 Oral & TPAMI 2024] MixFormer: End-to-End Tracking with Iterative Mixed Attention ","allTopics":["tracking","vot","cvpr2022"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":39,"starsCount":447,"forksCount":75,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-02-28T09:50:34.314Z"}},{"type":"Public","name":"VLG","owner":"MCG-NJU","isFork":false,"description":"VLG: General Video Recognition with Web Textual Knowledge (https://arxiv.org/abs/2212.01638)","allTopics":["action-recognition","few-shot-recognition","open-set-recognition","long-tailed-recognition","video-language"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":8,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-01-18T07:23:38.330Z"}},{"type":"Public","name":"StageInteractor","owner":"MCG-NJU","isFork":false,"description":"[ICCV 2023] StageInteractor: Query-based Object Detector with Cross-stage Interaction","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":9,"forksCount":1,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-12-22T13:48:13.179Z"}},{"type":"Public","name":"VideoMAE","owner":"MCG-NJU","isFork":false,"description":"[NeurIPS 2022 Spotlight] VideoMAE: Masked Autoencoders are Data-Efficient Learners for Self-Supervised Video Pre-Training","allTopics":["transformer","video-understanding","mae","video-analysis","video-representation-learning","self-supervised-learning","masked-autoencoder","vision-transformer","video-transformer","neurips-2022","pytorch","action-recognition"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":41,"starsCount":1330,"forksCount":133,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-12-08T13:44:48.183Z"}},{"type":"Public","name":"DGN","owner":"MCG-NJU","isFork":false,"description":"[IJCV 2023] Dual Graph Networks for Pose Estimation in Crowded Scenes","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":7,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-12-05T07:27:30.896Z"}},{"type":"Public","name":"PointTAD","owner":"MCG-NJU","isFork":false,"description":"[NeurIPS 2022] PointTAD: Multi-Label Temporal Action Detection with Learnable Query Points","allTopics":["video-understanding","action-detection","temporal-action-detection","neurips-2022"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":4,"starsCount":38,"forksCount":1,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-24T07:53:22.838Z"}},{"type":"Public","name":"DEQDet","owner":"MCG-NJU","isFork":false,"description":"[ICCV 2023] Deep Equilibrium Object Detection","allTopics":[],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":1,"starsCount":21,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-14T17:17:18.948Z"}},{"type":"Public","name":"MGMAE","owner":"MCG-NJU","isFork":false,"description":"[ICCV 2023] MGMAE: Motion Guided Masking for Video Masked Autoencoding","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":2,"starsCount":20,"forksCount":0,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-16T06:54:18.255Z"}},{"type":"Public","name":"EVAD","owner":"MCG-NJU","isFork":false,"description":"[ICCV 2023] Efficient Video Action Detection with Token Dropout and Context Refinement","allTopics":["pytorch","transformer","action-recognition","action-detection","efficient-transformers"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":4,"starsCount":25,"forksCount":3,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-09-27T08:30:23.710Z"}}],"repositoryCount":64,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"MCG-NJU repositories"}