Dataset overview (#548)
* homepage

Signed-off-by: lizz <lizz@sensetime.com>

* Dataset overview

Signed-off-by: lizz <lizz@sensetime.com>

* Fix

Signed-off-by: lizz <lizz@sensetime.com>
innerlee authored Jan 18, 2021
1 parent e1cd981 commit afd7cc9
Showing 20 changed files with 138 additions and 23 deletions.
1 change: 1 addition & 0 deletions docs/index.rst
@@ -13,6 +13,7 @@ Welcome to MMAction2's documentation!
   :maxdepth: 2
   :caption: Datasets

   datasets.md
   data_preparation.md
   supported_datasets.md

2 changes: 1 addition & 1 deletion docs/merge_docs.sh
@@ -34,7 +34,7 @@ sed -i 's/md###t/html#t/g' detection_models.md
sed -i "s/md###t/html#t/g" demo.md

sed -i 's/# Preparing/# /g' prepare_data.md
-sed -i 's/#/##&/' prepare_data.md
+sed -i 's/#/#&/' prepare_data.md

sed -i '1i\# Action Localization Models' localization_models.md
sed -i '1i\# Action Recognition Models' recognition_models.md
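The one-character fix above changes `s/#/##&/` to `s/#/#&/`: in sed, `&` expands to the matched text, and without a `/g` flag only the first `#` on each line is replaced, so every Markdown heading is demoted by exactly one level rather than two. A rough Python equivalent, for illustration (the function name is ours):

```python
def demote_headings(text):
    # Mimic `sed 's/#/#&/'`: duplicate the first '#' on each line,
    # pushing every Markdown heading down exactly one level.
    # Lines without '#' pass through unchanged, just as in sed.
    return '\n'.join(line.replace('#', '##', 1) for line in text.splitlines())

print(demote_headings('# Preparing UCF101\n## Step 1. Download Annotations'))
# ## Preparing UCF101
# ### Step 1. Download Annotations
```

The old `s/#/##&/` form would have produced `###`/`####` here, shifting every heading two levels too deep in the merged docs.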
74 changes: 73 additions & 1 deletion docs/stat.py
@@ -10,7 +10,7 @@

def anchor(name):
    return re.sub(r'-+', '-', re.sub(r'[^a-zA-Z0-9]', '-',
-                  name.strip().lower()))
+                  name.strip().lower())).strip('-')
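The change to `anchor()` appends `.strip('-')`, so titles that begin or end with punctuation no longer produce slugs with dangling hyphens, which would not match the anchor ids generated for the rendered pages. A standalone copy of the patched helper, for illustration:

```python
import re

def anchor(name):
    # Lowercase, map each non-alphanumeric character to '-', collapse runs
    # of '-' to one, then (the new part) strip hyphens at either end.
    return re.sub(r'-+', '-', re.sub(r'[^a-zA-Z0-9]', '-',
                  name.strip().lower())).strip('-')

print(anchor('AVA: A Video Dataset!'))  # ava-a-video-dataset
# Before this commit, the trailing '!' left 'ava-a-video-dataset-'.
```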


# Count algorithms
@@ -85,8 +85,80 @@ def anchor(name):
* Number of papers: {len(allpapers)}
{countstr}
For supported datasets, see [datasets overview](datasets.md).
{msglist}
"""

with open('modelzoo.md', 'w') as f:
    f.write(modelzoo)

# Count datasets

files = ['supported_datasets.md']
# files = sorted(glob.glob('docs/tasks/*.md'))

datastats = []

for f in files:
    with open(f, 'r') as content_file:
        content = content_file.read()

    # title
    title = content.split('\n')[0].replace('#', '')

    # count papers
    papers = set(
        (papertype, titlecase.titlecase(paper.lower().strip()))
        for (papertype, paper) in re.findall(
            r'\[([A-Z]*?)\]\s*\n.*?\btitle\s*=\s*{(.*?)}', content, re.DOTALL))
    # paper links
    revcontent = '\n'.join(list(reversed(content.splitlines())))
    paperlinks = {}
    for _, p in papers:
        print(p)
        q = p.replace('\\', '\\\\').replace('?', '\\?')
        paperlinks[p] = ', '.join(
            (f'[{p.strip()} ⇨]({splitext(basename(f))[0]}.html#{anchor(p)})'
             for p in re.findall(
                 rf'\btitle\s*=\s*{{\s*{q}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n',
                 revcontent, re.DOTALL | re.IGNORECASE)))
        print(' ', paperlinks[p])
    paperlist = '\n'.join(
        sorted(f' - [{t}] {x} ({paperlinks[x]})' for t, x in papers))

    statsmsg = f"""
## [{title}]({f})
* Number of papers: {len(papers)}
{paperlist}
"""

    datastats.append((papers, configs, ckpts, statsmsg))

alldatapapers = func.reduce(lambda a, b: a.union(b),
                            [p for p, _, _, _ in datastats])

# Summarize

msglist = '\n'.join(x for _, _, _, x in stats)
datamsglist = '\n'.join(x for _, _, _, x in datastats)
papertypes, papercounts = np.unique([t for t, _ in alldatapapers],
                                    return_counts=True)
countstr = '\n'.join(
    [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])

modelzoo = f"""
# Overview
* Number of papers: {len(alldatapapers)}
{countstr}
For supported action algorithms, see [modelzoo overview](modelzoo.md).
{datamsglist}
"""

with open('datasets.md', 'w') as f:
    f.write(modelzoo)
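The dataset counter above keys on an uppercase `[DATASET]` marker followed by a BibTeX `title` field — the same block this commit adds to every `tools/data/*/README.md` below. A sketch of that extraction on a minimal, made-up snippet:

```python
import re

# A minimal README-style snippet in the format this commit standardizes:
# an uppercase [DATASET] marker followed by a BibTeX entry.
content = (
    '# Preparing AVA\n'
    '\n'
    '[DATASET]\n'
    '\n'
    '@inproceedings{gu2018ava,\n'
    '  title={Ava: A video dataset of spatio-temporally localized'
    ' atomic visual actions},\n'
    '}\n'
)

# The same pattern stat.py uses: pair each marker with the paper title
# from the BibTeX entry that follows it.
papers = re.findall(r'\[([A-Z]*?)\]\s*\n.*?\btitle\s*=\s*{(.*?)}',
                    content, re.DOTALL)
print(papers)
# [('DATASET', 'Ava: A video dataset of spatio-temporally localized atomic visual actions')]
```

This is why each README gains the `[DATASET]` line before its citation block: without the marker, the regex never fires and the paper is omitted from the generated `datasets.md`.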
32 changes: 16 additions & 16 deletions docs/supported_datasets.md
@@ -1,26 +1,26 @@
# Supported Datasets

- Action Recognition
-  - [UCF101](https://www.crcv.ucf.edu/research/data-sets/ucf101/): See [preparing_ucf101](/tools/data/ucf101/README.md).
-  - [HMDB51](https://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/): See [preparing_hmdb51](/tools/data/hmdb51/README.md).
-  - [Kinetics-[400/600/700]](https://deepmind.com/research/open-source/kinetics): See [preparing_kinetics](/tools/data/kinetics/README.md)
-  - [Something-Something V1](https://20bn.com/datasets/something-something/v1): See [preparing_sthv1](/tools/data/sthv1/README.md)
-  - [Something-Something V2](https://20bn.com/datasets/something-something): See [preparing_sthv2](/tools/data/sthv2/README.md)
-  - [Moments in Time](http://moments.csail.mit.edu/): See [preparing_mit](/tools/data/mit/README.md)
-  - [Multi-Moments in Time](http://moments.csail.mit.edu/challenge_iccv_2019.html): See [preparing_mmit](/tools/data/mmit/README.md)
-  - [HVU](https://github.com/holistic-video-understanding/HVU-Dataset): See [preparing_hvu](/tools/data/hvu/README.md)
-  - [Jester](https://20bn.com/datasets/jester/v1): See [preparing_jester](/tools/data/jester/README.md)
-  - [GYM](https://sdolivia.github.io/FineGym/): See [preparing_gym](/tools/data/gym/README.md)
-  - [ActivityNet](http://activity-net.org/): See [praparing_activitynet](/tools/data/activitynet/README.md)
+  - [UCF101](/tools/data/ucf101/README.md) \[ [Homepage](https://www.crcv.ucf.edu/research/data-sets/ucf101/) \].
+  - [HMDB51](/tools/data/hmdb51/README.md) \[ [Homepage](https://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/) \].
+  - [Kinetics-[400/600/700]](/tools/data/kinetics/README.md) \[ [Homepage](https://deepmind.com/research/open-source/kinetics) \]
+  - [Something-Something V1](/tools/data/sthv1/README.md) \[ [Homepage](https://20bn.com/datasets/something-something/v1) \]
+  - [Something-Something V2](/tools/data/sthv2/README.md) \[ [Homepage](https://20bn.com/datasets/something-something) \]
+  - [Moments in Time](/tools/data/mit/README.md) \[ [Homepage](http://moments.csail.mit.edu/) \]
+  - [Multi-Moments in Time](/tools/data/mmit/README.md) \[ [Homepage](http://moments.csail.mit.edu/challenge_iccv_2019.html) \]
+  - [HVU](/tools/data/hvu/README.md) \[ [Homepage](https://github.com/holistic-video-understanding/HVU-Dataset) \]
+  - [Jester](/tools/data/jester/README.md) \[ [Homepage](https://20bn.com/datasets/jester/v1) \]
+  - [GYM](/tools/data/gym/README.md) \[ [Homepage](https://sdolivia.github.io/FineGym/) \]
+  - [ActivityNet](/tools/data/activitynet/README.md) \[ [Homepage](http://activity-net.org/) \]

- Temporal Action Detection
-  - [ActivityNet](http://activity-net.org/): See [praparing_activitynet](/tools/data/activitynet/README.md)
-  - [THUMOS14](https://www.crcv.ucf.edu/THUMOS14/download.html): See [preparing_thumos14](/tools/data/thumos14/README.md)
+  - [ActivityNet](/tools/data/activitynet/README.md) \[ [Homepage](http://activity-net.org/) \]
+  - [THUMOS14](/tools/data/thumos14/README.md) \[ [Homepage](https://www.crcv.ucf.edu/THUMOS14/download.html) \]

- Spatial Temporal Action Detection
-  - [AVA](https://research.google.com/ava/index.html): See [preparing_ava](/tools/data/ava/README.md)
-  - [UCF101-24](http://www.thumos.info/download.html): See [preparing_ucf101_24](/tools/data/ucf101_24/README.md)
-  - [JHMDB](http://jhmdb.is.tue.mpg.de/): See [preparing_jhmdb](/tools/data/jhmdb/README.md)
+  - [AVA](/tools/data/ava/README.md) \[ [Homepage](https://research.google.com/ava/index.html) \]
+  - [UCF101-24](/tools/data/ucf101_24/README.md) \[ [Homepage](http://www.thumos.info/download.html) \]
+  - [JHMDB](/tools/data/jhmdb/README.md) \[ [Homepage](http://jhmdb.is.tue.mpg.de/) \]

The supported datasets are listed above.
We provide shell scripts for data preparation under the path `$MMACTION2/tools/data/`.
2 changes: 2 additions & 0 deletions tools/data/activitynet/README.md
@@ -2,6 +2,8 @@

## Introduction

[DATASET]

```
@article{Heilbron2015ActivityNetAL,
title={ActivityNet: A large-scale video benchmark for human activity understanding},
12 changes: 12 additions & 0 deletions tools/data/ava/README.md
@@ -1,5 +1,17 @@
# Preparing AVA

[DATASET]

```latex
@inproceedings{gu2018ava,
title={Ava: A video dataset of spatio-temporally localized atomic visual actions},
author={Gu, Chunhui and Sun, Chen and Ross, David A and Vondrick, Carl and Pantofaru, Caroline and Li, Yeqing and Vijayanarasimhan, Sudheendra and Toderici, George and Ricco, Susanna and Sukthankar, Rahul and others},
booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
pages={6047--6056},
year={2018}
}
```

For basic dataset information, please refer to the official [website](https://research.google.com/ava/index.html).
Before we start, please make sure that the directory is located at `$MMACTION2/tools/data/ava/`.

2 changes: 2 additions & 0 deletions tools/data/gym/README.md
@@ -2,6 +2,8 @@

## Introduction

[DATASET]

```
@inproceedings{shao2020finegym,
title={Finegym: A hierarchical video dataset for fine-grained action understanding},
2 changes: 2 additions & 0 deletions tools/data/hmdb51/README.md
@@ -2,6 +2,8 @@

## Introduction

[DATASET]

```
@article{Kuehne2011HMDBAL,
title={HMDB: A large video database for human motion recognition},
2 changes: 2 additions & 0 deletions tools/data/hvu/README.md
@@ -2,6 +2,8 @@

## Introduction

[DATASET]

```
@article{Diba2019LargeSH,
title={Large Scale Holistic Video Understanding},
2 changes: 2 additions & 0 deletions tools/data/jester/README.md
@@ -2,6 +2,8 @@

## Introduction

[DATASET]

```
@InProceedings{Materzynska_2019_ICCV,
author = {Materzynska, Joanna and Berger, Guillaume and Bax, Ingo and Memisevic, Roland},
2 changes: 2 additions & 0 deletions tools/data/jhmdb/README.md
@@ -2,6 +2,8 @@

## Introduction

[DATASET]

```
@inproceedings{Jhuang:ICCV:2013,
title = {Towards understanding action recognition},
2 changes: 2 additions & 0 deletions tools/data/kinetics/README.md
@@ -2,6 +2,8 @@

## Introduction

[DATASET]

```
@inproceedings{inproceedings,
author = {Carreira, J. and Zisserman, Andrew},
2 changes: 2 additions & 0 deletions tools/data/mit/README.md
@@ -2,6 +2,8 @@

## Introduction

[DATASET]

```
@article{monfortmoments,
title={Moments in Time Dataset: one million videos for event understanding},
2 changes: 2 additions & 0 deletions tools/data/mmit/README.md
@@ -2,6 +2,8 @@

## Introduction

[DATASET]

```
@misc{monfort2019multimoments,
title={Multi-Moments in Time: Learning and Interpreting Models for Multi-Action Video Understanding},
4 changes: 3 additions & 1 deletion tools/data/omnisource/README.md
@@ -1,7 +1,9 @@
-## Preparing OmniSource
+# Preparing OmniSource

## Introduction

[DATASET]

```
@article{duan2020omni,
title={Omni-sourced Webly-supervised Learning for Video Recognition},
2 changes: 2 additions & 0 deletions tools/data/sthv1/README.md
@@ -2,6 +2,8 @@

## Introduction

[DATASET]

```
@misc{goyal2017something,
title={The "something something" video database for learning and evaluating visual common sense},
2 changes: 2 additions & 0 deletions tools/data/sthv2/README.md
@@ -2,6 +2,8 @@

## Introduction

[DATASET]

```
@misc{goyal2017something,
title={The "something something" video database for learning and evaluating visual common sense},
10 changes: 6 additions & 4 deletions tools/data/thumos14/README.md
@@ -2,12 +2,14 @@

## Introduction

[DATASET]

```
@misc{THUMOS14,
-  author = "Jiang, Y.-G. and Liu, J. and Roshan Zamir, A. and Toderici, G. and Laptev,
-  I. and Shah, M. and Sukthankar, R.",
-  title = "{THUMOS} Challenge: Action Recognition with a Large
-  Number of Classes",
+  author = {Jiang, Y.-G. and Liu, J. and Roshan Zamir, A. and Toderici, G. and Laptev,
+  I. and Shah, M. and Sukthankar, R.},
+  title = {{THUMOS} Challenge: Action Recognition with a Large
+  Number of Classes},
howpublished = "\url{http://crcv.ucf.edu/THUMOS14/}",
Year = {2014}
}
2 changes: 2 additions & 0 deletions tools/data/ucf101/README.md
@@ -2,6 +2,8 @@

## Introduction

[DATASET]

```
@article{Soomro2012UCF101AD,
title={UCF101: A Dataset of 101 Human Actions Classes From Videos in The Wild},
2 changes: 2 additions & 0 deletions tools/data/ucf101_24/README.md
@@ -2,6 +2,8 @@

## Introduction

[DATASET]

```
@article{Soomro2012UCF101AD,
title={UCF101: A Dataset of 101 Human Actions Classes From Videos in The Wild},
