FrenchKrab · hbredin · Jan 27, 2023 · Apr 17, 2023 · Apr 18, 2023 · Apr 18, 2023
diff --git a/README.md b/README.md
@@ -5,6 +5,7 @@ This repository aims to centralize scripts that prepare datasets to be used with
 Currently available : 
 - [AISHELL4](aishell4)
 - [MSDWild](msdwild)
+- [VoxCeleb](voxceleb)
 
 To setup each dataset, refer to the `README.md` contained in their respective folder.
 

diff --git a/voxceleb/README.md b/voxceleb/README.md
@@ -0,0 +1,28 @@
+# VoxCeleb for pyannote.audio
+
+## Usage
+
+```python
+from pyannote.database import registry
+registry.load_database("database.yml")
+
+protocol = registry.get_protocol("VoxCeleb.SpeakerVerification.VoxCeleb")
+
+# iterate over all VoxCeleb1 dev (except speakers whose names start with U, V, or W) and over all VoxCeleb2 dev
+for file in protocol.train():
+    pass
+
+# iterate over all VoxCeleb1 dev speakers whose names start with U, V, or W
+for file in protocol.development():
+    pass
+
+# iterate over target/non-target trials between the above UVW speakers
+for trial in protocol.development_trial():
+    pass
+
+# iterate over target/non-target trials defined in VoxCeleb1 "original"
+for trial in protocol.test_trial():
+    pass
+```
+
+
diff --git a/voxceleb/database.yml b/voxceleb/database.yml
@@ -0,0 +1,102 @@
+Databases:
+  VoxCeleb:
+    - VoxCeleb1/dev/wav/{uri}.wav
+    - VoxCeleb1/test/wav/{uri}.wav
+    - VoxCeleb2/dev/aac/{uri}.wav
+    - VoxCeleb2/test/aac/{uri}.wav
+
+Protocols:
+  VoxCeleb:
+    SpeakerDiarization:
+      # original VoxCeleb1 split (no train)
+      VoxCeleb1:
+        development:
+          uri: vox1.dev.lst
+          annotation: vox1.dev.rttm
+          annotated: vox1.dev.uem
+          duration: vox1.dev.duration.map
+        test:
+          uri: vox1.tst.lst
+          annotation: vox1.tst.rttm
+          annotated: vox1.tst.uem
+          duration: vox1.tst.duration.map
+      # original VoxCeleb2 split (no train)
+      VoxCeleb2:
+        development:
+          uri: vox2.dev.lst
+          annotation: vox2.dev.rttm
+          annotated: vox2.dev.uem
+          duration: vox2.dev.duration.map
+        test:
+          uri: vox2.tst.lst
+          annotation: vox2.tst.rttm
+          annotated: vox2.tst.uem
+          duration: vox2.tst.duration.map
+
+    SpeakerVerification:
+      # trn: all VoxCeleb1 development speakers except those whose names start with U, V, or W.
+      # dev: VoxCeleb1 development speakers whose names start with U, V, or W.
+      # tst: original VoxCeleb1 test set
+      VoxCeleb1:
+        train:
+          uri: vox1.dev_a.lst
+          annotation: vox1.dev.rttm
+          annotated: vox1.dev.uem
+          duration: vox1.dev.duration.map
+        development:
+          uri: vox1.dev_b.lst
+          annotation: vox1.dev.rttm
+          annotated: vox1.dev.uem
+          duration: vox1.dev.duration.map
+          trial: vox1.dev_b.trial.txt
+        test:
+          uri: vox1.tst.lst
+          annotation: vox1.tst.rttm
+          annotated: vox1.tst.uem
+          duration: vox1.tst.duration.map
+          trial: vox1.tst.trial.txt
+
+      VoxCeleb:
+        # trn: all VoxCeleb2 development speakers + all VoxCeleb1 development speakers except those whose names start with U, V, or W.
+        # dev: VoxCeleb1 development speakers whose names start with U, V, or W.
+        # tst: original VoxCeleb1 test set
+        train:
+          uri: vox.trn.lst
+          annotation: vox.trn.rttm
+          annotated: vox.trn.uem
+          duration: vox.trn.duration.map
+        development:
+          uri: vox1.dev_b.lst
+          annotation: vox1.dev.rttm
+          annotated: vox1.dev.uem
+          duration: vox1.dev.duration.map
+          trial: vox1.dev_b.trial.txt
+        test:
+          uri: vox1.tst.lst
+          annotation: vox1.tst.rttm
+          annotated: vox1.tst.uem
+          duration: vox1.tst.duration.map
+          trial: vox1.tst.trial.txt
+
+      VoxCelebMini:
+        # trn: first 100 speakers of the VoxCeleb protocol
+        # dev: same as VoxCeleb.SpeakerVerification.VoxCeleb protocol
+        # tst: same as VoxCeleb.SpeakerVerification.VoxCeleb protocol
+        train:
+          uri: mini_vox/mini_vox.trn.lst
+          annotation: mini_vox/mini_vox.trn.rttm
+          annotated: mini_vox/mini_vox.trn.uem
+          duration: mini_vox/mini_vox.trn.duration.map
+        development:
+          uri: vox1.dev_b.lst
+          annotation: vox1.dev.rttm
+          annotated: vox1.dev.uem
+          duration: vox1.dev.duration.map
+          trial: vox1.dev_b.trial.txt
+        test:
+          uri: vox1.tst.lst
+          annotation: vox1.tst.rttm
+          annotated: vox1.tst.uem
+          duration: vox1.tst.duration.map
+          trial: vox1.tst.trial.txt
+