Merge pull request #48 from dscripka/auto_training: Auto training

Showing 9 changed files with 1,698 additions and 14 deletions.
@@ -0,0 +1,101 @@
## Configuration file to be used with `train.py` to create custom wake word/phrase models

# The name of the model (will be used when creating directories and when saving the final .onnx and .tflite files)
model_name: "my_model"

# The target word/phrase to be detected by the model. Adding multiple unique words/phrases will
# still train only a single binary detection model, but it will activate on any one of the provided words/phrases.
target_phrase:
- "hey jarvis"

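# For example (hypothetical values), a single model that activates on either of two phrasings:
# target_phrase:
# - "hey jarvis"
# - "okay jarvis"
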
# Specific phrases that you do *not* want the model to activate on, outside of those generated automatically via phoneme overlap
# This can be a good way to reduce false positives if you notice that, in practice, certain words or phrases are problematic
custom_negative_phrases: []

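# For example (hypothetical values), phrases found to trigger false activations in practice:
# custom_negative_phrases:
# - "hey travis"
# - "hey harvest"
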
# The total number of positive samples to generate for training (minimum of 20,000 recommended, often 100,000+ is best)
n_samples: 10000

# The total number of positive samples to generate for validation and early stopping of model training
n_samples_val: 2000

# The batch size to use with Piper TTS when generating synthetic training data
tts_batch_size: 50

# The batch size to use when performing data augmentation on generated clips prior to training.
# It's recommended to keep this relatively small to ensure that there is enough variety in the augmentations.
augmentation_batch_size: 16

# The path to a fork of the piper-sample-generator repository for TTS (https://github.com/dscripka/piper-sample-generator)
piper_sample_generator_path: "./piper-sample-generator"

# The output directory for the generated synthetic clips, openwakeword features, and trained models
# Sub-directories will be automatically created for train and test clips for both positive and negative examples
output_dir: "./my_custom_model"

# The directories containing Room Impulse Response recordings
rir_paths:
- "./mit_rirs"

# The directories containing background audio files to mix with training data
background_paths:
- "./background_clips"

# The duplication rate for the background audio clips listed above (1 or higher). Can be useful as a way to oversample
# a particular type of background noise that is more relevant to a given deployment environment. Values apply in the
# same order as the background_paths list, and are only useful when multiple directories are provided there.
background_paths_duplication_rate:
- 1

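# For example (hypothetical paths and values), oversampling household noise 3x relative to generic clips:
# background_paths:
# - "./background_clips"
# - "./household_noise"
# background_paths_duplication_rate:
# - 1
# - 3
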
# The location of pre-computed openwakeword features for false-positive validation data.
# If you do not have deployment environment validation data, a good general purpose dataset with
# a reasonable mix of ~11 hours of speech, noise, and music is available here: https://huggingface.co/datasets/davidscripka/openwakeword_features
false_positive_validation_data_path: "./validation_set_features.npy"

# The number of times to apply augmentations to the generated training data.
# Values greater than 1 reuse each generated clip that many times; because the augmentations
# are intrinsically random, each round still produces unique training clips even though the
# underlying synthetic generations are the same. This can be a useful way to increase model
# robustness without having to generate extremely large numbers of synthetic examples.
augmentation_rounds: 1

# Paths to pre-computed openwakeword features for positive and negative data. Each file must be a saved
# .npy array (see the example notebook on manually training new models for details on how to create these).
# There is no limit on the number of files, but training speed will decrease as more
# data needs to be read from disk for each additional file.
# Also, there is a custom dataloader that uses memory-mapping when loading data, so the total size
# of the files is not limited by the amount of available system memory (though this may result
# in decreased training throughput depending on the speed of the underlying storage device). A fast
# NVMe SSD is recommended for optimal performance.

feature_data_files:
  "ACAV100M_sample": "./openwakeword_features_ACAV100M_2000_hrs_16bit.npy"

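# For example (the second file below is hypothetical), multiple feature sets can be listed,
# and each key can then be referenced in `batch_n_per_class`:
# feature_data_files:
#   "ACAV100M_sample": "./openwakeword_features_ACAV100M_2000_hrs_16bit.npy"
#   "my_custom_features": "./my_custom_features.npy"
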
# Define the number of examples from each data file per batch. Note that the key names here
# must correspond to those defined in the `feature_data_files` dictionary above (except for
# the `positive` and `adversarial_negative` keys, which are automatically defined). The sum
# of the values for each key defines the total batch size for training. Initial testing indicates
# that batch sizes of 1024-4096 work well in practice.

batch_n_per_class:
  "ACAV100M_sample": 1024
  "adversarial_negative": 50
  "positive": 50

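# With the values above, the total training batch size is 1024 + 50 + 50 = 1124,
# which falls within the recommended 1024-4096 range.
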
# Define the type and size of the openwakeword model to train. Increasing the layer size
# may result in a more capable model, at the cost of decreased inference speed. The default
# value (32) seems to work well in practice for most wake words/phrases.

model_type: "dnn"
layer_size: 32

# Define training parameters. The values below are recommended defaults for most applications,
# but unique deployment environments will likely require testing to determine which values
# are the most appropriate.

# The maximum number of steps to train the model
steps: 50000

# The maximum negative weight and target false positives per hour, used to control the auto training process
# The target false positive rate may not be achieved, and adjusting the maximum negative weight may be necessary
max_negative_weight: 1500
target_false_positives_per_hour: 0.2
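
To make the cross-references in this config concrete, below is a minimal sketch of loading and sanity-checking the file before training. It is illustrative only, not code from this PR: the filename `my_model.yaml` is hypothetical, PyYAML is assumed to be installed, and the checks are a hypothetical helper rather than part of `train.py`.

```python
# Minimal sketch (not from this PR): load the training config and verify the
# cross-references described in the comments above. Assumes PyYAML is installed;
# the filename "my_model.yaml" is hypothetical.
import yaml

with open("my_model.yaml", "r") as f:
    config = yaml.safe_load(f)

# Every key in `batch_n_per_class`, apart from the automatically defined
# `positive` and `adversarial_negative` keys, must match a key in
# `feature_data_files`.
auto_keys = {"positive", "adversarial_negative"}
missing = set(config["batch_n_per_class"]) - auto_keys - set(config["feature_data_files"])
if missing:
    raise ValueError(f"batch_n_per_class keys with no matching feature data file: {missing}")

# Duplication rates pair positionally with background_paths, so warn when the
# two lists do not line up one-to-one.
if len(config["background_paths_duplication_rate"]) != len(config["background_paths"]):
    print("Warning: background_paths_duplication_rate does not align with background_paths")

# The total training batch size is the sum of the per-class counts (1124 here).
print("Total batch size:", sum(config["batch_n_per_class"].values()))
```

The same config file is then passed to `train.py`, which drives clip generation, augmentation, and training from these settings.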