diff --git a/packages/google-cloud-speech/noxfile.py b/packages/google-cloud-speech/noxfile.py index a2eefbb6765f..9bc787f5ac16 100644 --- a/packages/google-cloud-speech/noxfile.py +++ b/packages/google-cloud-speech/noxfile.py @@ -125,6 +125,26 @@ def system(session): session.run("py.test", "--quiet", system_test_folder_path, *session.posargs) +@nox.session(python=["3.7"]) +def samples(session): + """Run the sample test suite.""" + # Sanity check: Only run tests if the environment variable is set. + if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): + session.skip("Credentials must be set via environment variable") + + samples_path = "samples" + if not os.path.exists(samples_path): + session.skip("Samples not found.") + + session.install("pyyaml") + session.install("sample-tester") + for local_dep in LOCAL_DEPS: + session.install("-e", local_dep) + session.install("-e", ".") + + session.run("sample-tester", samples_path, *session.posargs) + + @nox.session(python="3.7") def cover(session): """Run the final coverage report. diff --git a/packages/google-cloud-speech/samples/resources/brooklyn_bridge.flac b/packages/google-cloud-speech/samples/resources/brooklyn_bridge.flac new file mode 100644 index 000000000000..0e101a55df77 Binary files /dev/null and b/packages/google-cloud-speech/samples/resources/brooklyn_bridge.flac differ diff --git a/packages/google-cloud-speech/samples/resources/brooklyn_bridge.mp3 b/packages/google-cloud-speech/samples/resources/brooklyn_bridge.mp3 new file mode 100644 index 000000000000..97f9955c6f80 Binary files /dev/null and b/packages/google-cloud-speech/samples/resources/brooklyn_bridge.mp3 differ diff --git a/packages/google-cloud-speech/samples/resources/brooklyn_bridge.raw b/packages/google-cloud-speech/samples/resources/brooklyn_bridge.raw new file mode 100644 index 000000000000..5ebf79d3c9c5 Binary files /dev/null and b/packages/google-cloud-speech/samples/resources/brooklyn_bridge.raw differ diff --git a/packages/google-cloud-speech/samples/resources/brooklyn_bridge.wav b/packages/google-cloud-speech/samples/resources/brooklyn_bridge.wav new file mode 100644 index 000000000000..044086e91411 Binary files /dev/null and b/packages/google-cloud-speech/samples/resources/brooklyn_bridge.wav differ diff --git a/packages/google-cloud-speech/samples/resources/commercial_mono.wav b/packages/google-cloud-speech/samples/resources/commercial_mono.wav new file mode 100644 index 000000000000..e6b9ed434f9f Binary files /dev/null and b/packages/google-cloud-speech/samples/resources/commercial_mono.wav differ diff --git a/packages/google-cloud-speech/samples/resources/hello.raw b/packages/google-cloud-speech/samples/resources/hello.raw new file mode 100644 index 000000000000..b5b46450082a Binary files /dev/null and b/packages/google-cloud-speech/samples/resources/hello.raw differ diff --git a/packages/google-cloud-speech/samples/resources/hello.wav b/packages/google-cloud-speech/samples/resources/hello.wav new file mode 100644 index 000000000000..69b506936a33 Binary files /dev/null and b/packages/google-cloud-speech/samples/resources/hello.wav differ diff --git a/packages/google-cloud-speech/samples/resources/multi.flac b/packages/google-cloud-speech/samples/resources/multi.flac new file mode 100644 index 000000000000..c550e0f42a14 Binary files /dev/null and b/packages/google-cloud-speech/samples/resources/multi.flac differ diff --git a/packages/google-cloud-speech/samples/resources/multi.wav b/packages/google-cloud-speech/samples/resources/multi.wav new file mode 
100644 index 000000000000..7f71d74b951a Binary files /dev/null and b/packages/google-cloud-speech/samples/resources/multi.wav differ diff --git a/packages/google-cloud-speech/samples/v1/speech_transcribe_async.py b/packages/google-cloud-speech/samples/v1/speech_transcribe_async.py new file mode 100644 index 000000000000..5c3dacf2dc4f --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/speech_transcribe_async.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("LongRunningPromise", "speech_transcribe_async") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Transcribe Audio File using Long Running Operation (Local File) (LRO) +# description: Transcribe a long audio file using asynchronous speech recognition +# usage: python3 samples/v1/speech_transcribe_async.py [--local_file_path "resources/brooklyn_bridge.raw"] + +# [START speech_transcribe_async] +from google.cloud import speech_v1 +from google.cloud.speech_v1 import enums +import io + + +def sample_long_running_recognize(local_file_path): + """ + Transcribe a long audio file using asynchronous speech recognition + + Args: + local_file_path Path to local audio file, e.g. /path/audio.wav + """ + + client = speech_v1.SpeechClient() + + # local_file_path = 'resources/brooklyn_bridge.raw' + + # The language of the supplied audio + language_code = "en-US" + + # Sample rate in Hertz of the audio data sent + sample_rate_hertz = 16000 + + # Encoding of audio data sent. This sample sets this explicitly. + # This field is optional for FLAC and WAV audio formats. 
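# --- Editor's note: an illustrative aside, not part of the generated sample.
# LINEAR16/RAW audio carries no header, so sample_rate_hertz must be known
# out of band. For headered formats such as WAV, the declared rate can be
# read with the standard-library wave module and checked against the value
# configured here before sending the request:
import wave

def declared_wav_sample_rate(path):
    """Return the sample rate stored in a WAV file's header."""
    with wave.open(path, "rb") as wav_file:
        return wav_file.getframerate()

# e.g. declared_wav_sample_rate("resources/brooklyn_bridge.wav") is expected
# to match the 16000 configured above.
# --- end editor's note.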
+ encoding = enums.RecognitionConfig.AudioEncoding.LINEAR16 + config = { + "language_code": language_code, + "sample_rate_hertz": sample_rate_hertz, + "encoding": encoding, + } + with io.open(local_file_path, "rb") as f: + content = f.read() + audio = {"content": content} + + operation = client.long_running_recognize(config, audio) + + print(u"Waiting for operation to complete...") + response = operation.result() + + for result in response.results: + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + + +# [END speech_transcribe_async] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--local_file_path", type=str, default="resources/brooklyn_bridge.raw" + ) + args = parser.parse_args() + + sample_long_running_recognize(args.local_file_path) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1/speech_transcribe_async_gcs.py b/packages/google-cloud-speech/samples/v1/speech_transcribe_async_gcs.py new file mode 100644 index 000000000000..2a4da915f825 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/speech_transcribe_async_gcs.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("LongRunningPromise", "speech_transcribe_async_gcs") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Transcript Audio File using Long Running Operation (Cloud Storage) (LRO) +# description: Transcribe long audio file from Cloud Storage using asynchronous speech +# recognition +# usage: python3 samples/v1/speech_transcribe_async_gcs.py [--storage_uri "gs://cloud-samples-data/speech/brooklyn_bridge.raw"] + +# [START speech_transcribe_async_gcs] +from google.cloud import speech_v1 +from google.cloud.speech_v1 import enums + + +def sample_long_running_recognize(storage_uri): + """ + Transcribe long audio file from Cloud Storage using asynchronous speech + recognition + + Args: + storage_uri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] + """ + + client = speech_v1.SpeechClient() + + # storage_uri = 'gs://cloud-samples-data/speech/brooklyn_bridge.raw' + + # Sample rate in Hertz of the audio data sent + sample_rate_hertz = 16000 + + # The language of the supplied audio + language_code = "en-US" + + # Encoding of audio data sent. This sample sets this explicitly. + # This field is optional for FLAC and WAV audio formats. 
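# --- Editor's note: a hedged aside, not part of the generated sample. The
# value returned by long_running_recognize below is a polling future, so a
# caller can bound the wait instead of blocking indefinitely; result() takes
# an optional timeout in seconds and raises concurrent.futures.TimeoutError
# if the operation is still running at expiry:
#
#     operation = client.long_running_recognize(config, audio)
#     try:
#         response = operation.result(timeout=90)
#     except concurrent.futures.TimeoutError:
#         print("Transcription still running; poll again later.")
# --- end editor's note.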
+ encoding = enums.RecognitionConfig.AudioEncoding.LINEAR16 + config = { + "sample_rate_hertz": sample_rate_hertz, + "language_code": language_code, + "encoding": encoding, + } + audio = {"uri": storage_uri} + + operation = client.long_running_recognize(config, audio) + + print(u"Waiting for operation to complete...") + response = operation.result() + + for result in response.results: + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + + +# [END speech_transcribe_async_gcs] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--storage_uri", + type=str, + default="gs://cloud-samples-data/speech/brooklyn_bridge.raw", + ) + args = parser.parse_args() + + sample_long_running_recognize(args.storage_uri) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1/speech_transcribe_async_word_time_offsets_gcs.py b/packages/google-cloud-speech/samples/v1/speech_transcribe_async_word_time_offsets_gcs.py new file mode 100644 index 000000000000..8b958d73f524 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/speech_transcribe_async_word_time_offsets_gcs.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("LongRunningPromise", "speech_transcribe_async_word_time_offsets_gcs") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Getting word timestamps (Cloud Storage) (LRO) +# description: Print start and end time of each word spoken in audio file from Cloud Storage +# usage: python3 samples/v1/speech_transcribe_async_word_time_offsets_gcs.py [--storage_uri "gs://cloud-samples-data/speech/brooklyn_bridge.flac"] + +# [START speech_transcribe_async_word_time_offsets_gcs] +from google.cloud import speech_v1 + + +def sample_long_running_recognize(storage_uri): + """ + Print start and end time of each word spoken in audio file from Cloud Storage + + Args: + storage_uri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] + """ + + client = speech_v1.SpeechClient() + + # storage_uri = 'gs://cloud-samples-data/speech/brooklyn_bridge.flac' + + # When enabled, the first result returned by the API will include a list + # of words and the start and end time offsets (timestamps) for those words. 
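# --- Editor's note: an illustrative helper, not part of the generated
# sample. The word offsets printed below arrive as protobuf Durations split
# into seconds and nanos; a small conversion makes them easier to compare:
def offset_in_seconds(duration):
    """Collapse a Duration-like (seconds, nanos) pair into float seconds."""
    return duration.seconds + duration.nanos / 1e9

# e.g. offset_in_seconds(word.start_time) might return 0.3
# --- end editor's note.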
+ enable_word_time_offsets = True + + # The language of the supplied audio + language_code = "en-US" + config = { + "enable_word_time_offsets": enable_word_time_offsets, + "language_code": language_code, + } + audio = {"uri": storage_uri} + + operation = client.long_running_recognize(config, audio) + + print(u"Waiting for operation to complete...") + response = operation.result() + + # The first result includes start and end time word offsets + result = response.results[0] + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + # Print the start and end time of each word + for word in alternative.words: + print(u"Word: {}".format(word.word)) + print( + u"Start time: {} seconds {} nanos".format( + word.start_time.seconds, word.start_time.nanos + ) + ) + print( + u"End time: {} seconds {} nanos".format( + word.end_time.seconds, word.end_time.nanos + ) + ) + + +# [END speech_transcribe_async_word_time_offsets_gcs] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--storage_uri", + type=str, + default="gs://cloud-samples-data/speech/brooklyn_bridge.flac", + ) + args = parser.parse_args() + + sample_long_running_recognize(args.storage_uri) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1/speech_transcribe_enhanced_model.py b/packages/google-cloud-speech/samples/v1/speech_transcribe_enhanced_model.py new file mode 100644 index 000000000000..8a3897b146b7 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/speech_transcribe_enhanced_model.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("Request", "speech_transcribe_enhanced_model") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Using Enhanced Models (Local File) +# description: Transcribe a short audio file using an enhanced model +# usage: python3 samples/v1/speech_transcribe_enhanced_model.py [--local_file_path "resources/hello.wav"] + +# [START speech_transcribe_enhanced_model] +from google.cloud import speech_v1 +import io + + +def sample_recognize(local_file_path): + """ + Transcribe a short audio file using an enhanced model + + Args: + local_file_path Path to local audio file, e.g. /path/audio.wav + """ + + client = speech_v1.SpeechClient() + + # local_file_path = 'resources/hello.wav' + + # The enhanced model to use, e.g. phone_call + # Currently phone_call is the only model available as an enhanced model. + model = "phone_call" + + # Use an enhanced model for speech recognition (when set to true). + # Project must be eligible for requesting enhanced models. + # Enhanced speech models require that you opt-in to data logging. 
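# --- Editor's note: an illustrative pattern, not part of the generated
# sample. If the project has not opted in to data logging, enhanced-model
# requests fail; one defensive option (the exact exception class caught
# here is an assumption about the failure mode) is to retry with the
# standard model:
from google.api_core import exceptions

def recognize_with_fallback(client, config, audio):
    """Attempt an enhanced-model request, falling back to standard."""
    try:
        return client.recognize(config, audio)
    except exceptions.GoogleAPICallError:
        return client.recognize(dict(config, use_enhanced=False), audio)
# --- end editor's note.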
+ use_enhanced = True + + # The language of the supplied audio + language_code = "en-US" + config = { + "model": model, + "use_enhanced": use_enhanced, + "language_code": language_code, + } + with io.open(local_file_path, "rb") as f: + content = f.read() + audio = {"content": content} + + response = client.recognize(config, audio) + for result in response.results: + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + + +# [END speech_transcribe_enhanced_model] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--local_file_path", type=str, default="resources/hello.wav") + args = parser.parse_args() + + sample_recognize(args.local_file_path) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1/speech_transcribe_model_selection.py b/packages/google-cloud-speech/samples/v1/speech_transcribe_model_selection.py new file mode 100644 index 000000000000..49eccf708261 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/speech_transcribe_model_selection.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("Request", "speech_transcribe_model_selection") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Selecting a Transcription Model (Local File) +# description: Transcribe a short audio file using a specified transcription model +# usage: python3 samples/v1/speech_transcribe_model_selection.py [--local_file_path "resources/hello.wav"] [--model "phone_call"] + +# [START speech_transcribe_model_selection] +from google.cloud import speech_v1 +import io + + +def sample_recognize(local_file_path, model): + """ + Transcribe a short audio file using a specified transcription model + + Args: + local_file_path Path to local audio file, e.g. /path/audio.wav + model The transcription model to use, e.g. 
video, phone_call, default + For a list of available transcription models, see: + https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models + """ + + client = speech_v1.SpeechClient() + + # local_file_path = 'resources/hello.wav' + # model = 'phone_call' + + # The language of the supplied audio + language_code = "en-US" + config = {"model": model, "language_code": language_code} + with io.open(local_file_path, "rb") as f: + content = f.read() + audio = {"content": content} + + response = client.recognize(config, audio) + for result in response.results: + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + + +# [END speech_transcribe_model_selection] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--local_file_path", type=str, default="resources/hello.wav") + parser.add_argument("--model", type=str, default="phone_call") + args = parser.parse_args() + + sample_recognize(args.local_file_path, args.model) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1/speech_transcribe_model_selection_gcs.py b/packages/google-cloud-speech/samples/v1/speech_transcribe_model_selection_gcs.py new file mode 100644 index 000000000000..d8e7809a1a53 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/speech_transcribe_model_selection_gcs.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("Request", "speech_transcribe_model_selection_gcs") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Selecting a Transcription Model (Cloud Storage) +# description: Transcribe a short audio file from Cloud Storage using a specified +# transcription model +# usage: python3 samples/v1/speech_transcribe_model_selection_gcs.py [--storage_uri "gs://cloud-samples-data/speech/hello.wav"] [--model "phone_call"] + +# [START speech_transcribe_model_selection_gcs] +from google.cloud import speech_v1 + + +def sample_recognize(storage_uri, model): + """ + Transcribe a short audio file from Cloud Storage using a specified + transcription model + + Args: + storage_uri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] + model The transcription model to use, e.g. 
video, phone_call, default + For a list of available transcription models, see: + https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models + """ + + client = speech_v1.SpeechClient() + + # storage_uri = 'gs://cloud-samples-data/speech/hello.wav' + # model = 'phone_call' + + # The language of the supplied audio + language_code = "en-US" + config = {"model": model, "language_code": language_code} + audio = {"uri": storage_uri} + + response = client.recognize(config, audio) + for result in response.results: + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + + +# [END speech_transcribe_model_selection_gcs] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--storage_uri", type=str, default="gs://cloud-samples-data/speech/hello.wav" + ) + parser.add_argument("--model", type=str, default="phone_call") + args = parser.parse_args() + + sample_recognize(args.storage_uri, args.model) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1/speech_transcribe_multichannel.py b/packages/google-cloud-speech/samples/v1/speech_transcribe_multichannel.py new file mode 100644 index 000000000000..790835f22ff3 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/speech_transcribe_multichannel.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("Request", "speech_transcribe_multichannel") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Multi-Channel Audio Transcription (Local File) +# description: Transcribe a short audio file with multiple channels +# usage: python3 samples/v1/speech_transcribe_multichannel.py [--local_file_path "resources/multi.wav"] + +# [START speech_transcribe_multichannel] +from google.cloud import speech_v1 +import io + + +def sample_recognize(local_file_path): + """ + Transcribe a short audio file with multiple channels + + Args: + local_file_path Path to local audio file, e.g. /path/audio.wav + """ + + client = speech_v1.SpeechClient() + + # local_file_path = 'resources/multi.wav' + + # The number of channels in the input audio file (optional) + audio_channel_count = 2 + + # When set to true, each audio channel will be recognized separately. 
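# --- Editor's note (illustrative, not part of the generated sample): with
# separate per-channel recognition, results for different channels arrive
# interleaved in response.results and can be regrouped afterwards via the
# channel_tag field described just below, e.g.:
#
#     transcripts_by_channel = {}
#     for result in response.results:
#         transcripts_by_channel.setdefault(result.channel_tag, []).append(
#             result.alternatives[0].transcript
#         )
# --- end editor's note.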
+ # The recognition result will contain a channel_tag field to state which + # channel that result belongs to + enable_separate_recognition_per_channel = True + + # The language of the supplied audio + language_code = "en-US" + config = { + "audio_channel_count": audio_channel_count, + "enable_separate_recognition_per_channel": enable_separate_recognition_per_channel, + "language_code": language_code, + } + with io.open(local_file_path, "rb") as f: + content = f.read() + audio = {"content": content} + + response = client.recognize(config, audio) + for result in response.results: + # channel_tag to recognize which audio channel this result is for + print(u"Channel tag: {}".format(result.channel_tag)) + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + + +# [END speech_transcribe_multichannel] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--local_file_path", type=str, default="resources/multi.wav") + args = parser.parse_args() + + sample_recognize(args.local_file_path) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1/speech_transcribe_multichannel_gcs.py b/packages/google-cloud-speech/samples/v1/speech_transcribe_multichannel_gcs.py new file mode 100644 index 000000000000..fe758b1d69cd --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/speech_transcribe_multichannel_gcs.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("Request", "speech_transcribe_multichannel_gcs") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Multi-Channel Audio Transcription (Cloud Storage) +# description: Transcribe a short audio file from Cloud Storage with multiple channels +# usage: python3 samples/v1/speech_transcribe_multichannel_gcs.py [--storage_uri "gs://cloud-samples-data/speech/multi.wav"] + +# [START speech_transcribe_multichannel_gcs] +from google.cloud import speech_v1 + + +def sample_recognize(storage_uri): + """ + Transcribe a short audio file from Cloud Storage with multiple channels + + Args: + storage_uri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] + """ + + client = speech_v1.SpeechClient() + + # storage_uri = 'gs://cloud-samples-data/speech/multi.wav' + + # The number of channels in the input audio file (optional) + audio_channel_count = 2 + + # When set to true, each audio channel will be recognized separately. 
+ # The recognition result will contain a channel_tag field to state which + # channel that result belongs to + enable_separate_recognition_per_channel = True + + # The language of the supplied audio + language_code = "en-US" + config = { + "audio_channel_count": audio_channel_count, + "enable_separate_recognition_per_channel": enable_separate_recognition_per_channel, + "language_code": language_code, + } + audio = {"uri": storage_uri} + + response = client.recognize(config, audio) + for result in response.results: + # channel_tag to recognize which audio channel this result is for + print(u"Channel tag: {}".format(result.channel_tag)) + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + + +# [END speech_transcribe_multichannel_gcs] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--storage_uri", type=str, default="gs://cloud-samples-data/speech/multi.wav" + ) + args = parser.parse_args() + + sample_recognize(args.storage_uri) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1/speech_transcribe_sync.py b/packages/google-cloud-speech/samples/v1/speech_transcribe_sync.py new file mode 100644 index 000000000000..9a97450f0482 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/speech_transcribe_sync.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("Request", "speech_transcribe_sync") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Transcribe Audio File (Local File) +# description: Transcribe a short audio file using synchronous speech recognition +# usage: python3 samples/v1/speech_transcribe_sync.py [--local_file_path "resources/brooklyn_bridge.raw"] + +# [START speech_transcribe_sync] +from google.cloud import speech_v1 +from google.cloud.speech_v1 import enums +import io + + +def sample_recognize(local_file_path): + """ + Transcribe a short audio file using synchronous speech recognition + + Args: + local_file_path Path to local audio file, e.g. /path/audio.wav + """ + + client = speech_v1.SpeechClient() + + # local_file_path = 'resources/brooklyn_bridge.raw' + + # The language of the supplied audio + language_code = "en-US" + + # Sample rate in Hertz of the audio data sent + sample_rate_hertz = 16000 + + # Encoding of audio data sent. This sample sets this explicitly. + # This field is optional for FLAC and WAV audio formats. 
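# --- Editor's note: a hedged sketch, not part of the generated sample.
# Synchronous recognize targets short clips; longer audio goes through the
# long_running_recognize path shown in speech_transcribe_async.py. A caller
# that knows the clip duration could dispatch between the two (the
# one-minute threshold here is an assumption, not verified against current
# API limits):
def transcribe_by_duration(client, config, audio, duration_seconds):
    """Pick synchronous or long-running recognition by clip length."""
    if duration_seconds < 60:
        return client.recognize(config, audio)
    return client.long_running_recognize(config, audio).result()
# --- end editor's note.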
+ encoding = enums.RecognitionConfig.AudioEncoding.LINEAR16 + config = { + "language_code": language_code, + "sample_rate_hertz": sample_rate_hertz, + "encoding": encoding, + } + with io.open(local_file_path, "rb") as f: + content = f.read() + audio = {"content": content} + + response = client.recognize(config, audio) + for result in response.results: + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + + +# [END speech_transcribe_sync] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--local_file_path", type=str, default="resources/brooklyn_bridge.raw" + ) + args = parser.parse_args() + + sample_recognize(args.local_file_path) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1/speech_transcribe_sync_gcs.py b/packages/google-cloud-speech/samples/v1/speech_transcribe_sync_gcs.py new file mode 100644 index 000000000000..8370da717898 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/speech_transcribe_sync_gcs.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("Request", "speech_transcribe_sync_gcs") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Transcript Audio File (Cloud Storage) +# description: Transcribe short audio file from Cloud Storage using synchronous speech +# recognition +# usage: python3 samples/v1/speech_transcribe_sync_gcs.py [--storage_uri "gs://cloud-samples-data/speech/brooklyn_bridge.raw"] + +# [START speech_transcribe_sync_gcs] +from google.cloud import speech_v1 +from google.cloud.speech_v1 import enums + + +def sample_recognize(storage_uri): + """ + Transcribe short audio file from Cloud Storage using synchronous speech + recognition + + Args: + storage_uri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] + """ + + client = speech_v1.SpeechClient() + + # storage_uri = 'gs://cloud-samples-data/speech/brooklyn_bridge.raw' + + # Sample rate in Hertz of the audio data sent + sample_rate_hertz = 16000 + + # The language of the supplied audio + language_code = "en-US" + + # Encoding of audio data sent. This sample sets this explicitly. + # This field is optional for FLAC and WAV audio formats. 
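# --- Editor's note: an illustrative helper, not part of the generated
# sample, for deriving the encoding enum from a file extension; it relies on
# the enums import already present in this sample. FLAC and WAV are
# self-describing, so for those the field may also be omitted. The mapping
# assumes the .wav/.raw fixtures used by these samples are 16-bit PCM:
def encoding_for(uri):
    """Guess a RecognitionConfig.AudioEncoding from a file extension."""
    if uri.endswith(".flac"):
        return enums.RecognitionConfig.AudioEncoding.FLAC
    if uri.endswith((".wav", ".raw")):
        return enums.RecognitionConfig.AudioEncoding.LINEAR16
    return enums.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED
# --- end editor's note.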
+ encoding = enums.RecognitionConfig.AudioEncoding.LINEAR16 + config = { + "sample_rate_hertz": sample_rate_hertz, + "language_code": language_code, + "encoding": encoding, + } + audio = {"uri": storage_uri} + + response = client.recognize(config, audio) + for result in response.results: + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + + +# [END speech_transcribe_sync_gcs] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--storage_uri", + type=str, + default="gs://cloud-samples-data/speech/brooklyn_bridge.raw", + ) + args = parser.parse_args() + + sample_recognize(args.storage_uri) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1/test/samples.manifest.yaml b/packages/google-cloud-speech/samples/v1/test/samples.manifest.yaml new file mode 100644 index 000000000000..f989660c127f --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/test/samples.manifest.yaml @@ -0,0 +1,38 @@ +type: manifest/samples +schema_version: 3 +base: &common + env: 'python' + bin: 'python3' + chdir: '{@manifest_dir}/../..' + basepath: '.' +samples: +- <<: *common + path: '{basepath}/v1/speech_transcribe_async.py' + sample: 'speech_transcribe_async' +- <<: *common + path: '{basepath}/v1/speech_transcribe_async_gcs.py' + sample: 'speech_transcribe_async_gcs' +- <<: *common + path: '{basepath}/v1/speech_transcribe_async_word_time_offsets_gcs.py' + sample: 'speech_transcribe_async_word_time_offsets_gcs' +- <<: *common + path: '{basepath}/v1/speech_transcribe_enhanced_model.py' + sample: 'speech_transcribe_enhanced_model' +- <<: *common + path: '{basepath}/v1/speech_transcribe_model_selection.py' + sample: 'speech_transcribe_model_selection' +- <<: *common + path: '{basepath}/v1/speech_transcribe_model_selection_gcs.py' + sample: 'speech_transcribe_model_selection_gcs' +- <<: *common + path: '{basepath}/v1/speech_transcribe_multichannel.py' + sample: 'speech_transcribe_multichannel' +- <<: *common + path: '{basepath}/v1/speech_transcribe_multichannel_gcs.py' + sample: 'speech_transcribe_multichannel_gcs' +- <<: *common + path: '{basepath}/v1/speech_transcribe_sync.py' + sample: 'speech_transcribe_sync' +- <<: *common + path: '{basepath}/v1/speech_transcribe_sync_gcs.py' + sample: 'speech_transcribe_sync_gcs' diff --git a/packages/google-cloud-speech/samples/v1/test/speech_transcribe_async.test.yaml b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_async.test.yaml new file mode 100644 index 000000000000..f26cfbabc537 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_async.test.yaml @@ -0,0 +1,28 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Transcribe Audio File using Long Running Operation (Local File) (LRO) + cases: + + # This sample should default to using brooklyn_bridge.raw + # with explicitly configured sample_rate_hertz and encoding + - name: speech_transcribe_async (no arguments) + spec: + - call: + sample: speech_transcribe_async + - assert_contains: + - literal: "how old is the Brooklyn Bridge" + + # Confirm that another file can be transcribed (use another .raw PCM file) + - name: speech_transcribe_async (--local_file_path) + spec: + - call: + sample: speech_transcribe_async + params: + local_file_path: + literal: "resources/hello.raw" + - assert_contains: + - literal: "hello" + - assert_not_contains: + - literal: "how old is the Brooklyn 
Bridge" diff --git a/packages/google-cloud-speech/samples/v1/test/speech_transcribe_async_gcs.test.yaml b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_async_gcs.test.yaml new file mode 100644 index 000000000000..d3d83133e783 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_async_gcs.test.yaml @@ -0,0 +1,28 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Transcript Audio File using Long Running Operation (Cloud Storage) (LRO) + cases: + + # This sample should default to using gs://cloud-samples-data/brooklyn_bridge.raw + # with explicitly configured sample_rate_hertz and encoding + - name: speech_transcribe_async_gcs (no arguments) + spec: + - call: + sample: speech_transcribe_async_gcs + - assert_contains: + - literal: "how old is the Brooklyn Bridge" + + # Confirm that another file can be transcribed (use another .raw PCM file) + - name: speech_transcribe_async_gcs (--storage_uri) + spec: + - call: + sample: speech_transcribe_async_gcs + params: + storage_uri: + literal: "gs://cloud-samples-data/speech/hello.raw" + - assert_contains: + - literal: "hello" + - assert_not_contains: + - literal: "how old is the Brooklyn Bridge" diff --git a/packages/google-cloud-speech/samples/v1/test/speech_transcribe_async_word_time_offsets_gcs.test.yaml b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_async_word_time_offsets_gcs.test.yaml new file mode 100644 index 000000000000..11784726db74 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_async_word_time_offsets_gcs.test.yaml @@ -0,0 +1,37 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Getting word timestamps (Cloud Storage) (LRO) + cases: + + # This sample should default to using gs://cloud-samples-data/speech/brooklyn_bridge.flac + - name: speech_transcribe_async_word_time_offsets_gcs (no arguments) + spec: + - call: + sample: speech_transcribe_async_word_time_offsets_gcs + - assert_contains: + - literal: "how old is the Brooklyn Bridge" + - literal: "Word: how" + - literal: "Word: old" + - literal: "Word: is" + - literal: "Start time: 0 seconds" + - literal: "End time: 1 seconds" + + # Confirm that another file can be transcribed (use another file) + - name: speech_transcribe_async_word_time_offsets_gcs (--storage_uri) + spec: + - call: + sample: speech_transcribe_async_word_time_offsets_gcs + params: + storage_uri: + literal: "gs://cloud-samples-data/speech/multi.flac" + - assert_contains: + - literal: "how are you doing" + - literal: "Word: how" + - literal: "Word: are" + - literal: "Word: you" + - literal: "Start time: 0 seconds" + - literal: "End time: 1 seconds" + - assert_not_contains: + - literal: "how old is the Brooklyn Bridge" diff --git a/packages/google-cloud-speech/samples/v1/test/speech_transcribe_enhanced_model.test.yaml b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_enhanced_model.test.yaml new file mode 100644 index 000000000000..6eab33b52796 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_enhanced_model.test.yaml @@ -0,0 +1,29 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Using Enhanced Models (Local File) + cases: + + # This sample should default to using hello.wav + # and the phone_call model (only currently available enhanced model) + # + # Note: if the project used to run these tests isn't eligible for + # enhanced models, you will receive an error. 
+ - name: speech_transcribe_enhanced_model (no arguments) + spec: + - call: + sample: speech_transcribe_enhanced_model + - assert_contains: + - literal: "hello" + + # Confirm that another file can be transcribed (use another .wav file) + - name: speech_transcribe_enhanced_model (--local_file_path) + spec: + - call: + sample: speech_transcribe_enhanced_model + params: + local_file_path: + literal: "resources/commercial_mono.wav" + - assert_contains: + - literal: "Chrome" diff --git a/packages/google-cloud-speech/samples/v1/test/speech_transcribe_model_selection.test.yaml b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_model_selection.test.yaml new file mode 100644 index 000000000000..b5ec2d90290d --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_model_selection.test.yaml @@ -0,0 +1,52 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Selecting a Transcription Model (Local File) + cases: + + # This sample should default to using hello.wav + # and the phone_call model + - name: speech_transcribe_model_selection (no arguments) + spec: + - call: + sample: speech_transcribe_model_selection + - assert_contains: + - literal: "Hello" + + # Confirm that another file can be transcribed (use another .wav file) + - name: speech_transcribe_model_selection (--local_file_path) + spec: + - call: + sample: speech_transcribe_model_selection + params: + local_file_path: + literal: "resources/commercial_mono.wav" + - assert_contains: + - literal: "Chrome" + + # Confirm that --model can be specified and the sample does not blow up + # + # Note: we are not using example audio files which result in deterministically + # different results when using different models, so we simply test + # that regular transcription continues to work. 
+ - name: speech_transcribe_model_selection (--model) + spec: + - call: + sample: speech_transcribe_model_selection + params: + model: + literal: video + - assert_contains: + - literal: "hello" + + # Confirm that --model is being passed through by providing an invalid model + - name: speech_transcribe_model_selection (invalid --model) + spec: + - call_may_fail: + sample: speech_transcribe_model_selection + params: + model: + literal: I_DONT_EXIST + - assert_contains: + - literal: "Incorrect model specified" diff --git a/packages/google-cloud-speech/samples/v1/test/speech_transcribe_model_selection_gcs.test.yaml b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_model_selection_gcs.test.yaml new file mode 100644 index 000000000000..60c45c975d7b --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_model_selection_gcs.test.yaml @@ -0,0 +1,52 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Selecting a Transcription Model (Cloud Storage) + cases: + + # This sample should default to using gs://cloud-samples-data/speech/hello.wav + # and the phone_call model + - name: speech_transcribe_model_selection_gcs (no arguments) + spec: + - call: + sample: speech_transcribe_model_selection_gcs + - assert_contains: + - literal: "Hello" + + # Confirm that another file can be transcribed (use another .wav file) + - name: speech_transcribe_model_selection_gcs (--storage_uri) + spec: + - call: + sample: speech_transcribe_model_selection_gcs + params: + storage_uri: + literal: "gs://cloud-samples-data/speech/commercial_mono.wav" + - assert_contains: + - literal: "Chrome" + + # Confirm that --model can be specified and the sample does not blow up + # + # Note: we are not using example audio files which result in deterministically + # different results when using different models, so we simply test + # that regular transcription continues to work. 
+ - name: speech_transcribe_model_selection_gcs (--model) + spec: + - call: + sample: speech_transcribe_model_selection_gcs + params: + model: + literal: video + - assert_contains: + - literal: "hello" + + # Confirm that --model is being passed through by providing an invalid model + - name: speech_transcribe_model_selection_gcs (invalid --model) + spec: + - call_may_fail: + sample: speech_transcribe_model_selection_gcs + params: + model: + literal: I_DONT_EXIST + - assert_contains: + - literal: "Incorrect model specified" diff --git a/packages/google-cloud-speech/samples/v1/test/speech_transcribe_multichannel.test.yaml b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_multichannel.test.yaml new file mode 100644 index 000000000000..9d5379dc99c7 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_multichannel.test.yaml @@ -0,0 +1,31 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Multi-Channel Audio Transcription (Local File) + cases: + + # This sample should default to using multi.wav (2 channels) + - name: speech_transcribe_multichannel (no arguments) + spec: + - call: + sample: speech_transcribe_multichannel + - assert_contains: + - literal: "Channel tag: 1" + - literal: "Channel tag: 2" + - literal: "how are you doing" + + # Confirm that another file can be transcribed (use another 2 channel .wav file) + - name: speech_transcribe_multichannel (--local_file_path) + spec: + - call: + sample: speech_transcribe_multichannel + params: + local_file_path: + literal: "resources/brooklyn_bridge.wav" + - assert_contains: + # Only one channel of data is present in brooklyn_bridge.wav + - literal: "Channel tag:" + - literal: "how old is the Brooklyn Bridge" + - assert_not_contains: + - literal: "how are you doing" diff --git a/packages/google-cloud-speech/samples/v1/test/speech_transcribe_multichannel_gcs.test.yaml b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_multichannel_gcs.test.yaml new file mode 100644 index 000000000000..64c9340ce0e6 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_multichannel_gcs.test.yaml @@ -0,0 +1,32 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Multi-Channel Audio Transcription (Cloud Storage) + cases: + + # This sample should default to using gs://cloud-samples-data/speech/multi.wav + # with 2 audio channels of data + - name: speech_transcribe_multichannel_gcs (no arguments) + spec: + - call: + sample: speech_transcribe_multichannel_gcs + - assert_contains: + - literal: "Channel tag: 1" + - literal: "Channel tag: 2" + - literal: "how are you doing" + + # Confirm that another file can be transcribed (use another 2 channel .wav file) + - name: speech_transcribe_multichannel_gcs (--storage_uri) + spec: + - call: + sample: speech_transcribe_multichannel_gcs + params: + storage_uri: + literal: "gs://cloud-samples-data/speech/brooklyn_bridge.wav" + - assert_contains: + # Only one channel of data is present in brooklyn_bridge.wav + - literal: "Channel tag:" + - literal: "how old is the Brooklyn Bridge" + - assert_not_contains: + - literal: "how are you doing" diff --git a/packages/google-cloud-speech/samples/v1/test/speech_transcribe_sync.test.yaml b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_sync.test.yaml new file mode 100644 index 000000000000..47cc8c1a48e1 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_sync.test.yaml @@ -0,0 +1,28 @@ +type: test/samples +schema_version: 1 
+test: + suites: + - name: Transcribe Audio File (Local File) + cases: + + # This sample should default to using brooklyn_bridge.raw + # with explicitly configured sample_rate_hertz and encoding + - name: speech_transcribe_sync (no arguments) + spec: + - call: + sample: speech_transcribe_sync + - assert_contains: + - literal: "how old is the Brooklyn Bridge" + + # Confirm that another file can be transcribed (use another .raw PCM file) + - name: speech_transcribe_sync (--local_file_path) + spec: + - call: + sample: speech_transcribe_sync + params: + local_file_path: + literal: "resources/hello.raw" + - assert_contains: + - literal: "hello" + - assert_not_contains: + - literal: "how old is the Brooklyn Bridge" diff --git a/packages/google-cloud-speech/samples/v1/test/speech_transcribe_sync_gcs.test.yaml b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_sync_gcs.test.yaml new file mode 100644 index 000000000000..3defdf28930a --- /dev/null +++ b/packages/google-cloud-speech/samples/v1/test/speech_transcribe_sync_gcs.test.yaml @@ -0,0 +1,28 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Transcript Audio File (Cloud Storage) + cases: + + # This sample should default to using gs://cloud-samples-data/speech/brooklyn_bridge.raw + # with explicitly configured sample_rate_hertz and encoding + - name: speech_transcribe_sync_gcs (no arguments) + spec: + - call: + sample: speech_transcribe_sync_gcs + - assert_contains: + - literal: "how old is the Brooklyn Bridge" + + # Confirm that another file can be transcribed (use another .raw PCM file) + - name: speech_transcribe_sync_gcs (--storage_uri) + spec: + - call: + sample: speech_transcribe_sync_gcs + params: + storage_uri: + literal: "gs://cloud-samples-data/speech/hello.raw" + - assert_contains: + - literal: "hello" + - assert_not_contains: + - literal: "how old is the Brooklyn Bridge" diff --git a/packages/google-cloud-speech/samples/v1p1beta1/speech_adaptation_beta.py b/packages/google-cloud-speech/samples/v1p1beta1/speech_adaptation_beta.py new file mode 100644 index 000000000000..cf33015342fa --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/speech_adaptation_beta.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! This is a generated sample ("Request", "speech_adaptation_beta") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Speech Adaptation (Cloud Storage) +# description: Transcribe a short audio file with speech adaptation. 
+# usage: python3 samples/v1p1beta1/speech_adaptation_beta.py [--storage_uri "gs://cloud-samples-data/speech/brooklyn_bridge.mp3"] [--phrase "Brooklyn Bridge"] + +# [START speech_adaptation_beta] +from google.cloud import speech_v1p1beta1 +from google.cloud.speech_v1p1beta1 import enums + + +def sample_recognize(storage_uri, phrase): + """ + Transcribe a short audio file with speech adaptation. + + Args: + storage_uri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] + phrase Phrase "hints" help recognize the specified phrases from your audio. + """ + + client = speech_v1p1beta1.SpeechClient() + + # storage_uri = 'gs://cloud-samples-data/speech/brooklyn_bridge.mp3' + # phrase = 'Brooklyn Bridge' + phrases = [phrase] + + # Hint Boost. This value increases the probability that a specific + # phrase will be recognized over other similar sounding phrases. + # The higher the boost, the higher the chance of false positive + # recognition as well. Can accept a wide range of positive values. + # Most use cases are best served with values between 0 and 20. + # Using a binary search approach may help you find the optimal value. + boost = 20.0 + speech_contexts_element = {"phrases": phrases, "boost": boost} + speech_contexts = [speech_contexts_element] + + # Sample rate in Hertz of the audio data sent + sample_rate_hertz = 44100 + + # The language of the supplied audio + language_code = "en-US" + + # Encoding of audio data sent. This sample sets this explicitly. + # This field is optional for FLAC and WAV audio formats. + encoding = enums.RecognitionConfig.AudioEncoding.MP3 + config = { + "speech_contexts": speech_contexts, + "sample_rate_hertz": sample_rate_hertz, + "language_code": language_code, + "encoding": encoding, + } + audio = {"uri": storage_uri} + + response = client.recognize(config, audio) + for result in response.results: + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + + +# [END speech_adaptation_beta] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--storage_uri", + type=str, + default="gs://cloud-samples-data/speech/brooklyn_bridge.mp3", + ) + parser.add_argument("--phrase", type=str, default="Brooklyn Bridge") + args = parser.parse_args() + + sample_recognize(args.storage_uri, args.phrase) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1p1beta1/speech_contexts_classes_beta.py b/packages/google-cloud-speech/samples/v1p1beta1/speech_contexts_classes_beta.py new file mode 100644 index 000000000000..84f1bc0dedfa --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/speech_contexts_classes_beta.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! 
This is a generated sample ("Request", "speech_contexts_classes_beta") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Using Context Classes (Cloud Storage) +# description: Transcribe a short audio file with static context classes. +# usage: python3 samples/v1p1beta1/speech_contexts_classes_beta.py [--storage_uri "gs://cloud-samples-data/speech/time.mp3"] [--phrase "$TIME"] + +# [START speech_contexts_classes_beta] +from google.cloud import speech_v1p1beta1 +from google.cloud.speech_v1p1beta1 import enums + + +def sample_recognize(storage_uri, phrase): + """ + Transcribe a short audio file with static context classes. + + Args: + storage_uri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] + phrase Phrase "hints" help recognize the specified phrases from your audio. + In this sample we are using a static class phrase ($TIME). + Classes represent groups of words that represent common concepts + that occur in natural language. + """ + + client = speech_v1p1beta1.SpeechClient() + + # storage_uri = 'gs://cloud-samples-data/speech/time.mp3' + # phrase = '$TIME' + phrases = [phrase] + speech_contexts_element = {"phrases": phrases} + speech_contexts = [speech_contexts_element] + + # The language of the supplied audio + language_code = "en-US" + + # Sample rate in Hertz of the audio data sent + sample_rate_hertz = 24000 + + # Encoding of audio data sent. This sample sets this explicitly. + # This field is optional for FLAC and WAV audio formats. + encoding = enums.RecognitionConfig.AudioEncoding.MP3 + config = { + "speech_contexts": speech_contexts, + "language_code": language_code, + "sample_rate_hertz": sample_rate_hertz, + "encoding": encoding, + } + audio = {"uri": storage_uri} + + response = client.recognize(config, audio) + for result in response.results: + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + + +# [END speech_contexts_classes_beta] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--storage_uri", type=str, default="gs://cloud-samples-data/speech/time.mp3" + ) + parser.add_argument("--phrase", type=str, default="$TIME") + args = parser.parse_args() + + sample_recognize(args.storage_uri, args.phrase) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1p1beta1/speech_quickstart_beta.py b/packages/google-cloud-speech/samples/v1p1beta1/speech_quickstart_beta.py new file mode 100644 index 000000000000..4bf48dcb4ce8 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/speech_quickstart_beta.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! 
This is a generated sample ("Request", "speech_quickstart_beta") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Quickstart Beta +# description: Performs synchronous speech recognition on an audio file +# usage: python3 samples/v1p1beta1/speech_quickstart_beta.py [--storage_uri "gs://cloud-samples-data/speech/brooklyn_bridge.mp3"] + +# [START speech_quickstart_beta] +from google.cloud import speech_v1p1beta1 +from google.cloud.speech_v1p1beta1 import enums + + +def sample_recognize(storage_uri): + """ + Performs synchronous speech recognition on an audio file + + Args: + storage_uri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE] + """ + + client = speech_v1p1beta1.SpeechClient() + + # storage_uri = 'gs://cloud-samples-data/speech/brooklyn_bridge.mp3' + + # The language of the supplied audio + language_code = "en-US" + + # Sample rate in Hertz of the audio data sent + sample_rate_hertz = 44100 + + # Encoding of audio data sent. This sample sets this explicitly. + # This field is optional for FLAC and WAV audio formats. + encoding = enums.RecognitionConfig.AudioEncoding.MP3 + config = { + "language_code": language_code, + "sample_rate_hertz": sample_rate_hertz, + "encoding": encoding, + } + audio = {"uri": storage_uri} + + response = client.recognize(config, audio) + for result in response.results: + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + + +# [END speech_quickstart_beta] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--storage_uri", + type=str, + default="gs://cloud-samples-data/speech/brooklyn_bridge.mp3", + ) + args = parser.parse_args() + + sample_recognize(args.storage_uri) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1p1beta1/speech_transcribe_auto_punctuation_beta.py b/packages/google-cloud-speech/samples/v1p1beta1/speech_transcribe_auto_punctuation_beta.py new file mode 100644 index 000000000000..06ce289c2a32 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/speech_transcribe_auto_punctuation_beta.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! 
This is a generated sample ("Request", "speech_transcribe_auto_punctuation_beta") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Getting punctuation in results (Local File) (Beta) +# description: Transcribe a short audio file with punctuation +# usage: python3 samples/v1p1beta1/speech_transcribe_auto_punctuation_beta.py [--local_file_path "resources/commercial_mono.wav"] + +# [START speech_transcribe_auto_punctuation_beta] +from google.cloud import speech_v1p1beta1 +import io + + +def sample_recognize(local_file_path): + """ + Transcribe a short audio file with punctuation + + Args: + local_file_path Path to local audio file, e.g. /path/audio.wav + """ + + client = speech_v1p1beta1.SpeechClient() + + # local_file_path = 'resources/commercial_mono.wav' + + # When enabled, trascription results may include punctuation + # (available for select languages). + enable_automatic_punctuation = True + + # The language of the supplied audio. Even though additional languages are + # provided by alternative_language_codes, a primary language is still required. + language_code = "en-US" + config = { + "enable_automatic_punctuation": enable_automatic_punctuation, + "language_code": language_code, + } + with io.open(local_file_path, "rb") as f: + content = f.read() + audio = {"content": content} + + response = client.recognize(config, audio) + for result in response.results: + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + + +# [END speech_transcribe_auto_punctuation_beta] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--local_file_path", type=str, default="resources/commercial_mono.wav" + ) + args = parser.parse_args() + + sample_recognize(args.local_file_path) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1p1beta1/speech_transcribe_diarization_beta.py b/packages/google-cloud-speech/samples/v1p1beta1/speech_transcribe_diarization_beta.py new file mode 100644 index 000000000000..a1fd633b6909 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/speech_transcribe_diarization_beta.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! 
This is a generated sample ("LongRunningPromise", "speech_transcribe_diarization_beta") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Separating different speakers (Local File) (LRO) (Beta) +# description: Print confidence level for individual words in a transcription of a short audio +# file +# Separating different speakers in an audio file recording +# usage: python3 samples/v1p1beta1/speech_transcribe_diarization_beta.py [--local_file_path "resources/commercial_mono.wav"] + +# [START speech_transcribe_diarization_beta] +from google.cloud import speech_v1p1beta1 +import io + + +def sample_long_running_recognize(local_file_path): + """ + Print confidence level for individual words in a transcription of a short audio + file + Separating different speakers in an audio file recording + + Args: + local_file_path Path to local audio file, e.g. /path/audio.wav + """ + + client = speech_v1p1beta1.SpeechClient() + + # local_file_path = 'resources/commercial_mono.wav' + + # If enabled, each word in the first alternative of each result will be + # tagged with a speaker tag to identify the speaker. + enable_speaker_diarization = True + + # Optional. Specifies the estimated number of speakers in the conversation. + diarization_speaker_count = 2 + + # The language of the supplied audio + language_code = "en-US" + config = { + "enable_speaker_diarization": enable_speaker_diarization, + "diarization_speaker_count": diarization_speaker_count, + "language_code": language_code, + } + with io.open(local_file_path, "rb") as f: + content = f.read() + audio = {"content": content} + + operation = client.long_running_recognize(config, audio) + + print(u"Waiting for operation to complete...") + response = operation.result() + + for result in response.results: + # First alternative has words tagged with speakers + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + # Print the speaker_tag of each word + for word in alternative.words: + print(u"Word: {}".format(word.word)) + print(u"Speaker tag: {}".format(word.speaker_tag)) + + +# [END speech_transcribe_diarization_beta] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--local_file_path", type=str, default="resources/commercial_mono.wav" + ) + args = parser.parse_args() + + sample_long_running_recognize(args.local_file_path) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1p1beta1/speech_transcribe_multilanguage_beta.py b/packages/google-cloud-speech/samples/v1p1beta1/speech_transcribe_multilanguage_beta.py new file mode 100644 index 000000000000..94a5fad8b365 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/speech_transcribe_multilanguage_beta.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! 
This is a generated sample ("Request", "speech_transcribe_multilanguage_beta") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Detecting language spoken automatically (Local File) (Beta) +# description: Transcribe a short audio file with language detected from a list of possible +# languages +# usage: python3 samples/v1p1beta1/speech_transcribe_multilanguage_beta.py [--local_file_path "resources/brooklyn_bridge.flac"] + +# [START speech_transcribe_multilanguage_beta] +from google.cloud import speech_v1p1beta1 +import io + + +def sample_recognize(local_file_path): + """ + Transcribe a short audio file with language detected from a list of possible + languages + + Args: + local_file_path Path to local audio file, e.g. /path/audio.wav + """ + + client = speech_v1p1beta1.SpeechClient() + + # local_file_path = 'resources/brooklyn_bridge.flac' + + # The language of the supplied audio. Even though additional languages are + # provided by alternative_language_codes, a primary language is still required. + language_code = "fr" + + # Specify up to 3 additional languages as possible alternative languages + # of the supplied audio. + alternative_language_codes_element = "es" + alternative_language_codes_element_2 = "en" + alternative_language_codes = [ + alternative_language_codes_element, + alternative_language_codes_element_2, + ] + config = { + "language_code": language_code, + "alternative_language_codes": alternative_language_codes, + } + with io.open(local_file_path, "rb") as f: + content = f.read() + audio = {"content": content} + + response = client.recognize(config, audio) + for result in response.results: + # The language_code which was detected as the most likely being spoken in the audio + print(u"Detected language: {}".format(result.language_code)) + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + + +# [END speech_transcribe_multilanguage_beta] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--local_file_path", type=str, default="resources/brooklyn_bridge.flac" + ) + args = parser.parse_args() + + sample_recognize(args.local_file_path) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1p1beta1/speech_transcribe_recognition_metadata_beta.py b/packages/google-cloud-speech/samples/v1p1beta1/speech_transcribe_recognition_metadata_beta.py new file mode 100644 index 000000000000..4168acacc0dc --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/speech_transcribe_recognition_metadata_beta.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! 
This is a generated sample ("Request", "speech_transcribe_recognition_metadata_beta") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Adding recognition metadata (Local File) (Beta) +# description: Adds additional details short audio file included in this recognition request +# usage: python3 samples/v1p1beta1/speech_transcribe_recognition_metadata_beta.py [--local_file_path "resources/commercial_mono.wav"] + +# [START speech_transcribe_recognition_metadata_beta] +from google.cloud import speech_v1p1beta1 +from google.cloud.speech_v1p1beta1 import enums +import io + + +def sample_recognize(local_file_path): + """ + Adds additional details short audio file included in this recognition request + + Args: + local_file_path Path to local audio file, e.g. /path/audio.wav + """ + + client = speech_v1p1beta1.SpeechClient() + + # local_file_path = 'resources/commercial_mono.wav' + + # The use case of the audio, e.g. PHONE_CALL, DISCUSSION, PRESENTATION, et al. + interaction_type = enums.RecognitionMetadata.InteractionType.VOICE_SEARCH + + # The kind of device used to capture the audio + recording_device_type = enums.RecognitionMetadata.RecordingDeviceType.SMARTPHONE + + # The device used to make the recording. + # Arbitrary string, e.g. 'Pixel XL', 'VoIP', 'Cardioid Microphone', or other + # value. + recording_device_name = "Pixel 3" + metadata = { + "interaction_type": interaction_type, + "recording_device_type": recording_device_type, + "recording_device_name": recording_device_name, + } + + # The language of the supplied audio. Even though additional languages are + # provided by alternative_language_codes, a primary language is still required. + language_code = "en-US" + config = {"metadata": metadata, "language_code": language_code} + with io.open(local_file_path, "rb") as f: + content = f.read() + audio = {"content": content} + + response = client.recognize(config, audio) + for result in response.results: + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + + +# [END speech_transcribe_recognition_metadata_beta] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--local_file_path", type=str, default="resources/commercial_mono.wav" + ) + args = parser.parse_args() + + sample_recognize(args.local_file_path) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1p1beta1/speech_transcribe_word_level_confidence_beta.py b/packages/google-cloud-speech/samples/v1p1beta1/speech_transcribe_word_level_confidence_beta.py new file mode 100644 index 000000000000..85cdfbb26a90 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/speech_transcribe_word_level_confidence_beta.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT! 
This is a generated sample ("Request", "speech_transcribe_word_level_confidence_beta") + +# To install the latest published package dependency, execute the following: +# pip install google-cloud-speech + +# sample-metadata +# title: Enabling word-level confidence (Local File) (Beta) +# description: Print confidence level for individual words in a transcription of a short audio +# file. +# usage: python3 samples/v1p1beta1/speech_transcribe_word_level_confidence_beta.py [--local_file_path "resources/brooklyn_bridge.flac"] + +# [START speech_transcribe_word_level_confidence_beta] +from google.cloud import speech_v1p1beta1 +import io + + +def sample_recognize(local_file_path): + """ + Print confidence level for individual words in a transcription of a short audio + file. + + Args: + local_file_path Path to local audio file, e.g. /path/audio.wav + """ + + client = speech_v1p1beta1.SpeechClient() + + # local_file_path = 'resources/brooklyn_bridge.flac' + + # When enabled, the first result returned by the API will include a list + # of words and the confidence level for each of those words. + enable_word_confidence = True + + # The language of the supplied audio + language_code = "en-US" + config = { + "enable_word_confidence": enable_word_confidence, + "language_code": language_code, + } + with io.open(local_file_path, "rb") as f: + content = f.read() + audio = {"content": content} + + response = client.recognize(config, audio) + # The first result includes confidence levels per word + result = response.results[0] + # First alternative is the most probable result + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + # Print the confidence level of each word + for word in alternative.words: + print(u"Word: {}".format(word.word)) + print(u"Confidence: {}".format(word.confidence)) + + +# [END speech_transcribe_word_level_confidence_beta] + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--local_file_path", type=str, default="resources/brooklyn_bridge.flac" + ) + args = parser.parse_args() + + sample_recognize(args.local_file_path) + + +if __name__ == "__main__": + main() diff --git a/packages/google-cloud-speech/samples/v1p1beta1/test/samples.manifest.yaml b/packages/google-cloud-speech/samples/v1p1beta1/test/samples.manifest.yaml new file mode 100644 index 000000000000..787e335e7aa4 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/test/samples.manifest.yaml @@ -0,0 +1,32 @@ +type: manifest/samples +schema_version: 3 +base: &common + env: 'python' + bin: 'python3' + chdir: '{@manifest_dir}/../..' + basepath: '.' 
+samples: +- <<: *common + path: '{basepath}/v1p1beta1/speech_adaptation_beta.py' + sample: 'speech_adaptation_beta' +- <<: *common + path: '{basepath}/v1p1beta1/speech_contexts_classes_beta.py' + sample: 'speech_contexts_classes_beta' +- <<: *common + path: '{basepath}/v1p1beta1/speech_quickstart_beta.py' + sample: 'speech_quickstart_beta' +- <<: *common + path: '{basepath}/v1p1beta1/speech_transcribe_auto_punctuation_beta.py' + sample: 'speech_transcribe_auto_punctuation_beta' +- <<: *common + path: '{basepath}/v1p1beta1/speech_transcribe_diarization_beta.py' + sample: 'speech_transcribe_diarization_beta' +- <<: *common + path: '{basepath}/v1p1beta1/speech_transcribe_multilanguage_beta.py' + sample: 'speech_transcribe_multilanguage_beta' +- <<: *common + path: '{basepath}/v1p1beta1/speech_transcribe_recognition_metadata_beta.py' + sample: 'speech_transcribe_recognition_metadata_beta' +- <<: *common + path: '{basepath}/v1p1beta1/speech_transcribe_word_level_confidence_beta.py' + sample: 'speech_transcribe_word_level_confidence_beta' diff --git a/packages/google-cloud-speech/samples/v1p1beta1/test/speech_adaptation_beta.test.yaml b/packages/google-cloud-speech/samples/v1p1beta1/test/speech_adaptation_beta.test.yaml new file mode 100644 index 000000000000..4efe8e83fc32 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/test/speech_adaptation_beta.test.yaml @@ -0,0 +1,11 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Speech-to-Text Sample Tests For Speech Adaptation + cases: + - name: speech_adaptation_beta + spec: + - call: {sample: speech_adaptation_beta} + - assert_contains: + - literal: "how old is the Brooklyn Bridge" diff --git a/packages/google-cloud-speech/samples/v1p1beta1/test/speech_contexts_classes_beta.test.yaml b/packages/google-cloud-speech/samples/v1p1beta1/test/speech_contexts_classes_beta.test.yaml new file mode 100644 index 000000000000..b6dccfc71ff6 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/test/speech_contexts_classes_beta.test.yaml @@ -0,0 +1,11 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Speech-to-Text Sample Tests For Speech Contexts Static Classes + cases: + - name: speech_contexts_classes_beta + spec: + - call: {sample: speech_contexts_classes_beta} + - assert_contains: + - literal: "the time is 5:45 p.m." 
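Editor's aside on the adaptation and context-class samples above: the dict literals the generated scripts pass to client.recognize() are shorthand that the GAPIC layer coerces into typed proto messages. A minimal sketch of the equivalent typed construction follows; it is not part of this diff, and it assumes the standard enums/types modules of this client generation, reusing the default URI and phrase from speech_adaptation_beta.py.

from google.cloud import speech_v1p1beta1
from google.cloud.speech_v1p1beta1 import enums, types

client = speech_v1p1beta1.SpeechClient()

# SpeechContext carries the phrase hints; boost is the adaptation field
# exercised by speech_adaptation_beta.py.
context = types.SpeechContext(phrases=["Brooklyn Bridge"], boost=20.0)
config = types.RecognitionConfig(
    speech_contexts=[context],
    sample_rate_hertz=44100,
    language_code="en-US",
    encoding=enums.RecognitionConfig.AudioEncoding.MP3,
)
audio = types.RecognitionAudio(
    uri="gs://cloud-samples-data/speech/brooklyn_bridge.mp3"
)
response = client.recognize(config, audio)

Either form produces the same request; the dict shorthand keeps the generated samples free of extra imports.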
diff --git a/packages/google-cloud-speech/samples/v1p1beta1/test/speech_quickstart_beta.test.yaml b/packages/google-cloud-speech/samples/v1p1beta1/test/speech_quickstart_beta.test.yaml new file mode 100644 index 000000000000..bd5bf6700a82 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/test/speech_quickstart_beta.test.yaml @@ -0,0 +1,11 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Speech-to-Text Sample Tests For Quickstart + cases: + - name: speech_quickstart_beta + spec: + - call: {sample: speech_quickstart_beta} + - assert_contains: + - literal: "how old is the Brooklyn Bridge" diff --git a/packages/google-cloud-speech/samples/v1p1beta1/test/speech_transcribe_auto_punctuation_beta.test.yaml b/packages/google-cloud-speech/samples/v1p1beta1/test/speech_transcribe_auto_punctuation_beta.test.yaml new file mode 100644 index 000000000000..1ab5f79a0f47 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/test/speech_transcribe_auto_punctuation_beta.test.yaml @@ -0,0 +1,28 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Getting punctuation in results (Local File) (Beta) + cases: + + # This sample should default to using commercial_mono.wav + - name: speech_transcribe_auto_punctuation_beta (no arguments) + spec: + - call: + sample: speech_transcribe_auto_punctuation_beta + - assert_contains: + # Simply assert that actual punctuation is present from commercial_mono.wav + - literal: "?" + - literal: "," + - literal: "" + + # Confirm that another file can be transcribed (use another file) + - name: speech_transcribe_auto_punctuation_beta (--local_file_path) + spec: + - call: + sample: speech_transcribe_auto_punctuation_beta + params: + local_file_path: + literal: "resources/brooklyn_bridge.flac" + - assert_contains: + - literal: "How old is the Brooklyn Bridge?" diff --git a/packages/google-cloud-speech/samples/v1p1beta1/test/speech_transcribe_diarization_beta.test.yaml b/packages/google-cloud-speech/samples/v1p1beta1/test/speech_transcribe_diarization_beta.test.yaml new file mode 100644 index 000000000000..409e4b54b66f --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/test/speech_transcribe_diarization_beta.test.yaml @@ -0,0 +1,40 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Separating different speakers (Local File) (LRO) (Beta) + cases: + + # This sample should default to using commercial_mono.wav + - name: speech_transcribe_diarization_beta (no arguments) + spec: + - call: + sample: speech_transcribe_diarization_beta + - assert_contains: + - literal: "Word: Chrome" + # The identifier given to each speaker is non-deterministic. + # For two speakers, it can be 0 and 1, 0 and 2, or other variations. + # + # The example audio file has two speakers, but this test is + # not verifying that greater than one speaker is detected. + - literal: "Speaker tag:" + + # Confirm that another file can be transcribed (use another .flac file) + - name: speech_transcribe_diarization_beta (--local_file_path) + spec: + - call: + sample: speech_transcribe_diarization_beta + params: + local_file_path: + literal: "resources/multi.flac" + - assert_contains: + - literal: "how are you doing" + - literal: "Word: doing" + # The identifier given to each speaker is non-deterministic. + # For two speakers, it can be 0 and 1, 0 and 2, or other variations. + # + # The example audio file has two speakers, but this test is + # not verifying that greater than one speaker is detected. 
+ - literal: "Speaker tag:" + - assert_not_contains: + - literal: "Chrome" diff --git a/packages/google-cloud-speech/samples/v1p1beta1/test/speech_transcribe_multilanguage_beta.test.yaml b/packages/google-cloud-speech/samples/v1p1beta1/test/speech_transcribe_multilanguage_beta.test.yaml new file mode 100644 index 000000000000..d9f2d71093cd --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/test/speech_transcribe_multilanguage_beta.test.yaml @@ -0,0 +1,33 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Detecting language spoken automatically (Local File) (Beta) + cases: + + # This sample should default to using brooklyn_bridge.flac + - name: speech_transcribe_multilanguage_beta (no arguments) + spec: + - call: + sample: speech_transcribe_multilanguage_beta + - assert_contains: + - literal: "how old is the Brooklyn Bridge" + # Note: the primary language_code given was not English, but + # English was provided in the list of alternative_language_codes + - literal: "Detected language: en-us" + + # Confirm that another file can be transcribed (use another .flac file) + - name: speech_transcribe_multilanguage_beta (--local_file_path) + spec: + - call: + sample: speech_transcribe_multilanguage_beta + params: + local_file_path: + literal: "resources/multi.flac" + - assert_contains: + - literal: "how are you doing" + # Note: the primary language_code given was not English, but + # English was provided in the list of alternative_language_codes + - literal: "Detected language: en-us" + - assert_not_contains: + - literal: "how old is the Brooklyn Bridge" diff --git a/packages/google-cloud-speech/samples/v1p1beta1/test/speech_transcribe_recognition_metadata_beta.test.yaml b/packages/google-cloud-speech/samples/v1p1beta1/test/speech_transcribe_recognition_metadata_beta.test.yaml new file mode 100644 index 000000000000..57cf24a1d261 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/test/speech_transcribe_recognition_metadata_beta.test.yaml @@ -0,0 +1,27 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Adding recognition metadata (Local File) (Beta) + cases: + + # This sample should default to using commercial_mono.wav + - name: speech_transcribe_recognition_metadata_beta (no arguments) + spec: + - call: + sample: speech_transcribe_recognition_metadata_beta + - assert_contains: + - literal: "Chrome" + + # Confirm that another file can be transcribed (use another file) + - name: speech_transcribe_recognition_metadata_beta (--local_file_path) + spec: + - call: + sample: speech_transcribe_recognition_metadata_beta + params: + local_file_path: + literal: "resources/brooklyn_bridge.flac" + - assert_contains: + - literal: "how old is the Brooklyn Bridge" + - assert_not_contains: + - literal: "Chrome" diff --git a/packages/google-cloud-speech/samples/v1p1beta1/test/speech_transcribe_word_level_confidence_beta.test.yaml b/packages/google-cloud-speech/samples/v1p1beta1/test/speech_transcribe_word_level_confidence_beta.test.yaml new file mode 100644 index 000000000000..0d7d60570a01 --- /dev/null +++ b/packages/google-cloud-speech/samples/v1p1beta1/test/speech_transcribe_word_level_confidence_beta.test.yaml @@ -0,0 +1,35 @@ +type: test/samples +schema_version: 1 +test: + suites: + - name: Enabling word-level confidence (Local File) (Beta) + cases: + + # This sample should default to using brooklyn_bridge.flac + - name: speech_transcribe_word_level_confidence_beta (no arguments) + spec: + - call: + sample: 
speech_transcribe_word_level_confidence_beta + - assert_contains: + - literal: "how old is the Brooklyn Bridge" + - literal: "Word: how" + - literal: "Word: old" + - literal: "Word: is" + - literal: "Confidence: 0." + + # Confirm that another file can be transcribed (use another .flac file) + - name: speech_transcribe_word_level_confidence_beta (--local_file_path) + spec: + - call: + sample: speech_transcribe_word_level_confidence_beta + params: + local_file_path: + literal: "resources/multi.flac" + - assert_contains: + - literal: "how are you doing" + - literal: "Word: how" + - literal: "Word: are" + - literal: "Word: you" + - literal: "Confidence: 0." + - assert_not_contains: + - literal: "how old is the Brooklyn Bridge" diff --git a/packages/google-cloud-speech/synth.metadata b/packages/google-cloud-speech/synth.metadata index 78f193e56bfd..da81eb6a3ae6 100644 --- a/packages/google-cloud-speech/synth.metadata +++ b/packages/google-cloud-speech/synth.metadata @@ -1,19 +1,11 @@ { - "updateTime": "2019-08-06T16:44:05.691252Z", + "updateTime": "2019-08-29T22:41:20.931044Z", "sources": [ { "generator": { "name": "artman", - "version": "0.32.1", - "dockerImage": "googleapis/artman@sha256:a684d40ba9a4e15946f5f2ca6b4bd9fe301192f522e9de4fff622118775f309b" - } - }, - { - "git": { - "name": "googleapis", - "remote": "https://github.com/googleapis/googleapis.git", - "sha": "e699b0cba64ffddfae39633417180f1f65875896", - "internalRef": "261759677" + "version": "0.35.1", + "dockerImage": "googleapis/artman@sha256:b11c7ea0d0831c54016fb50f4b796d24d1971439b30fbc32a369ba1ac887c384" } }, { diff --git a/packages/google-cloud-speech/synth.py b/packages/google-cloud-speech/synth.py index 20a617c82c9b..dba406260300 100644 --- a/packages/google-cloud-speech/synth.py +++ b/packages/google-cloud-speech/synth.py @@ -28,7 +28,12 @@ # Generate speech GAPIC layer # ---------------------------------------------------------------------------- for version in versions: - library = gapic.py_library("speech", version, include_protos=True) + library = gapic.py_library( + "speech", + version, + include_protos=True, + include_samples=True + ) # Don't move over __init__.py, as we modify it to make the generated client # use helpers.py. @@ -37,6 +42,7 @@ s.move(library / f"google/cloud/speech_{version}/proto") s.move(library / f"tests/unit/gapic/{version}") s.move(library / f"docs/gapic/{version}") + s.move(library / f"samples") # Use the highest version library to generate documentation import alias.
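Editor's aside on the test harness: samples.manifest.yaml tells sample-tester which binary to use and where to run each script, and the test yamls above supply the assertions. A rough hand-written equivalent of one such case is sketched below; it is not part of this diff. The python3 binary and the samples working directory mirror the manifest (cwd="samples" assumes you start from the package root), the asserted literal mirrors speech_quickstart_beta.test.yaml, and valid Google Cloud credentials are assumed to be available in the environment.

import subprocess


def run_sample(relative_path, *args):
    # Run a sample the way the manifest declares: with python3, from the
    # samples/ directory, capturing stdout for assertions.
    result = subprocess.run(
        ["python3", relative_path, *args],
        capture_output=True,
        text=True,
        check=True,
        cwd="samples",
    )
    return result.stdout


if __name__ == "__main__":
    output = run_sample("v1p1beta1/speech_quickstart_beta.py")
    # Same literal that speech_quickstart_beta.test.yaml asserts on.
    assert "how old is the Brooklyn Bridge" in output
    print("ok")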