From f90b7ee7340c9f37cdd49e250c5597f4eb51c90a Mon Sep 17 00:00:00 2001
From: Kadir Pekel
Date: Wed, 18 Dec 2024 16:00:03 +0100
Subject: [PATCH 1/5] ENG-1235 Utility functions now have special treatment,
 as they need dynamic param population from the node spec

---
 aixplain/modules/pipeline/designer/nodes.py | 81 +++++++++++++--------
 1 file changed, 52 insertions(+), 29 deletions(-)

diff --git a/aixplain/modules/pipeline/designer/nodes.py b/aixplain/modules/pipeline/designer/nodes.py
index 70ff302f..af07026b 100644
--- a/aixplain/modules/pipeline/designer/nodes.py
+++ b/aixplain/modules/pipeline/designer/nodes.py
@@ -1,15 +1,9 @@
 from typing import List, Union, Type, TYPE_CHECKING, Optional

 from aixplain.modules import Model
-from aixplain.enums import DataType
-
-from .enums import (
-    NodeType,
-    FunctionType,
-    RouteType,
-    Operation,
-    AssetType,
-)
+from aixplain.enums import DataType, Function
+
+from .enums import NodeType, FunctionType, RouteType, Operation, AssetType
 from .base import (
     Node,
     Link,
@@ -85,7 +79,9 @@ def populate_asset(self):

        if self.function:
            if self.asset.function.value != self.function:
-                raise ValueError(f"Function {self.function} is not supported by asset {self.asset_id}")  # noqa
+                raise ValueError(
+                    f"Function {self.function} is not supported by asset {self.asset_id}"
+                )
        else:
            self.function = self.asset.function.value
            self._auto_populate_params()
@@ -95,19 +91,31 @@ def populate_asset(self):
    def _auto_populate_params(self):
        from aixplain.enums.function import FunctionInputOutput

-        spec = FunctionInputOutput[self.asset.function.value]["spec"]
-        for item in spec["params"]:
-            self.inputs.create_param(
-                code=item["code"],
-                data_type=item["dataType"],
-                is_required=item["required"],
-            )
-
-        for item in spec["output"]:
-            self.outputs.create_param(
-                code=item["code"],
-                data_type=item["dataType"],
-            )
+        if self.function == Function.UTILITIES:
+            for param in self.asset.input_params.values():
+                self.inputs.create_param(
+                    code=param["name"],
+                    data_type=param["dataType"],
+                    is_required=param["required"],
+                )
+            for param in self.asset.output_params.values():
+                self.outputs.create_param(
+                    code=param["name"], data_type=param["dataType"]
+                )
+        else:
+            spec = FunctionInputOutput[self.function]["spec"]
+            for item in spec["params"]:
+                self.inputs.create_param(
+                    code=item["code"],
+                    data_type=item["dataType"],
+                    is_required=item["required"],
+                )
+
+            for item in spec["output"]:
+                self.outputs.create_param(
+                    code=item["code"],
+                    data_type=item["dataType"],
+                )

    def _auto_set_params(self):
        for k, v in self.asset.additional_info["parameters"].items():
@@ -217,7 +225,12 @@ class Output(Node[OutputInputs, OutputOutputs]):
    inputs_class: Type[TI] = OutputInputs
    outputs_class: Type[TO] = OutputOutputs

-    def __init__(self, data_types: Optional[List[DataType]] = None, pipeline: "DesignerPipeline" = None, **kwargs):
+    def __init__(
+        self,
+        data_types: Optional[List[DataType]] = None,
+        pipeline: "DesignerPipeline" = None,
+        **kwargs
+    ):
        super().__init__(pipeline=pipeline, **kwargs)
        self.data_types = data_types or []

@@ -278,7 +291,14 @@ class Route(Serializable):
    operation: Operation
    type: RouteType

-    def __init__(self, value: DataType, path: List[Union[Node, int]], operation: Operation, type: RouteType, **kwargs):
+    def __init__(
+        self,
+        value: DataType,
+        path: List[Union[Node, int]],
+        operation: Operation,
+        type: RouteType,
+        **kwargs
+    ):
        """
        Post init method to convert the nodes to node numbers if they are nodes.
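
        For example (with hypothetical nodes ``node_a`` and ``node_b``), a
        route created with ``path=[node_a, node_b]`` is stored as
        ``[node_a.number, node_b.number]``.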
@@ -294,8 +314,7 @@ def __init__(self, value: DataType, path: List[Union[Node, int]], operation: Ope

        # convert nodes to node numbers if they are nodes
        self.path = [
-            node.number if isinstance(node, Node) else node
-            for node in self.path
+            node.number if isinstance(node, Node) else node for node in self.path
        ]

    def serialize(self) -> dict:
@@ -334,7 +353,9 @@ class Router(Node[RouterInputs, RouterOutputs], LinkableMixin):
    inputs_class: Type[TI] = RouterInputs
    outputs_class: Type[TO] = RouterOutputs

-    def __init__(self, routes: List[Route], pipeline: "DesignerPipeline" = None, **kwargs):
+    def __init__(
+        self, routes: List[Route], pipeline: "DesignerPipeline" = None, **kwargs
+    ):
        super().__init__(pipeline=pipeline, **kwargs)
        self.routes = routes

@@ -373,7 +394,9 @@ class Decision(Node[DecisionInputs, DecisionOutputs], LinkableMixin):
    inputs_class: Type[TI] = DecisionInputs
    outputs_class: Type[TO] = DecisionOutputs

-    def __init__(self, routes: List[Route], pipeline: "DesignerPipeline" = None, **kwargs):
+    def __init__(
+        self, routes: List[Route], pipeline: "DesignerPipeline" = None, **kwargs
+    ):
        super().__init__(pipeline=pipeline, **kwargs)
        self.routes = routes


From 6631c86b647baca65577be49393bed0c12cf2fd4 Mon Sep 17 00:00:00 2001
From: Thiago Castro Ferreira
Date: Thu, 19 Dec 2024 13:38:30 -0300
Subject: [PATCH 2/5] Setting default values as parameters

---
 aixplain/factories/model_factory/utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/aixplain/factories/model_factory/utils.py b/aixplain/factories/model_factory/utils.py
index 5a8d1503..daa1f0db 100644
--- a/aixplain/factories/model_factory/utils.py
+++ b/aixplain/factories/model_factory/utils.py
@@ -29,6 +29,10 @@ def create_model_from_response(response: Dict) -> Model:
        for param in response["params"]:
            if "language" in param["name"]:
                parameters[param["name"]] = [w["value"] for w in param["values"]]
+            else:
+                values = [w["value"] for w in param["defaultValues"]]
+                if len(values) > 0:
+                    parameters[param["name"]] = values

    function_id = response["function"]["id"]
    function = Function(function_id)

From 2c6007f370985b19448814aeb7c97f876dcbc4e5 Mon Sep 17 00:00:00 2001
From: Kadir Pekel
Date: Thu, 19 Dec 2024 18:13:43 +0100
Subject: [PATCH 3/5] ENG-1235 Utility functions now have a special pipeline
 method called `utility`

---
 aixplain/modules/pipeline/designer/nodes.py |   18 +-
 .../modules/pipeline/designer/pipeline.py   |   48 +-
 aixplain/modules/pipeline/generate.py       |   14 +-
 aixplain/modules/pipeline/pipeline.py       | 4544 ++++++++---------
 4 files changed, 2278 insertions(+), 2346 deletions(-)

diff --git a/aixplain/modules/pipeline/designer/nodes.py b/aixplain/modules/pipeline/designer/nodes.py
index af07026b..8be8350b 100644
--- a/aixplain/modules/pipeline/designer/nodes.py
+++ b/aixplain/modules/pipeline/designer/nodes.py
@@ -82,6 +82,12 @@ def populate_asset(self):
                raise ValueError(
                    f"Function {self.function} is not supported by asset {self.asset_id}"
                )
+
+            # Even though the function field is already set, utility functions
+            # still need their parameters populated dynamically
+            if self.function == Function.UTILITIES:
+                self._auto_populate_params()
+
        else:
            self.function = self.asset.function.value
            self._auto_populate_params()
@@ -95,6 +97,8 @@ def populate_asset(self):
    def _auto_populate_params(self):
        from aixplain.enums.function import FunctionInputOutput

+        # When the node is a utility, we need to create its parameters
+        # dynamically from the node's data.
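+        # (Assumed payload shape: `input_params` / `output_params` are dicts
+        # keyed by param name, e.g.
+        # {"text": {"name": "text", "dataType": "text", "required": True}},
+        # which is what the lookups below rely on.)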
        if self.function == Function.UTILITIES:
            for param in self.asset.input_params.values():
                self.inputs.create_param(
                    code=param["name"],
                    data_type=param["dataType"],
                    is_required=param["required"],
                )
            for param in self.asset.output_params.values():
                self.outputs.create_param(
-                    code=param["name"], data_type=param["dataType"]
+                    code=param["code"], data_type=param["dataType"]
                )
        else:
            spec = FunctionInputOutput[self.function]["spec"]
            for item in spec["params"]:
                self.inputs.create_param(
                    code=item["code"],
                    data_type=item["dataType"],
                    is_required=item["required"],
                )

            for item in spec["output"]:
                self.outputs.create_param(
                    code=item["code"],
                    data_type=item["dataType"],
                )

    def _auto_set_params(self):
        for k, v in self.asset.additional_info["parameters"].items():
+            if k not in self.inputs:
+                continue
+
            if isinstance(v, list):
                self.inputs[k] = v[0]
            else:
                self.inputs[k] = v


class BareAsset(AssetNode[BareAssetInputs, BareAssetOutputs]):
    pass


+class Utility(AssetNode[BareAssetInputs, BareAssetOutputs]):
+
+    function = "utilities"
+
+
class InputInputs(Inputs):
    pass

diff --git a/aixplain/modules/pipeline/designer/pipeline.py b/aixplain/modules/pipeline/designer/pipeline.py
index 79013590..ed12016c 100644
--- a/aixplain/modules/pipeline/designer/pipeline.py
+++ b/aixplain/modules/pipeline/designer/pipeline.py
@@ -3,7 +3,19 @@
 from aixplain.enums import DataType

 from .base import Serializable, Node, Link
-from .nodes import AssetNode, Decision, Script, Input, Output, Router, Route, BareReconstructor, BareSegmentor, BareMetric
+from .nodes import (
+    AssetNode,
+    Utility,
+    Decision,
+    Script,
+    Input,
+    Output,
+    Router,
+    Route,
+    BareReconstructor,
+    BareSegmentor,
+    BareMetric,
+)
 from .enums import NodeType, RouteType, Operation
 from .mixins import OutputableMixin
 from .utils import find_prompt_params
@@ -141,7 +153,9 @@ def special_prompt_validation(self, node: Node):
            node.inputs.text.is_required = False
        for match in matches:
            if match not in node.inputs:
-                raise ValueError(f"Param {match} of node {node.label} should be defined and set")
+                raise ValueError(
+                    f"Param {match} of node {node.label} should be defined and set"
+                )

    def validate_params(self):
        """
@@ -153,7 +167,9 @@ def validate_params(self):
            self.special_prompt_validation(node)
            for param in node.inputs:
                if param.is_required and not self.is_param_set(node, param):
-                    raise ValueError(f"Param {param.code} of node {node.label} is required")
+                    raise ValueError(
+                        f"Param {param.code} of node {node.label} is required"
+                    )

    def validate(self):
        """
@@ -179,7 +195,11 @@ def get_link(self, from_node: int, to_node: int) -> Link:
        :return: the link
        """
        return next(
-            (link for link in self.links if link.from_node == from_node and link.to_node == to_node),
+            (
+                link
+                for link in self.links
+                if link.from_node == from_node and link.to_node == to_node
+            ),
            None,
        )

@@ -225,7 +245,9 @@ def infer_data_type(node):
        infer_data_type(self)
        infer_data_type(to_node)

-    def asset(self, asset_id: str, *args, asset_class: Type[T] = AssetNode, **kwargs) -> T:
+    def asset(
+        self, asset_id: str, *args, asset_class: Type[T] = AssetNode, **kwargs
+    ) -> T:
        """
        Shortcut to create an asset node for the current pipeline.
        All params will be passed as keyword arguments to the node
        constructor.

        :param kwargs: keyword arguments
        :return: the node
        """
        return asset_class(asset_id, *args, pipeline=self, **kwargs)

    def utility(
        self, asset_id: str, *args, asset_class: Type[T] = Utility, **kwargs
    ) -> T:
        """
        Shortcut to create a utility node for the current pipeline.
        All params will be passed as keyword arguments to the node
        constructor.
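
        A minimal usage sketch (the asset ID below is a placeholder, not a
        real utility model):

            >>> node = pipeline.utility("<utility-asset-id>")
            >>> node.inputs  # params are auto-populated from the asset spec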
+ + :param kwargs: keyword arguments + :return: the node + """ + if not issubclass(asset_class, Utility): + raise ValueError("`asset_class` should be a subclass of `Utility` class") + + return asset_class(asset_id, *args, pipeline=self, **kwargs) + def decision(self, *args, **kwargs) -> Decision: """ Shortcut to create an decision node for the current pipeline. diff --git a/aixplain/modules/pipeline/generate.py b/aixplain/modules/pipeline/generate.py index 8bfeecb3..eeb36412 100644 --- a/aixplain/modules/pipeline/generate.py +++ b/aixplain/modules/pipeline/generate.py @@ -5,6 +5,7 @@ from jinja2 import Environment, BaseLoader from aixplain.utils import config +from aixplain.enums import Function SEGMENTOR_FUNCTIONS = [ "split-on-linebreak", @@ -143,9 +144,16 @@ def populate_specs(functions: list): """ function_class_specs = [] for function in functions: + # Utility functions has dynamic input parameters so they are not + # subject to static class generation + if function["id"] == Function.UTILITIES: + continue + # slugify function name by trimming some special chars and # transforming it to snake case - function_name = function["id"].replace("-", "_").replace("(", "_").replace(")", "_") + function_name = ( + function["id"].replace("-", "_").replace("(", "_").replace(")", "_") + ) base_class = "AssetNode" is_segmentor = function["id"] in SEGMENTOR_FUNCTIONS is_reconstructor = function["id"] in RECONSTRUCTOR_FUNCTIONS @@ -153,7 +161,9 @@ def populate_specs(functions: list): base_class = "BaseSegmentor" elif is_reconstructor: base_class = "BaseReconstructor" - elif "metric" in function_name.split("_"): # noqa: Advise a better distinguisher please + elif "metric" in function_name.split( + "_" + ): # noqa: Advise a better distinguisher please base_class = "BaseMetric" spec = { diff --git a/aixplain/modules/pipeline/pipeline.py b/aixplain/modules/pipeline/pipeline.py index bf67ff15..27091770 100644 --- a/aixplain/modules/pipeline/pipeline.py +++ b/aixplain/modules/pipeline/pipeline.py @@ -4,7 +4,18 @@ from typing import Union, Type from aixplain.enums import DataType -from .designer import InputParam, OutputParam, Inputs, Outputs, TI, TO, AssetNode, BaseReconstructor, BaseSegmentor, BaseMetric +from .designer import ( + InputParam, + OutputParam, + Inputs, + Outputs, + TI, + TO, + AssetNode, + BaseReconstructor, + BaseSegmentor, + BaseMetric +) from .default import DefaultPipeline from aixplain.modules import asset @@ -27,14 +38,13 @@ def __init__(self, node=None): class ObjectDetection(AssetNode[ObjectDetectionInputs, ObjectDetectionOutputs]): """ - Object Detection is a computer vision technology that identifies and locates - objects within an image, typically by drawing bounding boxes around the - detected objects and classifying them into predefined categories. + Object Detection is a computer vision technology that identifies and locates +objects within an image, typically by drawing bounding boxes around the +detected objects and classifying them into predefined categories. - InputType: video - OutputType: text + InputType: video + OutputType: text """ - function: str = "object-detection" input_type: str = DataType.VIDEO output_type: str = DataType.TEXT @@ -61,13 +71,12 @@ def __init__(self, node=None): class LanguageIdentification(AssetNode[LanguageIdentificationInputs, LanguageIdentificationOutputs]): """ - Language Identification is the process of automatically determining the - language in which a given piece of text is written. 
+ Detects the language in which a given text is written, aiding in multilingual +platforms or content localization. - InputType: text - OutputType: text + InputType: text + OutputType: text """ - function: str = "language-identification" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -76,17 +85,17 @@ class LanguageIdentification(AssetNode[LanguageIdentificationInputs, LanguageIde outputs_class: Type[TO] = LanguageIdentificationOutputs -class OcrInputs(Inputs): +class DepthEstimationInputs(Inputs): + language: InputParam = None image: InputParam = None - featuretypes: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) - self.featuretypes = self.create_param(code="featuretypes", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class OcrOutputs(Outputs): +class DepthEstimationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -94,23 +103,21 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class Ocr(AssetNode[OcrInputs, OcrOutputs]): +class DepthEstimation(AssetNode[DepthEstimationInputs, DepthEstimationOutputs]): """ - OCR, or Optical Character Recognition, is a technology that converts different - types of documents, such as scanned paper documents, PDFs, or images captured - by a digital camera, into editable and searchable data by recognizing and - extracting text from the images. + Depth estimation is a computational process that determines the distance of +objects from a viewpoint, typically using visual data from cameras or sensors +to create a three-dimensional understanding of a scene. - InputType: image - OutputType: text + InputType: image + OutputType: text """ - - function: str = "ocr" + function: str = "depth-estimation" input_type: str = DataType.IMAGE output_type: str = DataType.TEXT - inputs_class: Type[TI] = OcrInputs - outputs_class: Type[TO] = OcrOutputs + inputs_class: Type[TI] = DepthEstimationInputs + outputs_class: Type[TO] = DepthEstimationOutputs class ScriptExecutionInputs(Inputs): @@ -131,14 +138,13 @@ def __init__(self, node=None): class ScriptExecution(AssetNode[ScriptExecutionInputs, ScriptExecutionOutputs]): """ - Script Execution refers to the process of running a set of programmed - instructions or code within a computing environment, enabling the automated - performance of tasks, calculations, or operations as defined by the script. + Script Execution refers to the process of running a set of programmed +instructions or code within a computing environment, enabling the automated +performance of tasks, calculations, or operations as defined by the script. 
- InputType: text - OutputType: text + InputType: text + OutputType: text """ - function: str = "script-execution" input_type: str = DataType.TEXT output_type: str = DataType.TEXT @@ -147,157 +153,157 @@ class ScriptExecution(AssetNode[ScriptExecutionInputs, ScriptExecutionOutputs]): outputs_class: Type[TO] = ScriptExecutionOutputs -class ImageLabelDetectionInputs(Inputs): +class ImageEmbeddingInputs(Inputs): + language: InputParam = None image: InputParam = None - min_confidence: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) - self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class ImageLabelDetectionOutputs(Outputs): +class ImageEmbeddingOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class ImageLabelDetection(AssetNode[ImageLabelDetectionInputs, ImageLabelDetectionOutputs]): +class ImageEmbedding(AssetNode[ImageEmbeddingInputs, ImageEmbeddingOutputs]): """ - Image Label Detection is a function that automatically identifies and assigns - descriptive tags or labels to objects, scenes, or elements within an image, - enabling easier categorization, search, and analysis of visual content. + Image Embedding is a process that transforms an image into a fixed-dimensional +vector representation, capturing its essential features and enabling efficient +comparison, retrieval, and analysis in various machine learning and computer +vision tasks. 
- InputType: image - OutputType: label + InputType: image + OutputType: text """ - - function: str = "image-label-detection" + function: str = "image-embedding" input_type: str = DataType.IMAGE - output_type: str = DataType.LABEL + output_type: str = DataType.TEXT - inputs_class: Type[TI] = ImageLabelDetectionInputs - outputs_class: Type[TO] = ImageLabelDetectionOutputs + inputs_class: Type[TI] = ImageEmbeddingInputs + outputs_class: Type[TO] = ImageEmbeddingOutputs -class ImageCaptioningInputs(Inputs): +class ImageToVideoGenerationInputs(Inputs): + language: InputParam = None image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class ImageCaptioningOutputs(Outputs): +class ImageToVideoGenerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.VIDEO) -class ImageCaptioning(AssetNode[ImageCaptioningInputs, ImageCaptioningOutputs]): +class ImageToVideoGeneration(AssetNode[ImageToVideoGenerationInputs, ImageToVideoGenerationOutputs]): """ - Image Captioning is a process that involves generating a textual description of - an image, typically using machine learning models to analyze the visual content - and produce coherent and contextually relevant sentences that describe the - objects, actions, and scenes depicted in the image. + The Image To Video Generation function transforms a series of static images +into a cohesive, dynamic video sequence, often incorporating transitions, +effects, and synchronization with audio to create a visually engaging +narrative. - InputType: image - OutputType: text + InputType: image + OutputType: video """ - - function: str = "image-captioning" + function: str = "image-to-video-generation" input_type: str = DataType.IMAGE - output_type: str = DataType.TEXT + output_type: str = DataType.VIDEO - inputs_class: Type[TI] = ImageCaptioningInputs - outputs_class: Type[TO] = ImageCaptioningOutputs + inputs_class: Type[TI] = ImageToVideoGenerationInputs + outputs_class: Type[TO] = ImageToVideoGenerationOutputs -class AudioLanguageIdentificationInputs(Inputs): - audio: InputParam = None +class ImageImpaintingInputs(Inputs): + image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class AudioLanguageIdentificationOutputs(Outputs): - data: OutputParam = None +class ImageImpaintingOutputs(Outputs): + image: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.image = self.create_param(code="image", data_type=DataType.IMAGE) -class AudioLanguageIdentification(AssetNode[AudioLanguageIdentificationInputs, AudioLanguageIdentificationOutputs]): +class ImageImpainting(AssetNode[ImageImpaintingInputs, ImageImpaintingOutputs]): """ - Audio Language Identification is a process that involves analyzing an audio - recording to determine the language being spoken. 
+ Image inpainting is a process that involves filling in missing or damaged parts +of an image in a way that is visually coherent and seamlessly blends with the +surrounding areas, often using advanced algorithms and techniques to restore +the image to its original or intended appearance. - InputType: audio - OutputType: label + InputType: image + OutputType: image """ + function: str = "image-impainting" + input_type: str = DataType.IMAGE + output_type: str = DataType.IMAGE - function: str = "audio-language-identification" - input_type: str = DataType.AUDIO - output_type: str = DataType.LABEL - - inputs_class: Type[TI] = AudioLanguageIdentificationInputs - outputs_class: Type[TO] = AudioLanguageIdentificationOutputs + inputs_class: Type[TI] = ImageImpaintingInputs + outputs_class: Type[TO] = ImageImpaintingOutputs -class AsrAgeClassificationInputs(Inputs): - source_audio: InputParam = None +class StyleTransferInputs(Inputs): + image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class AsrAgeClassificationOutputs(Outputs): - data: OutputParam = None +class StyleTransferOutputs(Outputs): + image: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.image = self.create_param(code="image", data_type=DataType.IMAGE) -class AsrAgeClassification(AssetNode[AsrAgeClassificationInputs, AsrAgeClassificationOutputs]): +class StyleTransfer(AssetNode[StyleTransferInputs, StyleTransferOutputs]): """ - The ASR Age Classification function is designed to analyze audio recordings of - speech to determine the speaker's age group by leveraging automatic speech - recognition (ASR) technology and machine learning algorithms. + Style Transfer is a technique in artificial intelligence that applies the +visual style of one image (such as the brushstrokes of a famous painting) to +the content of another image, effectively blending the artistic elements of the +first image with the subject matter of the second. 
- InputType: audio - OutputType: label + InputType: image + OutputType: image """ + function: str = "style-transfer" + input_type: str = DataType.IMAGE + output_type: str = DataType.IMAGE - function: str = "asr-age-classification" - input_type: str = DataType.AUDIO - output_type: str = DataType.LABEL - - inputs_class: Type[TI] = AsrAgeClassificationInputs - outputs_class: Type[TO] = AsrAgeClassificationOutputs + inputs_class: Type[TI] = StyleTransferInputs + outputs_class: Type[TO] = StyleTransferOutputs -class BenchmarkScoringMtInputs(Inputs): - input: InputParam = None - text: InputParam = None +class MultiClassTextClassificationInputs(Inputs): + language: InputParam = None text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.input = self.create_param(code="input", data_type=DataType.TEXT, is_required=True) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) -class BenchmarkScoringMtOutputs(Outputs): +class MultiClassTextClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -305,33 +311,34 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class BenchmarkScoringMt(AssetNode[BenchmarkScoringMtInputs, BenchmarkScoringMtOutputs]): +class MultiClassTextClassification(AssetNode[MultiClassTextClassificationInputs, MultiClassTextClassificationOutputs]): """ - Benchmark Scoring MT is a function designed to evaluate and score machine - translation systems by comparing their output against a set of predefined - benchmarks, thereby assessing their accuracy and performance. + Multi Class Text Classification is a natural language processing task that +involves categorizing a given text into one of several predefined classes or +categories based on its content. 
- InputType: text - OutputType: label + InputType: text + OutputType: label """ - - function: str = "benchmark-scoring-mt" + function: str = "multi-class-text-classification" input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = BenchmarkScoringMtInputs - outputs_class: Type[TO] = BenchmarkScoringMtOutputs + inputs_class: Type[TI] = MultiClassTextClassificationInputs + outputs_class: Type[TO] = MultiClassTextClassificationOutputs -class AsrGenderClassificationInputs(Inputs): - source_audio: InputParam = None +class PartOfSpeechTaggingInputs(Inputs): + language: InputParam = None + text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) -class AsrGenderClassificationOutputs(Outputs): +class PartOfSpeechTaggingOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -339,34 +346,33 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class AsrGenderClassification(AssetNode[AsrGenderClassificationInputs, AsrGenderClassificationOutputs]): +class PartOfSpeechTagging(AssetNode[PartOfSpeechTaggingInputs, PartOfSpeechTaggingOutputs]): """ - The ASR Gender Classification function analyzes audio recordings to determine - and classify the speaker's gender based on their voice characteristics. + Part of Speech Tagging is a natural language processing task that involves +assigning each word in a sentence its corresponding part of speech, such as +noun, verb, adjective, or adverb, based on its role and context within the +sentence. - InputType: audio - OutputType: label + InputType: text + OutputType: label """ - - function: str = "asr-gender-classification" - input_type: str = DataType.AUDIO + function: str = "part-of-speech-tagging" + input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = AsrGenderClassificationInputs - outputs_class: Type[TO] = AsrGenderClassificationOutputs + inputs_class: Type[TI] = PartOfSpeechTaggingInputs + outputs_class: Type[TO] = PartOfSpeechTaggingOutputs -class BaseModelInputs(Inputs): - language: InputParam = None +class MetricAggregationInputs(Inputs): text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) -class BaseModelOutputs(Outputs): +class MetricAggregationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -374,331 +380,382 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class BaseModel(AssetNode[BaseModelInputs, BaseModelOutputs]): +class MetricAggregation(BaseMetric[MetricAggregationInputs, MetricAggregationOutputs]): """ - The Base-Model function serves as a foundational framework designed to provide - essential features and capabilities upon which more specialized or advanced - models can be built and customized. 
+ Metric Aggregation is a function that computes and summarizes numerical data by +applying statistical operations, such as averaging, summing, or finding the +minimum and maximum values, to provide insights and facilitate analysis of +large datasets. - InputType: text - OutputType: text + InputType: text + OutputType: text """ - - function: str = "base-model" + function: str = "metric-aggregation" input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = BaseModelInputs - outputs_class: Type[TO] = BaseModelOutputs + inputs_class: Type[TI] = MetricAggregationInputs + outputs_class: Type[TO] = MetricAggregationOutputs -class LanguageIdentificationAudioInputs(Inputs): - audio: InputParam = None +class ImageColorizationInputs(Inputs): + image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class LanguageIdentificationAudioOutputs(Outputs): - data: OutputParam = None +class ImageColorizationOutputs(Outputs): + image: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.image = self.create_param(code="image", data_type=DataType.IMAGE) -class LanguageIdentificationAudio(AssetNode[LanguageIdentificationAudioInputs, LanguageIdentificationAudioOutputs]): +class ImageColorization(AssetNode[ImageColorizationInputs, ImageColorizationOutputs]): """ - The Language Identification Audio function analyzes audio input to determine - and identify the language being spoken. + Image colorization is a process that involves adding color to grayscale images, +transforming them from black-and-white to full-color representations, often +using advanced algorithms and machine learning techniques to predict and apply +the appropriate hues and shades. 
- InputType: audio - OutputType: label + InputType: image + OutputType: image """ + function: str = "image-colorization" + input_type: str = DataType.IMAGE + output_type: str = DataType.IMAGE - function: str = "language-identification-audio" - input_type: str = DataType.AUDIO - output_type: str = DataType.LABEL - - inputs_class: Type[TI] = LanguageIdentificationAudioInputs - outputs_class: Type[TO] = LanguageIdentificationAudioOutputs + inputs_class: Type[TI] = ImageColorizationInputs + outputs_class: Type[TO] = ImageColorizationOutputs -class LoglikelihoodInputs(Inputs): +class IntentClassificationInputs(Inputs): + language: InputParam = None text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) -class LoglikelihoodOutputs(Outputs): +class IntentClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.NUMBER) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class Loglikelihood(AssetNode[LoglikelihoodInputs, LoglikelihoodOutputs]): +class IntentClassification(AssetNode[IntentClassificationInputs, IntentClassificationOutputs]): """ - The Log Likelihood function measures the probability of observing the given - data under a specific statistical model by taking the natural logarithm of the - likelihood function, thereby transforming the product of probabilities into a - sum, which simplifies the process of optimization and parameter estimation. + Intent Classification is a natural language processing task that involves +analyzing and categorizing user text input to determine the underlying purpose +or goal behind the communication, such as booking a flight, asking for weather +information, or setting a reminder. 
- InputType: text - OutputType: number + InputType: text + OutputType: label """ - - function: str = "loglikelihood" + function: str = "intent-classification" input_type: str = DataType.TEXT - output_type: str = DataType.NUMBER + output_type: str = DataType.LABEL - inputs_class: Type[TI] = LoglikelihoodInputs - outputs_class: Type[TO] = LoglikelihoodOutputs + inputs_class: Type[TI] = IntentClassificationInputs + outputs_class: Type[TO] = IntentClassificationOutputs -class VideoEmbeddingInputs(Inputs): - language: InputParam = None - video: InputParam = None +class AudioIntentDetectionInputs(Inputs): + audio: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=False) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) -class VideoEmbeddingOutputs(Outputs): +class AudioIntentDetectionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.EMBEDDING) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class VideoEmbedding(AssetNode[VideoEmbeddingInputs, VideoEmbeddingOutputs]): +class AudioIntentDetection(AssetNode[AudioIntentDetectionInputs, AudioIntentDetectionOutputs]): """ - Video Embedding is a process that transforms video content into a fixed- - dimensional vector representation, capturing essential features and patterns to - facilitate tasks such as retrieval, classification, and recommendation. + Audio Intent Detection is a process that involves analyzing audio signals to +identify and interpret the underlying intentions or purposes behind spoken +words, enabling systems to understand and respond appropriately to human +speech. 
- InputType: video - OutputType: embedding + InputType: audio + OutputType: label """ + function: str = "audio-intent-detection" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL - function: str = "video-embedding" - input_type: str = DataType.VIDEO - output_type: str = DataType.EMBEDDING - - inputs_class: Type[TI] = VideoEmbeddingInputs - outputs_class: Type[TO] = VideoEmbeddingOutputs + inputs_class: Type[TI] = AudioIntentDetectionInputs + outputs_class: Type[TO] = AudioIntentDetectionOutputs -class TextSegmenationInputs(Inputs): +class AsrQualityEstimationInputs(Inputs): text: InputParam = None - language: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class TextSegmenationOutputs(Outputs): +class AsrQualityEstimationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class TextSegmenation(AssetNode[TextSegmenationInputs, TextSegmenationOutputs]): +class AsrQualityEstimation(AssetNode[AsrQualityEstimationInputs, AsrQualityEstimationOutputs]): """ - Text Segmentation is the process of dividing a continuous text into meaningful - units, such as words, sentences, or topics, to facilitate easier analysis and - understanding. + ASR Quality Estimation is a process that evaluates the accuracy and reliability +of automatic speech recognition systems by analyzing their performance in +transcribing spoken language into text. - InputType: text - OutputType: text + InputType: text + OutputType: label """ + function: str = "asr-quality-estimation" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL - function: str = "text-segmenation" + inputs_class: Type[TI] = AsrQualityEstimationInputs + outputs_class: Type[TO] = AsrQualityEstimationOutputs + + +class SearchInputs(Inputs): + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class SearchOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + + +class Search(AssetNode[SearchInputs, SearchOutputs]): + """ + An algorithm that identifies and returns data or items that match particular +keywords or conditions from a dataset. A fundamental tool for databases and +websites. 
+ + InputType: text + OutputType: text + """ + function: str = "search" input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = TextSegmenationInputs - outputs_class: Type[TO] = TextSegmenationOutputs + inputs_class: Type[TI] = SearchInputs + outputs_class: Type[TO] = SearchOutputs -class ImageEmbeddingInputs(Inputs): +class VisemeGenerationInputs(Inputs): + text: InputParam = None language: InputParam = None - image: InputParam = None + dialect: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class ImageEmbeddingOutputs(Outputs): +class VisemeGenerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class ImageEmbedding(AssetNode[ImageEmbeddingInputs, ImageEmbeddingOutputs]): +class VisemeGeneration(AssetNode[VisemeGenerationInputs, VisemeGenerationOutputs]): """ - Image Embedding is a process that transforms an image into a fixed-dimensional - vector representation, capturing its essential features and enabling efficient - comparison, retrieval, and analysis in various machine learning and computer - vision tasks. + Viseme Generation is the process of creating visual representations of +phonemes, which are the distinct units of sound in speech, to synchronize lip +movements with spoken words in animations or virtual avatars. 
- InputType: image - OutputType: text + InputType: text + OutputType: label """ + function: str = "viseme-generation" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL - function: str = "image-embedding" - input_type: str = DataType.IMAGE - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = ImageEmbeddingInputs - outputs_class: Type[TO] = ImageEmbeddingOutputs + inputs_class: Type[TI] = VisemeGenerationInputs + outputs_class: Type[TO] = VisemeGenerationOutputs -class ImageManipulationInputs(Inputs): +class OcrInputs(Inputs): image: InputParam = None - targetimage: InputParam = None + featuretypes: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) - self.targetimage = self.create_param(code="targetimage", data_type=DataType.IMAGE, is_required=True) + self.featuretypes = self.create_param(code="featuretypes", data_type=DataType.TEXT, is_required=True) -class ImageManipulationOutputs(Outputs): - image: OutputParam = None +class OcrOutputs(Outputs): + data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class ImageManipulation(AssetNode[ImageManipulationInputs, ImageManipulationOutputs]): +class Ocr(AssetNode[OcrInputs, OcrOutputs]): """ - Image Manipulation refers to the process of altering or enhancing digital - images using various techniques and tools to achieve desired visual effects, - correct imperfections, or transform the image's appearance. + Converts images of typed, handwritten, or printed text into machine-encoded +text. Used in digitizing printed texts for data retrieval. - InputType: image - OutputType: image + InputType: image + OutputType: text """ - - function: str = "image-manipulation" + function: str = "ocr" input_type: str = DataType.IMAGE - output_type: str = DataType.IMAGE + output_type: str = DataType.TEXT - inputs_class: Type[TI] = ImageManipulationInputs - outputs_class: Type[TO] = ImageManipulationOutputs + inputs_class: Type[TI] = OcrInputs + outputs_class: Type[TO] = OcrOutputs -class ImageToVideoGenerationInputs(Inputs): +class LoglikelihoodInputs(Inputs): + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class LoglikelihoodOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.NUMBER) + + +class Loglikelihood(AssetNode[LoglikelihoodInputs, LoglikelihoodOutputs]): + """ + The Log Likelihood function measures the probability of observing the given +data under a specific statistical model by taking the natural logarithm of the +likelihood function, thereby transforming the product of probabilities into a +sum, which simplifies the process of optimization and parameter estimation. 
+ + InputType: text + OutputType: number + """ + function: str = "loglikelihood" + input_type: str = DataType.TEXT + output_type: str = DataType.NUMBER + + inputs_class: Type[TI] = LoglikelihoodInputs + outputs_class: Type[TO] = LoglikelihoodOutputs + + +class VideoEmbeddingInputs(Inputs): language: InputParam = None - image: InputParam = None + video: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=False) -class ImageToVideoGenerationOutputs(Outputs): +class VideoEmbeddingOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.VIDEO) + self.data = self.create_param(code="data", data_type=DataType.EMBEDDING) -class ImageToVideoGeneration(AssetNode[ImageToVideoGenerationInputs, ImageToVideoGenerationOutputs]): +class VideoEmbedding(AssetNode[VideoEmbeddingInputs, VideoEmbeddingOutputs]): """ - The Image To Video Generation function transforms a series of static images - into a cohesive, dynamic video sequence, often incorporating transitions, - effects, and synchronization with audio to create a visually engaging - narrative. + Video Embedding is a process that transforms video content into a fixed- +dimensional vector representation, capturing essential features and patterns to +facilitate tasks such as retrieval, classification, and recommendation. - InputType: image - OutputType: video + InputType: video + OutputType: embedding """ + function: str = "video-embedding" + input_type: str = DataType.VIDEO + output_type: str = DataType.EMBEDDING - function: str = "image-to-video-generation" - input_type: str = DataType.IMAGE - output_type: str = DataType.VIDEO - - inputs_class: Type[TI] = ImageToVideoGenerationInputs - outputs_class: Type[TO] = ImageToVideoGenerationOutputs + inputs_class: Type[TI] = VideoEmbeddingInputs + outputs_class: Type[TO] = VideoEmbeddingOutputs -class AudioForcedAlignmentInputs(Inputs): - audio: InputParam = None +class TextSegmenationInputs(Inputs): text: InputParam = None language: InputParam = None - dialect: InputParam = None - script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class AudioForcedAlignmentOutputs(Outputs): - text: OutputParam = None - audio: OutputParam = None +class TextSegmenationOutputs(Outputs): + data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class AudioForcedAlignment(AssetNode[AudioForcedAlignmentInputs, AudioForcedAlignmentOutputs]): +class TextSegmenation(AssetNode[TextSegmenationInputs, 
TextSegmenationOutputs]): """ - Audio Forced Alignment is a process that synchronizes a given audio recording - with its corresponding transcript by precisely aligning each spoken word or - phoneme to its exact timing within the audio. + Text Segmentation is the process of dividing a continuous text into meaningful +units, such as words, sentences, or topics, to facilitate easier analysis and +understanding. - InputType: audio - OutputType: audio + InputType: text + OutputType: text """ + function: str = "text-segmenation" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT - function: str = "audio-forced-alignment" - input_type: str = DataType.AUDIO - output_type: str = DataType.AUDIO - - inputs_class: Type[TI] = AudioForcedAlignmentInputs - outputs_class: Type[TO] = AudioForcedAlignmentOutputs + inputs_class: Type[TI] = TextSegmenationInputs + outputs_class: Type[TO] = TextSegmenationOutputs -class BenchmarkScoringAsrInputs(Inputs): - input: InputParam = None - text: InputParam = None - text: InputParam = None +class ExpressionDetectionInputs(Inputs): + media: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.input = self.create_param(code="input", data_type=DataType.AUDIO, is_required=True) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.media = self.create_param(code="media", data_type=DataType.IMAGE, is_required=True) -class BenchmarkScoringAsrOutputs(Outputs): +class ExpressionDetectionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -706,215 +763,208 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class BenchmarkScoringAsr(AssetNode[BenchmarkScoringAsrInputs, BenchmarkScoringAsrOutputs]): +class ExpressionDetection(AssetNode[ExpressionDetectionInputs, ExpressionDetectionOutputs]): """ - Benchmark Scoring ASR is a function that evaluates and compares the performance - of automatic speech recognition systems by analyzing their accuracy, speed, and - other relevant metrics against a standardized set of benchmarks. + Expression Detection is the process of identifying and analyzing facial +expressions to interpret emotions or intentions using AI and computer vision +techniques. 
- InputType: audio - OutputType: label + InputType: text + OutputType: label """ - - function: str = "benchmark-scoring-asr" - input_type: str = DataType.AUDIO + function: str = "expression-detection" + input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = BenchmarkScoringAsrInputs - outputs_class: Type[TO] = BenchmarkScoringAsrOutputs + inputs_class: Type[TI] = ExpressionDetectionInputs + outputs_class: Type[TO] = ExpressionDetectionOutputs -class VisualQuestionAnsweringInputs(Inputs): - text: InputParam = None +class SpeechClassificationInputs(Inputs): + audio: InputParam = None language: InputParam = None - image: InputParam = None + script: InputParam = None + dialect: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) -class VisualQuestionAnsweringOutputs(Outputs): +class SpeechClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class VisualQuestionAnswering(AssetNode[VisualQuestionAnsweringInputs, VisualQuestionAnsweringOutputs]): +class SpeechClassification(AssetNode[SpeechClassificationInputs, SpeechClassificationOutputs]): """ - Visual Question Answering (VQA) is a task in artificial intelligence that - involves analyzing an image and providing accurate, contextually relevant - answers to questions posed about the visual content of that image. + Categorizes audio clips based on their content, aiding in content organization +and targeted actions. 
- InputType: image - OutputType: video + InputType: audio + OutputType: label """ + function: str = "speech-classification" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL - function: str = "visual-question-answering" - input_type: str = DataType.IMAGE - output_type: str = DataType.VIDEO - - inputs_class: Type[TI] = VisualQuestionAnsweringInputs - outputs_class: Type[TO] = VisualQuestionAnsweringOutputs + inputs_class: Type[TI] = SpeechClassificationInputs + outputs_class: Type[TO] = SpeechClassificationOutputs -class DocumentImageParsingInputs(Inputs): - image: InputParam = None +class InverseTextNormalizationInputs(Inputs): + text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) -class DocumentImageParsingOutputs(Outputs): +class InverseTextNormalizationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class DocumentImageParsing(AssetNode[DocumentImageParsingInputs, DocumentImageParsingOutputs]): +class InverseTextNormalization(AssetNode[InverseTextNormalizationInputs, InverseTextNormalizationOutputs]): """ - Document Image Parsing is the process of analyzing and converting scanned or - photographed images of documents into structured, machine-readable formats by - identifying and extracting text, layout, and other relevant information. + Inverse Text Normalization is the process of converting spoken or written +language in its normalized form, such as numbers, dates, and abbreviations, +back into their original, more complex or detailed textual representations. 
- InputType: image - OutputType: text + InputType: text + OutputType: label """ + function: str = "inverse-text-normalization" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL - function: str = "document-image-parsing" - input_type: str = DataType.IMAGE - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = DocumentImageParsingInputs - outputs_class: Type[TO] = DocumentImageParsingOutputs + inputs_class: Type[TI] = InverseTextNormalizationInputs + outputs_class: Type[TO] = InverseTextNormalizationOutputs -class DocumentInformationExtractionInputs(Inputs): - image: InputParam = None +class ExtractAudioFromVideoInputs(Inputs): + video: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) -class DocumentInformationExtractionOutputs(Outputs): +class ExtractAudioFromVideoOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) -class DocumentInformationExtraction(AssetNode[DocumentInformationExtractionInputs, DocumentInformationExtractionOutputs]): +class ExtractAudioFromVideo(AssetNode[ExtractAudioFromVideoInputs, ExtractAudioFromVideoOutputs]): """ - Document Information Extraction is the process of automatically identifying, - extracting, and structuring relevant data from unstructured or semi-structured - documents, such as invoices, receipts, contracts, and forms, to facilitate - easier data management and analysis. + Isolates and extracts audio tracks from video files, aiding in audio analysis +or transcription tasks. 
- InputType: image - OutputType: text + InputType: video + OutputType: audio """ + function: str = "extract-audio-from-video" + input_type: str = DataType.VIDEO + output_type: str = DataType.AUDIO - function: str = "document-information-extraction" - input_type: str = DataType.IMAGE - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = DocumentInformationExtractionInputs - outputs_class: Type[TO] = DocumentInformationExtractionOutputs + inputs_class: Type[TI] = ExtractAudioFromVideoInputs + outputs_class: Type[TO] = ExtractAudioFromVideoOutputs -class DepthEstimationInputs(Inputs): - language: InputParam = None +class ImageCompressionInputs(Inputs): image: InputParam = None + apl_qfactor: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) + self.apl_qfactor = self.create_param(code="apl_qfactor", data_type=DataType.TEXT, is_required=False) -class DepthEstimationOutputs(Outputs): - data: OutputParam = None +class ImageCompressionOutputs(Outputs): + image: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.image = self.create_param(code="image", data_type=DataType.IMAGE) -class DepthEstimation(AssetNode[DepthEstimationInputs, DepthEstimationOutputs]): +class ImageCompression(AssetNode[ImageCompressionInputs, ImageCompressionOutputs]): """ - Depth estimation is a computational process that determines the distance of - objects from a viewpoint, typically using visual data from cameras or sensors - to create a three-dimensional understanding of a scene. + Reduces the size of image files without significantly compromising their visual +quality. Useful for optimizing storage and improving webpage load times. 
- InputType: image - OutputType: text + InputType: image + OutputType: image """ - - function: str = "depth-estimation" + function: str = "image-compression" input_type: str = DataType.IMAGE - output_type: str = DataType.TEXT + output_type: str = DataType.IMAGE - inputs_class: Type[TI] = DepthEstimationInputs - outputs_class: Type[TO] = DepthEstimationOutputs + inputs_class: Type[TI] = ImageCompressionInputs + outputs_class: Type[TO] = ImageCompressionOutputs -class VideoGenerationInputs(Inputs): - text: InputParam = None +class NoiseRemovalInputs(Inputs): + audio: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) -class VideoGenerationOutputs(Outputs): +class NoiseRemovalOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.VIDEO) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) -class VideoGeneration(AssetNode[VideoGenerationInputs, VideoGenerationOutputs]): +class NoiseRemoval(AssetNode[NoiseRemovalInputs, NoiseRemovalOutputs]): """ - Video Generation is the process of creating video content through automated or - semi-automated means, often utilizing algorithms, artificial intelligence, or - software tools to produce visual and audio elements that can range from simple - animations to complex, realistic scenes. + Noise Removal is a process that involves identifying and eliminating unwanted +random variations or disturbances from an audio signal to enhance the clarity +and quality of the underlying information. 
- InputType: text - OutputType: video + InputType: audio + OutputType: audio """ + function: str = "noise-removal" + input_type: str = DataType.AUDIO + output_type: str = DataType.AUDIO - function: str = "video-generation" - input_type: str = DataType.TEXT - output_type: str = DataType.VIDEO - - inputs_class: Type[TI] = VideoGenerationInputs - outputs_class: Type[TO] = VideoGenerationOutputs + inputs_class: Type[TI] = NoiseRemovalInputs + outputs_class: Type[TO] = NoiseRemovalOutputs -class ReferencelessAudioGenerationMetricInputs(Inputs): - hypotheses: InputParam = None - sources: InputParam = None - score_identifier: InputParam = None +class TextSummarizationInputs(Inputs): + text: InputParam = None + language: InputParam = None + script: InputParam = None + dialect: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.AUDIO, is_required=True) - self.sources = self.create_param(code="sources", data_type=DataType.AUDIO, is_required=False) - self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) -class ReferencelessAudioGenerationMetricOutputs(Outputs): +class TextSummarizationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -922,103 +972,109 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class ReferencelessAudioGenerationMetric( - BaseMetric[ReferencelessAudioGenerationMetricInputs, ReferencelessAudioGenerationMetricOutputs] -): +class TextSummarization(AssetNode[TextSummarizationInputs, TextSummarizationOutputs]): """ - The Referenceless Audio Generation Metric is a tool designed to evaluate the - quality of generated audio content without the need for a reference or original - audio sample for comparison. + Extracts the main points from a larger body of text, producing a concise +summary without losing the primary message. 
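+
+    Example (sketch; the asset id is a placeholder):
+
+        node = TextSummarization(asset_id="<ASSET_ID>")
+        node.inputs.text        # required TEXT input
+        node.inputs.language    # required LABEL input; script and dialect are optional
+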
- InputType: text - OutputType: text + InputType: text + OutputType: text """ - - function: str = "referenceless-audio-generation-metric" + function: str = "text-summarization" input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = ReferencelessAudioGenerationMetricInputs - outputs_class: Type[TO] = ReferencelessAudioGenerationMetricOutputs + inputs_class: Type[TI] = TextSummarizationInputs + outputs_class: Type[TO] = TextSummarizationOutputs -class MultiClassImageClassificationInputs(Inputs): - image: InputParam = None +class TextGenerationMetricInputs(Inputs): + hypotheses: InputParam = None + references: InputParam = None + sources: InputParam = None + score_identifier: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.TEXT, is_required=True) + self.references = self.create_param(code="references", data_type=DataType.TEXT, is_required=False) + self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) + self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) -class MultiClassImageClassificationOutputs(Outputs): +class TextGenerationMetricOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class MultiClassImageClassification(AssetNode[MultiClassImageClassificationInputs, MultiClassImageClassificationOutputs]): +class TextGenerationMetric(BaseMetric[TextGenerationMetricInputs, TextGenerationMetricOutputs]): """ - Multi Class Image Classification is a machine learning task where an algorithm - is trained to categorize images into one of several predefined classes or - categories based on their visual content. + A Text Generation Metric is a quantitative measure used to evaluate the quality +and effectiveness of text produced by natural language processing models, often +assessing aspects such as coherence, relevance, fluency, and adherence to given +prompts or instructions. 
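+
+    Example (sketch; the asset id is a placeholder and the keyword constructor
+    is assumed from BaseMetric):
+
+        metric = TextGenerationMetric(asset_id="<ASSET_ID>")
+        metric.inputs.hypotheses          # required TEXT
+        metric.inputs.references          # optional TEXT
+        metric.inputs.score_identifier    # required TEXT
+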
- InputType: image - OutputType: label + InputType: text + OutputType: text """ + function: str = "text-generation-metric" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT - function: str = "multi-class-image-classification" - input_type: str = DataType.IMAGE - output_type: str = DataType.LABEL - - inputs_class: Type[TI] = MultiClassImageClassificationInputs - outputs_class: Type[TO] = MultiClassImageClassificationOutputs + inputs_class: Type[TI] = TextGenerationMetricInputs + outputs_class: Type[TO] = TextGenerationMetricOutputs -class SemanticSegmentationInputs(Inputs): +class ImageCaptioningInputs(Inputs): image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) -class SemanticSegmentationOutputs(Outputs): +class ImageCaptioningOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class SemanticSegmentation(AssetNode[SemanticSegmentationInputs, SemanticSegmentationOutputs]): +class ImageCaptioning(AssetNode[ImageCaptioningInputs, ImageCaptioningOutputs]): """ - Semantic segmentation is a computer vision process that involves classifying - each pixel in an image into a predefined category, effectively partitioning the - image into meaningful segments based on the objects or regions they represent. + Image Captioning is a process that involves generating a textual description of +an image, typically using machine learning models to analyze the visual content +and produce coherent and contextually relevant sentences that describe the +objects, actions, and scenes depicted in the image. 
-        InputType: image
-        OutputType: label
+    InputType: image
+    OutputType: text
     """
-
-    function: str = "semantic-segmentation"
+    function: str = "image-captioning"
     input_type: str = DataType.IMAGE
-    output_type: str = DataType.LABEL
+    output_type: str = DataType.TEXT

-    inputs_class: Type[TI] = SemanticSegmentationInputs
-    outputs_class: Type[TO] = SemanticSegmentationOutputs
+    inputs_class: Type[TI] = ImageCaptioningInputs
+    outputs_class: Type[TO] = ImageCaptioningOutputs


-class InstanceSegmentationInputs(Inputs):
-    image: InputParam = None
+class BenchmarkScoringMtInputs(Inputs):
+    input: InputParam = None
+    text: InputParam = None

     def __init__(self, node=None):
         super().__init__(node=node)
-        self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False)
+        self.input = self.create_param(code="input", data_type=DataType.TEXT, is_required=True)
+        self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True)


-class InstanceSegmentationOutputs(Outputs):
+class BenchmarkScoringMtOutputs(Outputs):
     data: OutputParam = None

     def __init__(self, node=None):
@@ -1026,137 +1082,138 @@ def __init__(self, node=None):
         super().__init__(node=node)
         self.data = self.create_param(code="data", data_type=DataType.LABEL)


-class InstanceSegmentation(AssetNode[InstanceSegmentationInputs, InstanceSegmentationOutputs]):
+class BenchmarkScoringMt(AssetNode[BenchmarkScoringMtInputs, BenchmarkScoringMtOutputs]):
     """
-    Instance segmentation is a computer vision task that involves detecting and
-    delineating each distinct object within an image, assigning a unique label and
-    precise boundary to every individual instance of objects, even if they belong
-    to the same category.
+    Benchmark Scoring MT is a function designed to evaluate and score machine
+translation systems by comparing their output against a set of predefined
+benchmarks, thereby assessing their accuracy and performance.
- InputType: image - OutputType: label + InputType: text + OutputType: label """ - - function: str = "instance-segmentation" - input_type: str = DataType.IMAGE + function: str = "benchmark-scoring-mt" + input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = InstanceSegmentationInputs - outputs_class: Type[TO] = InstanceSegmentationOutputs + inputs_class: Type[TI] = BenchmarkScoringMtInputs + outputs_class: Type[TO] = BenchmarkScoringMtOutputs -class ImageColorizationInputs(Inputs): - image: InputParam = None +class SpeakerDiarizationAudioInputs(Inputs): + audio: InputParam = None + language: InputParam = None + script: InputParam = None + dialect: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) -class ImageColorizationOutputs(Outputs): - image: OutputParam = None +class SpeakerDiarizationAudioOutputs(Outputs): + data: OutputParam = None + audio: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE) + self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) -class ImageColorization(AssetNode[ImageColorizationInputs, ImageColorizationOutputs]): +class SpeakerDiarizationAudio(BaseSegmentor[SpeakerDiarizationAudioInputs, SpeakerDiarizationAudioOutputs]): """ - Image colorization is a process that involves adding color to grayscale images, - transforming them from black-and-white to full-color representations, often - using advanced algorithms and machine learning techniques to predict and apply - the appropriate hues and shades. + Identifies individual speakers and their respective speech segments within an +audio clip. Ideal for multi-speaker recordings or conference calls. 
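+
+    Example (sketch; the asset id is a placeholder; note the two outputs this
+    segmentor defines above):
+
+        node = SpeakerDiarizationAudio(asset_id="<ASSET_ID>")
+        node.outputs.data     # LABEL output with the diarization segments
+        node.outputs.audio    # AUDIO output with the segmented audio
+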
-        InputType: image
-        OutputType: image
+    InputType: audio
+    OutputType: label
     """
+    function: str = "speaker-diarization-audio"
+    input_type: str = DataType.AUDIO
+    output_type: str = DataType.LABEL

-    function: str = "image-colorization"
-    input_type: str = DataType.IMAGE
-    output_type: str = DataType.IMAGE
-
-    inputs_class: Type[TI] = ImageColorizationInputs
-    outputs_class: Type[TO] = ImageColorizationOutputs
+    inputs_class: Type[TI] = SpeakerDiarizationAudioInputs
+    outputs_class: Type[TO] = SpeakerDiarizationAudioOutputs


-class AudioGenerationMetricInputs(Inputs):
-    hypotheses: InputParam = None
-    references: InputParam = None
-    sources: InputParam = None
-    score_identifier: InputParam = None
+class BenchmarkScoringAsrInputs(Inputs):
+    input: InputParam = None
+    text: InputParam = None

     def __init__(self, node=None):
         super().__init__(node=node)
-        self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.AUDIO, is_required=True)
-        self.references = self.create_param(code="references", data_type=DataType.AUDIO, is_required=False)
-        self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False)
-        self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True)
+        self.input = self.create_param(code="input", data_type=DataType.AUDIO, is_required=True)
+        self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True)


-class AudioGenerationMetricOutputs(Outputs):
+class BenchmarkScoringAsrOutputs(Outputs):
     data: OutputParam = None

     def __init__(self, node=None):
         super().__init__(node=node)
-        self.data = self.create_param(code="data", data_type=DataType.TEXT)
+        self.data = self.create_param(code="data", data_type=DataType.LABEL)


-class AudioGenerationMetric(BaseMetric[AudioGenerationMetricInputs, AudioGenerationMetricOutputs]):
+class BenchmarkScoringAsr(AssetNode[BenchmarkScoringAsrInputs, BenchmarkScoringAsrOutputs]):
     """
-    The Audio Generation Metric is a quantitative measure used to evaluate the
-    quality, accuracy, and overall performance of audio generated by artificial
-    intelligence systems, often considering factors such as fidelity,
-    intelligibility, and similarity to human-produced audio.
+    Benchmark Scoring ASR is a function that evaluates and compares the performance
+of automatic speech recognition systems by analyzing their accuracy, speed, and
+other relevant metrics against a standardized set of benchmarks.
- InputType: text - OutputType: text + InputType: audio + OutputType: label """ + function: str = "benchmark-scoring-asr" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL - function: str = "audio-generation-metric" - input_type: str = DataType.TEXT - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = AudioGenerationMetricInputs - outputs_class: Type[TO] = AudioGenerationMetricOutputs + inputs_class: Type[TI] = BenchmarkScoringAsrInputs + outputs_class: Type[TO] = BenchmarkScoringAsrOutputs -class ImageImpaintingInputs(Inputs): +class VisualQuestionAnsweringInputs(Inputs): + text: InputParam = None + language: InputParam = None image: InputParam = None def __init__(self, node=None): super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class ImageImpaintingOutputs(Outputs): - image: OutputParam = None +class VisualQuestionAnsweringOutputs(Outputs): + data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class ImageImpainting(AssetNode[ImageImpaintingInputs, ImageImpaintingOutputs]): +class VisualQuestionAnswering(AssetNode[VisualQuestionAnsweringInputs, VisualQuestionAnsweringOutputs]): """ - Image inpainting is a process that involves filling in missing or damaged parts - of an image in a way that is visually coherent and seamlessly blends with the - surrounding areas, often using advanced algorithms and techniques to restore - the image to its original or intended appearance. + Visual Question Answering (VQA) is a task in artificial intelligence that +involves analyzing an image and providing accurate, contextually relevant +answers to questions posed about the visual content of that image. 
- InputType: image - OutputType: image + InputType: image + OutputType: video """ - - function: str = "image-impainting" + function: str = "visual-question-answering" input_type: str = DataType.IMAGE - output_type: str = DataType.IMAGE + output_type: str = DataType.VIDEO - inputs_class: Type[TI] = ImageImpaintingInputs - outputs_class: Type[TO] = ImageImpaintingOutputs + inputs_class: Type[TI] = VisualQuestionAnsweringInputs + outputs_class: Type[TO] = VisualQuestionAnsweringOutputs -class StyleTransferInputs(Inputs): +class DocumentImageParsingInputs(Inputs): image: InputParam = None def __init__(self, node=None): @@ -1164,34 +1221,32 @@ def __init__(self, node=None): self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class StyleTransferOutputs(Outputs): - image: OutputParam = None +class DocumentImageParsingOutputs(Outputs): + data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class StyleTransfer(AssetNode[StyleTransferInputs, StyleTransferOutputs]): +class DocumentImageParsing(AssetNode[DocumentImageParsingInputs, DocumentImageParsingOutputs]): """ - Style Transfer is a technique in artificial intelligence that applies the - visual style of one image (such as the brushstrokes of a famous painting) to - the content of another image, effectively blending the artistic elements of the - first image with the subject matter of the second. + Document Image Parsing is the process of analyzing and converting scanned or +photographed images of documents into structured, machine-readable formats by +identifying and extracting text, layout, and other relevant information. - InputType: image - OutputType: image + InputType: image + OutputType: text """ - - function: str = "style-transfer" + function: str = "document-image-parsing" input_type: str = DataType.IMAGE - output_type: str = DataType.IMAGE + output_type: str = DataType.TEXT - inputs_class: Type[TI] = StyleTransferInputs - outputs_class: Type[TO] = StyleTransferOutputs + inputs_class: Type[TI] = DocumentImageParsingInputs + outputs_class: Type[TO] = DocumentImageParsingOutputs -class MultiClassTextClassificationInputs(Inputs): +class MultiLabelTextClassificationInputs(Inputs): language: InputParam = None text: InputParam = None @@ -1201,7 +1256,7 @@ def __init__(self, node=None): self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) -class MultiClassTextClassificationOutputs(Outputs): +class MultiLabelTextClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -1209,76 +1264,67 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class MultiClassTextClassification(AssetNode[MultiClassTextClassificationInputs, MultiClassTextClassificationOutputs]): +class MultiLabelTextClassification(AssetNode[MultiLabelTextClassificationInputs, MultiLabelTextClassificationOutputs]): """ - Multi Class Text Classification is a natural language processing task that - involves categorizing a given text into one of several predefined classes or - categories based on its content. + Multi Label Text Classification is a natural language processing task where a +given text is analyzed and assigned multiple relevant labels or categories from +a predefined set, allowing for the text to belong to more than one category +simultaneously. 
- InputType: text - OutputType: label + InputType: text + OutputType: label """ - - function: str = "multi-class-text-classification" + function: str = "multi-label-text-classification" input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = MultiClassTextClassificationInputs - outputs_class: Type[TO] = MultiClassTextClassificationOutputs + inputs_class: Type[TI] = MultiLabelTextClassificationInputs + outputs_class: Type[TO] = MultiLabelTextClassificationOutputs -class TextEmbeddingInputs(Inputs): +class TextReconstructionInputs(Inputs): text: InputParam = None - language: InputParam = None - dialect: InputParam = None - script: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class TextEmbeddingOutputs(Outputs): +class TextReconstructionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class TextEmbedding(AssetNode[TextEmbeddingInputs, TextEmbeddingOutputs]): +class TextReconstruction(BaseReconstructor[TextReconstructionInputs, TextReconstructionOutputs]): """ - Text embedding is a process that converts text into numerical vectors, - capturing the semantic meaning and contextual relationships of words or - phrases, enabling machines to understand and analyze natural language more - effectively. + Text Reconstruction is a process that involves piecing together fragmented or +incomplete text data to restore it to its original, coherent form. 
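+
+    Example (sketch; the asset id is a placeholder; reconstructors are
+    typically wired after a segmentor node):
+
+        node = TextReconstruction(asset_id="<ASSET_ID>")
+        node.inputs.text    # required TEXT input
+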
- InputType: text - OutputType: text + InputType: text + OutputType: text """ - - function: str = "text-embedding" + function: str = "text-reconstruction" input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = TextEmbeddingInputs - outputs_class: Type[TO] = TextEmbeddingOutputs + inputs_class: Type[TI] = TextReconstructionInputs + outputs_class: Type[TO] = TextReconstructionOutputs -class MultiLabelTextClassificationInputs(Inputs): - language: InputParam = None - text: InputParam = None +class VideoContentModerationInputs(Inputs): + video: InputParam = None + min_confidence: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) + self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) -class MultiLabelTextClassificationOutputs(Outputs): +class VideoContentModerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -1286,34 +1332,33 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class MultiLabelTextClassification(AssetNode[MultiLabelTextClassificationInputs, MultiLabelTextClassificationOutputs]): +class VideoContentModeration(AssetNode[VideoContentModerationInputs, VideoContentModerationOutputs]): """ - Multi Label Text Classification is a natural language processing task where a - given text is analyzed and assigned multiple relevant labels or categories from - a predefined set, allowing for the text to belong to more than one category - simultaneously. + Automatically reviews video content to detect and possibly remove inappropriate +or harmful material. Essential for user-generated content platforms. 
- InputType: text - OutputType: label + InputType: video + OutputType: label """ - - function: str = "multi-label-text-classification" - input_type: str = DataType.TEXT + function: str = "video-content-moderation" + input_type: str = DataType.VIDEO output_type: str = DataType.LABEL - inputs_class: Type[TI] = MultiLabelTextClassificationInputs - outputs_class: Type[TO] = MultiLabelTextClassificationOutputs + inputs_class: Type[TI] = VideoContentModerationInputs + outputs_class: Type[TO] = VideoContentModerationOutputs -class TextReconstructionInputs(Inputs): - text: InputParam = None +class MultilingualSpeechRecognitionInputs(Inputs): + source_audio: InputParam = None + language: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) -class TextReconstructionOutputs(Outputs): +class MultilingualSpeechRecognitionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -1321,34 +1366,36 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class TextReconstruction(BaseReconstructor[TextReconstructionInputs, TextReconstructionOutputs]): +class MultilingualSpeechRecognition(AssetNode[MultilingualSpeechRecognitionInputs, MultilingualSpeechRecognitionOutputs]): """ - Text Reconstruction is a process that involves piecing together fragmented or - incomplete text data to restore it to its original, coherent form. + Multilingual Speech Recognition is a technology that enables the automatic +transcription of spoken language into text across multiple languages, allowing +for seamless communication and understanding in diverse linguistic contexts. 
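+
+    Example (sketch; assumes a designer pipeline instance named ``pipeline``
+    and the generated helper method named after this function; the asset id is
+    a placeholder):
+
+        asr = pipeline.multilingual_speech_recognition(asset_id="<ASSET_ID>")
+        asr.inputs.source_audio    # required AUDIO input
+        asr.inputs.language        # optional LABEL input
+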
- InputType: text - OutputType: text + InputType: audio + OutputType: text """ - - function: str = "text-reconstruction" - input_type: str = DataType.TEXT + function: str = "multilingual-speech-recognition" + input_type: str = DataType.AUDIO output_type: str = DataType.TEXT - inputs_class: Type[TI] = TextReconstructionInputs - outputs_class: Type[TO] = TextReconstructionOutputs + inputs_class: Type[TI] = MultilingualSpeechRecognitionInputs + outputs_class: Type[TO] = MultilingualSpeechRecognitionOutputs -class FactCheckingInputs(Inputs): - language: InputParam = None +class EntityLinkingInputs(Inputs): text: InputParam = None + language: InputParam = None + domain: InputParam = None def __init__(self, node=None): super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) + self.domain = self.create_param(code="domain", data_type=DataType.LABEL, is_required=False) -class FactCheckingOutputs(Outputs): +class EntityLinkingOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -1356,75 +1403,64 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class FactChecking(AssetNode[FactCheckingInputs, FactCheckingOutputs]): +class EntityLinking(AssetNode[EntityLinkingInputs, EntityLinkingOutputs]): """ - Fact Checking is the process of verifying the accuracy and truthfulness of - information, statements, or claims by cross-referencing with reliable sources - and evidence. + Associates identified entities in the text with specific entries in a knowledge +base or database. 
- InputType: text - OutputType: label + InputType: text + OutputType: label """ - - function: str = "fact-checking" + function: str = "entity-linking" input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = FactCheckingInputs - outputs_class: Type[TO] = FactCheckingOutputs + inputs_class: Type[TI] = EntityLinkingInputs + outputs_class: Type[TO] = EntityLinkingOutputs -class SpeechClassificationInputs(Inputs): - audio: InputParam = None - language: InputParam = None - script: InputParam = None - dialect: InputParam = None +class AudioReconstructionInputs(Inputs): + audio: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) -class SpeechClassificationOutputs(Outputs): +class AudioReconstructionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) -class SpeechClassification(AssetNode[SpeechClassificationInputs, SpeechClassificationOutputs]): +class AudioReconstruction(BaseReconstructor[AudioReconstructionInputs, AudioReconstructionOutputs]): """ - Speech Classification is a process that involves analyzing and categorizing - spoken language into predefined categories or classes based on various features - such as tone, pitch, and linguistic content. + Audio Reconstruction is the process of restoring or recreating audio signals +from incomplete, damaged, or degraded recordings to achieve a high-quality, +accurate representation of the original sound. 
- InputType: audio - OutputType: label + InputType: audio + OutputType: audio """ - - function: str = "speech-classification" + function: str = "audio-reconstruction" input_type: str = DataType.AUDIO - output_type: str = DataType.LABEL + output_type: str = DataType.AUDIO - inputs_class: Type[TI] = SpeechClassificationInputs - outputs_class: Type[TO] = SpeechClassificationOutputs + inputs_class: Type[TI] = AudioReconstructionInputs + outputs_class: Type[TO] = AudioReconstructionOutputs -class IntentClassificationInputs(Inputs): - language: InputParam = None - text: InputParam = None +class AudioEmotionDetectionInputs(Inputs): + audio: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) -class IntentClassificationOutputs(Outputs): +class AudioEmotionDetectionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -1432,143 +1468,143 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class IntentClassification(AssetNode[IntentClassificationInputs, IntentClassificationOutputs]): +class AudioEmotionDetection(AssetNode[AudioEmotionDetectionInputs, AudioEmotionDetectionOutputs]): """ - Intent Classification is a natural language processing task that involves - analyzing and categorizing user text input to determine the underlying purpose - or goal behind the communication, such as booking a flight, asking for weather - information, or setting a reminder. + Audio Emotion Detection is a technology that analyzes vocal characteristics and +patterns in audio recordings to identify and classify the emotional state of +the speaker. 
- InputType: text - OutputType: label + InputType: audio + OutputType: label """ - - function: str = "intent-classification" - input_type: str = DataType.TEXT + function: str = "audio-emotion-detection" + input_type: str = DataType.AUDIO output_type: str = DataType.LABEL - inputs_class: Type[TI] = IntentClassificationInputs - outputs_class: Type[TO] = IntentClassificationOutputs + inputs_class: Type[TI] = AudioEmotionDetectionInputs + outputs_class: Type[TO] = AudioEmotionDetectionOutputs -class PartOfSpeechTaggingInputs(Inputs): - language: InputParam = None +class SplitOnLinebreakInputs(Inputs): text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) -class PartOfSpeechTaggingOutputs(Outputs): +class SplitOnLinebreakOutputs(Outputs): data: OutputParam = None + audio: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) -class PartOfSpeechTagging(AssetNode[PartOfSpeechTaggingInputs, PartOfSpeechTaggingOutputs]): +class SplitOnLinebreak(BaseSegmentor[SplitOnLinebreakInputs, SplitOnLinebreakOutputs]): """ - Part of Speech Tagging is a natural language processing task that involves - assigning each word in a sentence its corresponding part of speech, such as - noun, verb, adjective, or adverb, based on its role and context within the - sentence. + The "Split On Linebreak" function divides a given string into a list of +substrings, using linebreaks (newline characters) as the points of separation. - InputType: text - OutputType: label + InputType: text + OutputType: text """ - - function: str = "part-of-speech-tagging" + function: str = "split-on-linebreak" input_type: str = DataType.TEXT - output_type: str = DataType.LABEL + output_type: str = DataType.TEXT - inputs_class: Type[TI] = PartOfSpeechTaggingInputs - outputs_class: Type[TO] = PartOfSpeechTaggingOutputs + inputs_class: Type[TI] = SplitOnLinebreakInputs + outputs_class: Type[TO] = SplitOnLinebreakOutputs -class MetricAggregationInputs(Inputs): - text: InputParam = None +class KeywordSpottingInputs(Inputs): + audio: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) -class MetricAggregationOutputs(Outputs): +class KeywordSpottingOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class MetricAggregation(BaseMetric[MetricAggregationInputs, MetricAggregationOutputs]): +class KeywordSpotting(AssetNode[KeywordSpottingInputs, KeywordSpottingOutputs]): """ - Metric Aggregation is a function that computes and summarizes numerical data by - applying statistical operations, such as averaging, summing, or finding the - minimum and maximum values, to provide insights and facilitate analysis of - large datasets. 
+ Keyword Spotting is a function that enables the detection and identification of +specific words or phrases within a stream of audio, often used in voice- +activated systems to trigger actions or commands based on recognized keywords. - InputType: text - OutputType: text + InputType: audio + OutputType: label """ + function: str = "keyword-spotting" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL - function: str = "metric-aggregation" - input_type: str = DataType.TEXT - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = MetricAggregationInputs - outputs_class: Type[TO] = MetricAggregationOutputs + inputs_class: Type[TI] = KeywordSpottingInputs + outputs_class: Type[TO] = KeywordSpottingOutputs -class DialectDetectionInputs(Inputs): - audio: InputParam = None +class TextClassificationInputs(Inputs): + text: InputParam = None language: InputParam = None + dialect: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class DialectDetectionOutputs(Outputs): +class TextClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class DialectDetection(AssetNode[DialectDetectionInputs, DialectDetectionOutputs]): +class TextClassification(AssetNode[TextClassificationInputs, TextClassificationOutputs]): """ - Dialect Detection is a function that identifies and classifies the specific - regional or social variations of a language spoken or written by an individual, - enabling the recognition of distinct linguistic patterns and nuances associated - with different dialects. + Categorizes text into predefined groups or topics, facilitating content +organization and targeted actions. 
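+
+    Example (sketch; the asset id is a placeholder):
+
+        node = TextClassification(asset_id="<ASSET_ID>")
+        node.inputs.text     # required TEXT input
+        node.outputs.data    # LABEL output carrying the predicted class
+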
- InputType: audio - OutputType: text + InputType: text + OutputType: label """ + function: str = "text-classification" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL - function: str = "dialect-detection" - input_type: str = DataType.AUDIO - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = DialectDetectionInputs - outputs_class: Type[TO] = DialectDetectionOutputs + inputs_class: Type[TI] = TextClassificationInputs + outputs_class: Type[TO] = TextClassificationOutputs -class InverseTextNormalizationInputs(Inputs): +class OffensiveLanguageIdentificationInputs(Inputs): text: InputParam = None + language: InputParam = None + dialect: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class InverseTextNormalizationOutputs(Outputs): +class OffensiveLanguageIdentificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -1576,64 +1612,66 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class InverseTextNormalization(AssetNode[InverseTextNormalizationInputs, InverseTextNormalizationOutputs]): +class OffensiveLanguageIdentification(AssetNode[OffensiveLanguageIdentificationInputs, OffensiveLanguageIdentificationOutputs]): """ - Inverse Text Normalization is the process of converting spoken or written - language in its normalized form, such as numbers, dates, and abbreviations, - back into their original, more complex or detailed textual representations. + Detects language or phrases that might be considered offensive, aiding in +content moderation and creating respectful user interactions. 
- InputType: text - OutputType: label + InputType: text + OutputType: label """ - - function: str = "inverse-text-normalization" + function: str = "offensive-language-identification" input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = InverseTextNormalizationInputs - outputs_class: Type[TO] = InverseTextNormalizationOutputs + inputs_class: Type[TI] = OffensiveLanguageIdentificationInputs + outputs_class: Type[TO] = OffensiveLanguageIdentificationOutputs -class TextToAudioInputs(Inputs): - text: InputParam = None +class SpeechNonSpeechClassificationInputs(Inputs): + audio: InputParam = None language: InputParam = None + script: InputParam = None + dialect: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) -class TextToAudioOutputs(Outputs): +class SpeechNonSpeechClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.AUDIO) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class TextToAudio(AssetNode[TextToAudioInputs, TextToAudioOutputs]): +class SpeechNonSpeechClassification(AssetNode[SpeechNonSpeechClassificationInputs, SpeechNonSpeechClassificationOutputs]): """ - The Text to Audio function converts written text into spoken words, allowing - users to listen to the content instead of reading it. + Differentiates between speech and non-speech audio segments. Great for editing +software and transcription services to exclude irrelevant audio. 
- InputType: text - OutputType: audio + InputType: audio + OutputType: label """ + function: str = "speech-non-speech-classification" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL - function: str = "text-to-audio" - input_type: str = DataType.TEXT - output_type: str = DataType.AUDIO - - inputs_class: Type[TI] = TextToAudioInputs - outputs_class: Type[TO] = TextToAudioOutputs + inputs_class: Type[TI] = SpeechNonSpeechClassificationInputs + outputs_class: Type[TO] = SpeechNonSpeechClassificationOutputs -class FillTextMaskInputs(Inputs): +class NamedEntityRecognitionInputs(Inputs): text: InputParam = None language: InputParam = None dialect: InputParam = None script: InputParam = None + domain: InputParam = None def __init__(self, node=None): super().__init__(node=node) @@ -1641,80 +1679,78 @@ def __init__(self, node=None): self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.domain = self.create_param(code="domain", data_type=DataType.LABEL, is_required=False) -class FillTextMaskOutputs(Outputs): +class NamedEntityRecognitionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class FillTextMask(AssetNode[FillTextMaskInputs, FillTextMaskOutputs]): +class NamedEntityRecognition(AssetNode[NamedEntityRecognitionInputs, NamedEntityRecognitionOutputs]): """ - The "Fill Text Mask" function takes a text input with masked or placeholder - characters and replaces those placeholders with specified or contextually - appropriate characters to generate a complete and coherent text output. + Identifies and classifies named entities (e.g., persons, organizations, +locations) within text. Useful for information extraction, content tagging, and +search enhancements. 
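+
+    Example (sketch; the asset id is a placeholder; shows the optional domain
+    filter):
+
+        ner = NamedEntityRecognition(asset_id="<ASSET_ID>")
+        ner.inputs.text      # required TEXT input
+        ner.inputs.domain    # optional LABEL input restricting the entity domain
+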
- InputType: text - OutputType: text + InputType: text + OutputType: label """ - - function: str = "fill-text-mask" + function: str = "named-entity-recognition" input_type: str = DataType.TEXT - output_type: str = DataType.TEXT + output_type: str = DataType.LABEL - inputs_class: Type[TI] = FillTextMaskInputs - outputs_class: Type[TO] = FillTextMaskOutputs + inputs_class: Type[TI] = NamedEntityRecognitionInputs + outputs_class: Type[TO] = NamedEntityRecognitionOutputs -class VideoContentModerationInputs(Inputs): - video: InputParam = None - min_confidence: InputParam = None +class ImageManipulationInputs(Inputs): + image: InputParam = None + targetimage: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) - self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) + self.targetimage = self.create_param(code="targetimage", data_type=DataType.IMAGE, is_required=True) -class VideoContentModerationOutputs(Outputs): - data: OutputParam = None +class ImageManipulationOutputs(Outputs): + image: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.image = self.create_param(code="image", data_type=DataType.IMAGE) -class VideoContentModeration(AssetNode[VideoContentModerationInputs, VideoContentModerationOutputs]): +class ImageManipulation(AssetNode[ImageManipulationInputs, ImageManipulationOutputs]): """ - Video Content Moderation is the process of reviewing, analyzing, and filtering - video content to ensure it adheres to community guidelines, legal standards, - and platform policies, thereby preventing the dissemination of inappropriate, - harmful, or illegal material. + Image Manipulation refers to the process of altering or enhancing digital +images using various techniques and tools to achieve desired visual effects, +correct imperfections, or transform the image's appearance. 
- InputType: video - OutputType: label + InputType: image + OutputType: image """ + function: str = "image-manipulation" + input_type: str = DataType.IMAGE + output_type: str = DataType.IMAGE - function: str = "video-content-moderation" - input_type: str = DataType.VIDEO - output_type: str = DataType.LABEL - - inputs_class: Type[TI] = VideoContentModerationInputs - outputs_class: Type[TO] = VideoContentModerationOutputs + inputs_class: Type[TI] = ImageManipulationInputs + outputs_class: Type[TO] = ImageManipulationOutputs -class ExtractAudioFromVideoInputs(Inputs): - video: InputParam = None +class SplitOnSilenceInputs(Inputs): + audio: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) -class ExtractAudioFromVideoOutputs(Outputs): +class SplitOnSilenceOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -1722,152 +1758,169 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.AUDIO) -class ExtractAudioFromVideo(AssetNode[ExtractAudioFromVideoInputs, ExtractAudioFromVideoOutputs]): +class SplitOnSilence(AssetNode[SplitOnSilenceInputs, SplitOnSilenceOutputs]): + """ + The "Split On Silence" function divides an audio recording into separate +segments based on periods of silence, allowing for easier editing and analysis +of individual sections. + + InputType: audio + OutputType: audio """ - The "Extract Audio From Video" function allows users to separate and save the - audio track from a video file, enabling them to obtain just the sound without - the accompanying visual content. + function: str = "split-on-silence" + input_type: str = DataType.AUDIO + output_type: str = DataType.AUDIO + + inputs_class: Type[TI] = SplitOnSilenceInputs + outputs_class: Type[TO] = SplitOnSilenceOutputs + + +class TextToVideoGenerationInputs(Inputs): + text: InputParam = None + language: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + + +class TextToVideoGenerationOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.VIDEO) + - InputType: video - OutputType: audio +class TextToVideoGeneration(AssetNode[TextToVideoGenerationInputs, TextToVideoGenerationOutputs]): """ + Text To Video Generation is a process that converts written descriptions or +scripts into dynamic, visual video content using advanced algorithms and +artificial intelligence. 
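+
+    Example (sketch; the asset id is a placeholder):
+
+        node = TextToVideoGeneration(asset_id="<ASSET_ID>")
+        node.inputs.text     # required TEXT prompt
+        node.outputs.data    # VIDEO output
+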
- function: str = "extract-audio-from-video" - input_type: str = DataType.VIDEO - output_type: str = DataType.AUDIO + InputType: text + OutputType: video + """ + function: str = "text-to-video-generation" + input_type: str = DataType.TEXT + output_type: str = DataType.VIDEO - inputs_class: Type[TI] = ExtractAudioFromVideoInputs - outputs_class: Type[TO] = ExtractAudioFromVideoOutputs + inputs_class: Type[TI] = TextToVideoGenerationInputs + outputs_class: Type[TO] = TextToVideoGenerationOutputs -class ImageCompressionInputs(Inputs): +class DocumentInformationExtractionInputs(Inputs): image: InputParam = None - apl_qfactor: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) - self.apl_qfactor = self.create_param(code="apl_qfactor", data_type=DataType.TEXT, is_required=False) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class ImageCompressionOutputs(Outputs): - image: OutputParam = None +class DocumentInformationExtractionOutputs(Outputs): + data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class ImageCompression(AssetNode[ImageCompressionInputs, ImageCompressionOutputs]): +class DocumentInformationExtraction(AssetNode[DocumentInformationExtractionInputs, DocumentInformationExtractionOutputs]): """ - Image compression is a process that reduces the file size of an image by - removing redundant or non-essential data, while maintaining an acceptable level - of visual quality. + Document Information Extraction is the process of automatically identifying, +extracting, and structuring relevant data from unstructured or semi-structured +documents, such as invoices, receipts, contracts, and forms, to facilitate +easier data management and analysis. 
- InputType: image - OutputType: image + InputType: image + OutputType: text """ - - function: str = "image-compression" + function: str = "document-information-extraction" input_type: str = DataType.IMAGE - output_type: str = DataType.IMAGE + output_type: str = DataType.TEXT - inputs_class: Type[TI] = ImageCompressionInputs - outputs_class: Type[TO] = ImageCompressionOutputs + inputs_class: Type[TI] = DocumentInformationExtractionInputs + outputs_class: Type[TO] = DocumentInformationExtractionOutputs -class MultilingualSpeechRecognitionInputs(Inputs): - source_audio: InputParam = None - language: InputParam = None +class VideoGenerationInputs(Inputs): + text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) -class MultilingualSpeechRecognitionOutputs(Outputs): +class VideoGenerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.VIDEO) -class MultilingualSpeechRecognition(AssetNode[MultilingualSpeechRecognitionInputs, MultilingualSpeechRecognitionOutputs]): +class VideoGeneration(AssetNode[VideoGenerationInputs, VideoGenerationOutputs]): """ - Multilingual Speech Recognition is a technology that enables the automatic - transcription of spoken language into text across multiple languages, allowing - for seamless communication and understanding in diverse linguistic contexts. + Produces video content based on specific inputs or datasets. Can be used for +simulations, animations, or even deepfake detection. 
- InputType: audio - OutputType: text + InputType: text + OutputType: video """ + function: str = "video-generation" + input_type: str = DataType.TEXT + output_type: str = DataType.VIDEO - function: str = "multilingual-speech-recognition" - input_type: str = DataType.AUDIO - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = MultilingualSpeechRecognitionInputs - outputs_class: Type[TO] = MultilingualSpeechRecognitionOutputs + inputs_class: Type[TI] = VideoGenerationInputs + outputs_class: Type[TO] = VideoGenerationOutputs -class ReferencelessTextGenerationMetricInputs(Inputs): - hypotheses: InputParam = None - sources: InputParam = None - score_identifier: InputParam = None +class TextToImageGenerationInputs(Inputs): + text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.TEXT, is_required=True) - self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) - self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) -class ReferencelessTextGenerationMetricOutputs(Outputs): +class TextToImageGenerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.IMAGE) -class ReferencelessTextGenerationMetric( - BaseMetric[ReferencelessTextGenerationMetricInputs, ReferencelessTextGenerationMetricOutputs] -): +class TextToImageGeneration(AssetNode[TextToImageGenerationInputs, TextToImageGenerationOutputs]): """ - The Referenceless Text Generation Metric is a method for evaluating the quality - of generated text without requiring a reference text for comparison, often - leveraging models or algorithms to assess coherence, relevance, and fluency - based on intrinsic properties of the text itself. + Creates a visual representation based on textual input, turning descriptions +into pictorial forms. Used in creative processes and content generation. 
- InputType: text - OutputType: text + InputType: text + OutputType: image """ - - function: str = "referenceless-text-generation-metric" + function: str = "text-to-image-generation" input_type: str = DataType.TEXT - output_type: str = DataType.TEXT + output_type: str = DataType.IMAGE - inputs_class: Type[TI] = ReferencelessTextGenerationMetricInputs - outputs_class: Type[TO] = ReferencelessTextGenerationMetricOutputs + inputs_class: Type[TI] = TextToImageGenerationInputs + outputs_class: Type[TO] = TextToImageGenerationOutputs -class TextGenerationMetricDefaultInputs(Inputs): +class ReferencelessTextGenerationMetricInputs(Inputs): hypotheses: InputParam = None - references: InputParam = None sources: InputParam = None score_identifier: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.TEXT, is_required=True) - self.references = self.create_param(code="references", data_type=DataType.TEXT, is_required=False) self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) -class TextGenerationMetricDefaultOutputs(Outputs): +class ReferencelessTextGenerationMetricOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -1875,153 +1928,151 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class TextGenerationMetricDefault(BaseMetric[TextGenerationMetricDefaultInputs, TextGenerationMetricDefaultOutputs]): +class ReferencelessTextGenerationMetric(BaseMetric[ReferencelessTextGenerationMetricInputs, ReferencelessTextGenerationMetricOutputs]): """ - The "Text Generation Metric Default" function provides a standard set of - evaluation metrics for assessing the quality and performance of text generation - models. + The Referenceless Text Generation Metric is a method for evaluating the quality +of generated text without requiring a reference text for comparison, often +leveraging models or algorithms to assess coherence, relevance, and fluency +based on intrinsic properties of the text itself. 
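+
+    Example (sketch; the asset id is a placeholder; unlike TextGenerationMetric
+    above there is no references input):
+
+        metric = ReferencelessTextGenerationMetric(asset_id="<ASSET_ID>")
+        metric.inputs.hypotheses    # required TEXT
+        metric.inputs.sources       # optional TEXT
+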
- InputType: text - OutputType: text + InputType: text + OutputType: text """ - - function: str = "text-generation-metric-default" + function: str = "referenceless-text-generation-metric" input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = TextGenerationMetricDefaultInputs - outputs_class: Type[TO] = TextGenerationMetricDefaultOutputs + inputs_class: Type[TI] = ReferencelessTextGenerationMetricInputs + outputs_class: Type[TO] = ReferencelessTextGenerationMetricOutputs -class NoiseRemovalInputs(Inputs): - audio: InputParam = None +class OtherMultipurposeInputs(Inputs): + text: InputParam = None + language: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) -class NoiseRemovalOutputs(Outputs): +class OtherMultipurposeOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.AUDIO) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class NoiseRemoval(AssetNode[NoiseRemovalInputs, NoiseRemovalOutputs]): +class OtherMultipurpose(AssetNode[OtherMultipurposeInputs, OtherMultipurposeOutputs]): """ - Noise Removal is a process that involves identifying and eliminating unwanted - random variations or disturbances from an audio signal to enhance the clarity - and quality of the underlying information. + The "Other (Multipurpose)" function serves as a versatile category designed to +accommodate a wide range of tasks and activities that do not fit neatly into +predefined classifications, offering flexibility and adaptability for various +needs. 
- InputType: audio - OutputType: audio + InputType: text + OutputType: text """ + function: str = "other-(multipurpose)" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT - function: str = "noise-removal" - input_type: str = DataType.AUDIO - output_type: str = DataType.AUDIO - - inputs_class: Type[TI] = NoiseRemovalInputs - outputs_class: Type[TO] = NoiseRemovalOutputs + inputs_class: Type[TI] = OtherMultipurposeInputs + outputs_class: Type[TO] = OtherMultipurposeOutputs -class AudioReconstructionInputs(Inputs): - audio: InputParam = None +class ImageLabelDetectionInputs(Inputs): + image: InputParam = None + min_confidence: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) + self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) -class AudioReconstructionOutputs(Outputs): +class ImageLabelDetectionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.AUDIO) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class AudioReconstruction(BaseReconstructor[AudioReconstructionInputs, AudioReconstructionOutputs]): +class ImageLabelDetection(AssetNode[ImageLabelDetectionInputs, ImageLabelDetectionOutputs]): """ - Audio Reconstruction is the process of restoring or recreating audio signals - from incomplete, damaged, or degraded recordings to achieve a high-quality, - accurate representation of the original sound. + Identifies objects, themes, or topics within images, useful for image +categorization, search, and recommendation systems. 
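+    Sketch (since `min_confidence` is TEXT-typed, the threshold is passed as a
+string; assigning to a param's `value` attribute is an assumption here):
+
+        detector = ImageLabelDetection(asset_id="<asset-id>", pipeline=pipeline)
+        detector.inputs.min_confidence.value = "0.8"  # optional confidence floor
+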
- InputType: audio - OutputType: audio + InputType: image + OutputType: label """ + function: str = "image-label-detection" + input_type: str = DataType.IMAGE + output_type: str = DataType.LABEL - function: str = "audio-reconstruction" - input_type: str = DataType.AUDIO - output_type: str = DataType.AUDIO - - inputs_class: Type[TI] = AudioReconstructionInputs - outputs_class: Type[TO] = AudioReconstructionOutputs + inputs_class: Type[TI] = ImageLabelDetectionInputs + outputs_class: Type[TO] = ImageLabelDetectionOutputs -class VoiceCloningInputs(Inputs): - text: InputParam = None - audio: InputParam = None +class SpeakerDiarizationVideoInputs(Inputs): + video: InputParam = None language: InputParam = None - dialect: InputParam = None - voice: InputParam = None script: InputParam = None - type: InputParam = None + dialect: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) - self.voice = self.create_param(code="voice", data_type=DataType.LABEL, is_required=False) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) - self.type = self.create_param(code="type", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) -class VoiceCloningOutputs(Outputs): +class SpeakerDiarizationVideoOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.AUDIO) + self.data = self.create_param(code="data", data_type=DataType.VIDEO) -class VoiceCloning(AssetNode[VoiceCloningInputs, VoiceCloningOutputs]): +class SpeakerDiarizationVideo(AssetNode[SpeakerDiarizationVideoInputs, SpeakerDiarizationVideoOutputs]): """ - Voice cloning is a technology that uses artificial intelligence to create a - digital replica of a person's voice, allowing for the generation of speech that - mimics the tone, pitch, and speaking style of the original speaker. + Segments a video based on different speakers, identifying when each individual +speaks. Useful for transcriptions and understanding multi-person conversations. 
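+    Sketch (only `video` is required; `language`, `script`, and `dialect` are
+optional hints; assigning to a param's `value` attribute is an assumption):
+
+        diarizer = SpeakerDiarizationVideo(asset_id="<asset-id>", pipeline=pipeline)
+        diarizer.inputs.language.value = "en"  # optional language hint
+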
- InputType: text - OutputType: audio + InputType: video + OutputType: label """ + function: str = "speaker-diarization-video" + input_type: str = DataType.VIDEO + output_type: str = DataType.LABEL - function: str = "voice-cloning" - input_type: str = DataType.TEXT - output_type: str = DataType.AUDIO - - inputs_class: Type[TI] = VoiceCloningInputs - outputs_class: Type[TO] = VoiceCloningOutputs + inputs_class: Type[TI] = SpeakerDiarizationVideoInputs + outputs_class: Type[TO] = SpeakerDiarizationVideoOutputs -class DiacritizationInputs(Inputs): +class AudioTranscriptImprovementInputs(Inputs): language: InputParam = None dialect: InputParam = None + source_supplier: InputParam = None + is_medical: InputParam = None + source_audio: InputParam = None script: InputParam = None - text: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.source_supplier = self.create_param(code="source_supplier", data_type=DataType.LABEL, is_required=False) + self.is_medical = self.create_param(code="is_medical", data_type=DataType.TEXT, is_required=True) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) -class DiacritizationOutputs(Outputs): +class AudioTranscriptImprovementOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2029,139 +2080,134 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class Diacritization(AssetNode[DiacritizationInputs, DiacritizationOutputs]): +class AudioTranscriptImprovement(AssetNode[AudioTranscriptImprovementInputs, AudioTranscriptImprovementOutputs]): """ - Diacritization is the process of adding diacritical marks to letters in a text - to indicate pronunciation, stress, tone, or meaning, often used in languages - such as Arabic, Hebrew, and Vietnamese to provide clarity and accuracy in - written communication. + Refines and corrects transcriptions generated from audio data, improving +readability and accuracy. 
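+    Sketch (note the two required inputs generated above, `is_medical` and
+`source_audio`; assigning to a param's `value` attribute is an assumption):
+
+        improver = AudioTranscriptImprovement(asset_id="<asset-id>", pipeline=pipeline)
+        improver.inputs.is_medical.value = "false"  # required, TEXT-typed flag
+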
- InputType: text - OutputType: text + InputType: audio + OutputType: text """ - - function: str = "diacritization" - input_type: str = DataType.TEXT + function: str = "audio-transcript-improvement" + input_type: str = DataType.AUDIO output_type: str = DataType.TEXT - inputs_class: Type[TI] = DiacritizationInputs - outputs_class: Type[TO] = DiacritizationOutputs + inputs_class: Type[TI] = AudioTranscriptImprovementInputs + outputs_class: Type[TO] = AudioTranscriptImprovementOutputs -class AudioEmotionDetectionInputs(Inputs): +class DialectDetectionInputs(Inputs): audio: InputParam = None + language: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) -class AudioEmotionDetectionOutputs(Outputs): +class DialectDetectionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class AudioEmotionDetection(AssetNode[AudioEmotionDetectionInputs, AudioEmotionDetectionOutputs]): +class DialectDetection(AssetNode[DialectDetectionInputs, DialectDetectionOutputs]): """ - Audio Emotion Detection is a technology that analyzes vocal characteristics and - patterns in audio recordings to identify and classify the emotional state of - the speaker. + Identifies specific dialects within a language, aiding in localized content +creation or user experience personalization. - InputType: audio - OutputType: label + InputType: audio + OutputType: text """ - - function: str = "audio-emotion-detection" + function: str = "dialect-detection" input_type: str = DataType.AUDIO - output_type: str = DataType.LABEL + output_type: str = DataType.TEXT - inputs_class: Type[TI] = AudioEmotionDetectionInputs - outputs_class: Type[TO] = AudioEmotionDetectionOutputs + inputs_class: Type[TI] = DialectDetectionInputs + outputs_class: Type[TO] = DialectDetectionOutputs -class TextSummarizationInputs(Inputs): +class SentimentAnalysisInputs(Inputs): text: InputParam = None language: InputParam = None - script: InputParam = None dialect: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class TextSummarizationOutputs(Outputs): +class SentimentAnalysisOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class TextSummarization(AssetNode[TextSummarizationInputs, TextSummarizationOutputs]): +class SentimentAnalysis(AssetNode[SentimentAnalysisInputs, SentimentAnalysisOutputs]): """ - Text summarization is the process of condensing a large body of text into a - 
shorter version, capturing the main points and essential information while - maintaining coherence and meaning. + Determines the sentiment or emotion (e.g., positive, negative, neutral) of a +piece of text, aiding in understanding user feedback or market sentiment. - InputType: text - OutputType: text + InputType: text + OutputType: label """ - - function: str = "text-summarization" + function: str = "sentiment-analysis" input_type: str = DataType.TEXT - output_type: str = DataType.TEXT + output_type: str = DataType.LABEL - inputs_class: Type[TI] = TextSummarizationInputs - outputs_class: Type[TO] = TextSummarizationOutputs + inputs_class: Type[TI] = SentimentAnalysisInputs + outputs_class: Type[TO] = SentimentAnalysisOutputs -class EntityLinkingInputs(Inputs): - text: InputParam = None +class SpeechEmbeddingInputs(Inputs): + audio: InputParam = None language: InputParam = None - domain: InputParam = None + dialect: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.domain = self.create_param(code="domain", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class EntityLinkingOutputs(Outputs): +class SpeechEmbeddingOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class EntityLinking(AssetNode[EntityLinkingInputs, EntityLinkingOutputs]): +class SpeechEmbedding(AssetNode[SpeechEmbeddingInputs, SpeechEmbeddingOutputs]): """ - Entity Linking is the process of identifying and connecting mentions of - entities within a text to their corresponding entries in a structured knowledge - base, thereby enabling the disambiguation of terms and enhancing the - understanding of the text's context. + Transforms spoken content into a fixed-size vector in a high-dimensional space +that captures the content's essence. Facilitates tasks like speech recognition +and speaker verification. 
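+    Sketch (chains a hypothetical upstream node `audio_source` into the
+embedder via the assumed node-level `link` helper):
+
+        embedder = SpeechEmbedding(asset_id="<asset-id>", pipeline=pipeline)
+        audio_source.link(embedder, from_param="data", to_param="audio")
+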
- InputType: text - OutputType: label + InputType: audio + OutputType: text """ + function: str = "speech-embedding" + input_type: str = DataType.AUDIO + output_type: str = DataType.TEXT - function: str = "entity-linking" - input_type: str = DataType.TEXT - output_type: str = DataType.LABEL - - inputs_class: Type[TI] = EntityLinkingInputs - outputs_class: Type[TO] = EntityLinkingOutputs + inputs_class: Type[TI] = SpeechEmbeddingInputs + outputs_class: Type[TO] = SpeechEmbeddingOutputs -class TextGenerationMetricInputs(Inputs): +class TextGenerationMetricDefaultInputs(Inputs): hypotheses: InputParam = None references: InputParam = None sources: InputParam = None @@ -2175,7 +2221,7 @@ def __init__(self, node=None): self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) -class TextGenerationMetricOutputs(Outputs): +class TextGenerationMetricDefaultOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2183,75 +2229,72 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class TextGenerationMetric(BaseMetric[TextGenerationMetricInputs, TextGenerationMetricOutputs]): +class TextGenerationMetricDefault(BaseMetric[TextGenerationMetricDefaultInputs, TextGenerationMetricDefaultOutputs]): """ - A Text Generation Metric is a quantitative measure used to evaluate the quality - and effectiveness of text produced by natural language processing models, often - assessing aspects such as coherence, relevance, fluency, and adherence to given - prompts or instructions. + The "Text Generation Metric Default" function provides a standard set of +evaluation metrics for assessing the quality and performance of text generation +models. - InputType: text - OutputType: text + InputType: text + OutputType: text """ - - function: str = "text-generation-metric" + function: str = "text-generation-metric-default" input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = TextGenerationMetricInputs - outputs_class: Type[TO] = TextGenerationMetricOutputs + inputs_class: Type[TI] = TextGenerationMetricDefaultInputs + outputs_class: Type[TO] = TextGenerationMetricDefaultOutputs -class SplitOnLinebreakInputs(Inputs): - text: InputParam = None +class AudioGenerationMetricInputs(Inputs): + hypotheses: InputParam = None + references: InputParam = None + sources: InputParam = None + score_identifier: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.AUDIO, is_required=True) + self.references = self.create_param(code="references", data_type=DataType.AUDIO, is_required=False) + self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) + self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) -class SplitOnLinebreakOutputs(Outputs): +class AudioGenerationMetricOutputs(Outputs): data: OutputParam = None - audio: OutputParam = None def __init__(self, node=None): super().__init__(node=node) self.data = self.create_param(code="data", data_type=DataType.TEXT) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) -class SplitOnLinebreak(BaseSegmentor[SplitOnLinebreakInputs, SplitOnLinebreakOutputs]): +class AudioGenerationMetric(BaseMetric[AudioGenerationMetricInputs, 
AudioGenerationMetricOutputs]): """ - The "Split On Linebreak" function divides a given string into a list of - substrings, using linebreaks (newline characters) as the points of separation. + The Audio Generation Metric is a quantitative measure used to evaluate the +quality, accuracy, and overall performance of audio generated by artificial +intelligence systems, often considering factors such as fidelity, +intelligibility, and similarity to human-produced audio. - InputType: text - OutputType: text + InputType: text + OutputType: text """ - - function: str = "split-on-linebreak" + function: str = "audio-generation-metric" input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = SplitOnLinebreakInputs - outputs_class: Type[TO] = SplitOnLinebreakOutputs + inputs_class: Type[TI] = AudioGenerationMetricInputs + outputs_class: Type[TO] = AudioGenerationMetricOutputs -class SentimentAnalysisInputs(Inputs): - text: InputParam = None - language: InputParam = None - dialect: InputParam = None - script: InputParam = None +class AudioLanguageIdentificationInputs(Inputs): + audio: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) -class SentimentAnalysisOutputs(Outputs): +class AudioLanguageIdentificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2259,34 +2302,33 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class SentimentAnalysis(AssetNode[SentimentAnalysisInputs, SentimentAnalysisOutputs]): +class AudioLanguageIdentification(AssetNode[AudioLanguageIdentificationInputs, AudioLanguageIdentificationOutputs]): """ - Sentiment Analysis is a natural language processing technique used to determine - and classify the emotional tone or subjective information expressed in a piece - of text, such as identifying whether the sentiment is positive, negative, or - neutral. + Audio Language Identification is a process that involves analyzing an audio +recording to determine the language being spoken. 
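+    Sketch (the LABEL verdict on `data` can also feed a Router node for
+language-dependent branching; the `use_output` helper is assumed here):
+
+        lang_id = AudioLanguageIdentification(asset_id="<asset-id>", pipeline=pipeline)
+        lang_id.use_output("data")  # expose the detected language as a pipeline output
+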
- InputType: text - OutputType: label + InputType: audio + OutputType: label """ - - function: str = "sentiment-analysis" - input_type: str = DataType.TEXT + function: str = "audio-language-identification" + input_type: str = DataType.AUDIO output_type: str = DataType.LABEL - inputs_class: Type[TI] = SentimentAnalysisInputs - outputs_class: Type[TO] = SentimentAnalysisOutputs + inputs_class: Type[TI] = AudioLanguageIdentificationInputs + outputs_class: Type[TO] = AudioLanguageIdentificationOutputs -class KeywordSpottingInputs(Inputs): - audio: InputParam = None +class VideoLabelDetectionInputs(Inputs): + video: InputParam = None + min_confidence: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) + self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) -class KeywordSpottingOutputs(Outputs): +class VideoLabelDetectionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2294,39 +2336,37 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class KeywordSpotting(AssetNode[KeywordSpottingInputs, KeywordSpottingOutputs]): +class VideoLabelDetection(AssetNode[VideoLabelDetectionInputs, VideoLabelDetectionOutputs]): """ - Keyword Spotting is a function that enables the detection and identification of - specific words or phrases within a stream of audio, often used in voice- - activated systems to trigger actions or commands based on recognized keywords. + Identifies and tags objects, scenes, or activities within a video. Useful for +content indexing and recommendation systems. 
- InputType: audio - OutputType: label + InputType: video + OutputType: label """ - - function: str = "keyword-spotting" - input_type: str = DataType.AUDIO + function: str = "video-label-detection" + input_type: str = DataType.VIDEO output_type: str = DataType.LABEL - inputs_class: Type[TI] = KeywordSpottingInputs - outputs_class: Type[TO] = KeywordSpottingOutputs + inputs_class: Type[TI] = VideoLabelDetectionInputs + outputs_class: Type[TO] = VideoLabelDetectionOutputs -class TextClassificationInputs(Inputs): +class TopicClassificationInputs(Inputs): text: InputParam = None language: InputParam = None - dialect: InputParam = None script: InputParam = None + dialect: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) -class TextClassificationOutputs(Outputs): +class TopicClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2334,36 +2374,35 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class TextClassification(AssetNode[TextClassificationInputs, TextClassificationOutputs]): +class TopicClassification(AssetNode[TopicClassificationInputs, TopicClassificationOutputs]): """ - Text Classification is a natural language processing task that involves - categorizing text into predefined labels or classes based on its content, - enabling automated organization, filtering, and analysis of large volumes of - textual data. + Assigns categories or topics to a piece of text based on its content, +facilitating content organization and retrieval. 
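+    Sketch (`text` and `language` are the required params; assigning to a
+param's `value` attribute is an assumption):
+
+        classifier = TopicClassification(asset_id="<asset-id>", pipeline=pipeline)
+        classifier.inputs.language.value = "en"
+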
- InputType: text - OutputType: label + InputType: text + OutputType: label """ - - function: str = "text-classification" + function: str = "topic-classification" input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = TextClassificationInputs - outputs_class: Type[TO] = TextClassificationOutputs + inputs_class: Type[TI] = TopicClassificationInputs + outputs_class: Type[TO] = TopicClassificationOutputs -class OtherMultipurposeInputs(Inputs): - text: InputParam = None - language: InputParam = None +class ReferencelessTextGenerationMetricDefaultInputs(Inputs): + hypotheses: InputParam = None + sources: InputParam = None + score_identifier: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.TEXT, is_required=True) + self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) + self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) -class OtherMultipurposeOutputs(Outputs): +class ReferencelessTextGenerationMetricDefaultOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2371,80 +2410,99 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class OtherMultipurpose(AssetNode[OtherMultipurposeInputs, OtherMultipurposeOutputs]): +class ReferencelessTextGenerationMetricDefault(BaseMetric[ReferencelessTextGenerationMetricDefaultInputs, ReferencelessTextGenerationMetricDefaultOutputs]): """ - The "Other (Multipurpose)" function serves as a versatile category designed to - accommodate a wide range of tasks and activities that do not fit neatly into - predefined classifications, offering flexibility and adaptability for various - needs. + The Referenceless Text Generation Metric Default is a function designed to +evaluate the quality of generated text without relying on reference texts for +comparison. 
- InputType: text - OutputType: text + InputType: text + OutputType: text """ - - function: str = "other-(multipurpose)" + function: str = "referenceless-text-generation-metric-default" input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = OtherMultipurposeInputs - outputs_class: Type[TO] = OtherMultipurposeOutputs + inputs_class: Type[TI] = ReferencelessTextGenerationMetricDefaultInputs + outputs_class: Type[TO] = ReferencelessTextGenerationMetricDefaultOutputs -class SpeechSynthesisInputs(Inputs): - audio: InputParam = None - language: InputParam = None - dialect: InputParam = None - voice: InputParam = None - script: InputParam = None - text: InputParam = None - type: InputParam = None +class ImageContentModerationInputs(Inputs): + image: InputParam = None + min_confidence: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) - self.voice = self.create_param(code="voice", data_type=DataType.LABEL, is_required=False) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.type = self.create_param(code="type", data_type=DataType.LABEL, is_required=False) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) + self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) -class SpeechSynthesisOutputs(Outputs): +class ImageContentModerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.AUDIO) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class SpeechSynthesis(AssetNode[SpeechSynthesisInputs, SpeechSynthesisOutputs]): +class ImageContentModeration(AssetNode[ImageContentModerationInputs, ImageContentModerationOutputs]): """ - Speech synthesis is the artificial production of human speech, typically - achieved through software or hardware systems that convert text into spoken - words, enabling machines to communicate verbally with users. + Detects and filters out inappropriate or harmful images, essential for +platforms with user-generated visual content. 
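+    Sketch (the moderation verdict pairs naturally with a Decision node for
+short-circuiting flagged images; the `use_output` helper is assumed here):
+
+        moderator = ImageContentModeration(asset_id="<asset-id>", pipeline=pipeline)
+        moderator.use_output("data")  # LABEL verdict
+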
- InputType: text - OutputType: audio + InputType: image + OutputType: label """ + function: str = "image-content-moderation" + input_type: str = DataType.IMAGE + output_type: str = DataType.LABEL - function: str = "speech-synthesis" - input_type: str = DataType.TEXT - output_type: str = DataType.AUDIO + inputs_class: Type[TI] = ImageContentModerationInputs + outputs_class: Type[TO] = ImageContentModerationOutputs - inputs_class: Type[TI] = SpeechSynthesisInputs - outputs_class: Type[TO] = SpeechSynthesisOutputs +class AsrAgeClassificationInputs(Inputs): + source_audio: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) -class AudioIntentDetectionInputs(Inputs): - audio: InputParam = None + +class AsrAgeClassificationOutputs(Outputs): + data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class AudioIntentDetectionOutputs(Outputs): +class AsrAgeClassification(AssetNode[AsrAgeClassificationInputs, AsrAgeClassificationOutputs]): + """ + The ASR Age Classification function is designed to analyze audio recordings of +speech to determine the speaker's age group by leveraging automatic speech +recognition (ASR) technology and machine learning algorithms. + + InputType: audio + OutputType: label + """ + function: str = "asr-age-classification" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = AsrAgeClassificationInputs + outputs_class: Type[TO] = AsrAgeClassificationOutputs + + +class AsrGenderClassificationInputs(Inputs): + source_audio: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + + +class AsrGenderClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2452,36 +2510,66 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class AudioIntentDetection(AssetNode[AudioIntentDetectionInputs, AudioIntentDetectionOutputs]): +class AsrGenderClassification(AssetNode[AsrGenderClassificationInputs, AsrGenderClassificationOutputs]): """ - Audio Intent Detection is a process that involves analyzing audio signals to - identify and interpret the underlying intentions or purposes behind spoken - words, enabling systems to understand and respond appropriately to human - speech. + The ASR Gender Classification function analyzes audio recordings to determine +and classify the speaker's gender based on their voice characteristics. 
+ + InputType: audio + OutputType: label + """ + function: str = "asr-gender-classification" + input_type: str = DataType.AUDIO + output_type: str = DataType.LABEL + + inputs_class: Type[TI] = AsrGenderClassificationInputs + outputs_class: Type[TO] = AsrGenderClassificationOutputs + + +class BaseModelInputs(Inputs): + language: InputParam = None + text: InputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + + +class BaseModelOutputs(Outputs): + data: OutputParam = None + + def __init__(self, node=None): + super().__init__(node=node) + self.data = self.create_param(code="data", data_type=DataType.TEXT) - InputType: audio - OutputType: label + +class BaseModel(AssetNode[BaseModelInputs, BaseModelOutputs]): """ + The Base-Model function serves as a foundational framework designed to provide +essential features and capabilities upon which more specialized or advanced +models can be built and customized. - function: str = "audio-intent-detection" - input_type: str = DataType.AUDIO - output_type: str = DataType.LABEL + InputType: text + OutputType: text + """ + function: str = "base-model" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT - inputs_class: Type[TI] = AudioIntentDetectionInputs - outputs_class: Type[TO] = AudioIntentDetectionOutputs + inputs_class: Type[TI] = BaseModelInputs + outputs_class: Type[TO] = BaseModelOutputs -class VideoLabelDetectionInputs(Inputs): - video: InputParam = None - min_confidence: InputParam = None +class LanguageIdentificationAudioInputs(Inputs): + audio: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) - self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) -class VideoLabelDetectionOutputs(Outputs): +class LanguageIdentificationAudioOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2489,36 +2577,31 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class VideoLabelDetection(AssetNode[VideoLabelDetectionInputs, VideoLabelDetectionOutputs]): +class LanguageIdentificationAudio(AssetNode[LanguageIdentificationAudioInputs, LanguageIdentificationAudioOutputs]): """ - Video Label Detection is a function that automatically identifies and tags - various objects, scenes, activities, and other relevant elements within a - video, providing descriptive labels that enhance searchability and content - organization. + The Language Identification Audio function analyzes audio input to determine +and identify the language being spoken. 
- InputType: video - OutputType: label + InputType: audio + OutputType: label """ - - function: str = "video-label-detection" - input_type: str = DataType.VIDEO + function: str = "language-identification-audio" + input_type: str = DataType.AUDIO output_type: str = DataType.LABEL - inputs_class: Type[TI] = VideoLabelDetectionInputs - outputs_class: Type[TO] = VideoLabelDetectionOutputs + inputs_class: Type[TI] = LanguageIdentificationAudioInputs + outputs_class: Type[TO] = LanguageIdentificationAudioOutputs -class AsrQualityEstimationInputs(Inputs): - text: InputParam = None - script: InputParam = None +class MultiClassImageClassificationInputs(Inputs): + image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class AsrQualityEstimationOutputs(Outputs): +class MultiClassImageClassificationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2526,103 +2609,91 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class AsrQualityEstimation(AssetNode[AsrQualityEstimationInputs, AsrQualityEstimationOutputs]): +class MultiClassImageClassification(AssetNode[MultiClassImageClassificationInputs, MultiClassImageClassificationOutputs]): """ - ASR Quality Estimation is a process that evaluates the accuracy and reliability - of automatic speech recognition systems by analyzing their performance in - transcribing spoken language into text. + Multi Class Image Classification is a machine learning task where an algorithm +is trained to categorize images into one of several predefined classes or +categories based on their visual content. 
- InputType: text - OutputType: label + InputType: image + OutputType: label """ - - function: str = "asr-quality-estimation" - input_type: str = DataType.TEXT + function: str = "multi-class-image-classification" + input_type: str = DataType.IMAGE output_type: str = DataType.LABEL - inputs_class: Type[TI] = AsrQualityEstimationInputs - outputs_class: Type[TO] = AsrQualityEstimationOutputs + inputs_class: Type[TI] = MultiClassImageClassificationInputs + outputs_class: Type[TO] = MultiClassImageClassificationOutputs -class AudioTranscriptAnalysisInputs(Inputs): - language: InputParam = None - dialect: InputParam = None - source_supplier: InputParam = None - source_audio: InputParam = None - script: InputParam = None +class SemanticSegmentationInputs(Inputs): + image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) - self.source_supplier = self.create_param(code="source_supplier", data_type=DataType.LABEL, is_required=False) - self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class AudioTranscriptAnalysisOutputs(Outputs): +class SemanticSegmentationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class AudioTranscriptAnalysis(AssetNode[AudioTranscriptAnalysisInputs, AudioTranscriptAnalysisOutputs]): +class SemanticSegmentation(AssetNode[SemanticSegmentationInputs, SemanticSegmentationOutputs]): """ - Audio Transcript Analysis is a process that involves converting spoken language - from audio recordings into written text, followed by examining and interpreting - the transcribed content to extract meaningful insights, identify patterns, and - derive actionable information. + Semantic segmentation is a computer vision process that involves classifying +each pixel in an image into a predefined category, effectively partitioning the +image into meaningful segments based on the objects or regions they represent. 
- InputType: audio - OutputType: text + InputType: image + OutputType: label """ + function: str = "semantic-segmentation" + input_type: str = DataType.IMAGE + output_type: str = DataType.LABEL - function: str = "audio-transcript-analysis" - input_type: str = DataType.AUDIO - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = AudioTranscriptAnalysisInputs - outputs_class: Type[TO] = AudioTranscriptAnalysisOutputs + inputs_class: Type[TI] = SemanticSegmentationInputs + outputs_class: Type[TO] = SemanticSegmentationOutputs -class SearchInputs(Inputs): - text: InputParam = None +class InstanceSegmentationInputs(Inputs): + image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=False) -class SearchOutputs(Outputs): +class InstanceSegmentationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class Search(AssetNode[SearchInputs, SearchOutputs]): +class InstanceSegmentation(AssetNode[InstanceSegmentationInputs, InstanceSegmentationOutputs]): """ - The "Search" function allows users to input keywords or phrases to quickly - locate specific information, files, or content within a database, website, or - application. + Instance segmentation is a computer vision task that involves detecting and +delineating each distinct object within an image, assigning a unique label and +precise boundary to every individual instance of objects, even if they belong +to the same category. 
- InputType: text - OutputType: text + InputType: image + OutputType: label """ + function: str = "instance-segmentation" + input_type: str = DataType.IMAGE + output_type: str = DataType.LABEL - function: str = "search" - input_type: str = DataType.TEXT - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = SearchInputs - outputs_class: Type[TO] = SearchOutputs + inputs_class: Type[TI] = InstanceSegmentationInputs + outputs_class: Type[TO] = InstanceSegmentationOutputs -class VideoForcedAlignmentInputs(Inputs): - video: InputParam = None +class EmotionDetectionInputs(Inputs): text: InputParam = None language: InputParam = None dialect: InputParam = None @@ -2630,42 +2701,37 @@ class VideoForcedAlignmentInputs(Inputs): def __init__(self, node=None): super().__init__(node=node) - self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class VideoForcedAlignmentOutputs(Outputs): - text: OutputParam = None - video: OutputParam = None +class EmotionDetectionOutputs(Outputs): + data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT) - self.video = self.create_param(code="video", data_type=DataType.VIDEO) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class VideoForcedAlignment(AssetNode[VideoForcedAlignmentInputs, VideoForcedAlignmentOutputs]): +class EmotionDetection(AssetNode[EmotionDetectionInputs, EmotionDetectionOutputs]): """ - Video Forced Alignment is a process that synchronizes video footage with - corresponding audio tracks by precisely aligning the visual and auditory - elements, ensuring that the movements of speakers' lips match the spoken words. + Identifies human emotions from text or audio, enhancing user experience in +chatbots or customer feedback analysis. 
- InputType: video - OutputType: video + InputType: text + OutputType: label """ + function: str = "emotion-detection" + input_type: str = DataType.TEXT + output_type: str = DataType.LABEL - function: str = "video-forced-alignment" - input_type: str = DataType.VIDEO - output_type: str = DataType.VIDEO - - inputs_class: Type[TI] = VideoForcedAlignmentInputs - outputs_class: Type[TO] = VideoForcedAlignmentOutputs + inputs_class: Type[TI] = EmotionDetectionInputs + outputs_class: Type[TO] = EmotionDetectionOutputs -class VisemeGenerationInputs(Inputs): +class TextSpamDetectionInputs(Inputs): text: InputParam = None language: InputParam = None dialect: InputParam = None @@ -2679,7 +2745,7 @@ def __init__(self, node=None): self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class VisemeGenerationOutputs(Outputs): +class TextSpamDetectionOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2687,39 +2753,40 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class VisemeGeneration(AssetNode[VisemeGenerationInputs, VisemeGenerationOutputs]): +class TextSpamDetection(AssetNode[TextSpamDetectionInputs, TextSpamDetectionOutputs]): """ - Viseme Generation is the process of creating visual representations of - phonemes, which are the distinct units of sound in speech, to synchronize lip - movements with spoken words in animations or virtual avatars. + Identifies and filters out unwanted or irrelevant text content, ideal for +moderating user-generated content or ensuring quality in communication +platforms. - InputType: text - OutputType: label + InputType: text + OutputType: label """ - - function: str = "viseme-generation" + function: str = "text-spam-detection" input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = VisemeGenerationInputs - outputs_class: Type[TO] = VisemeGenerationOutputs + inputs_class: Type[TI] = TextSpamDetectionInputs + outputs_class: Type[TO] = TextSpamDetectionOutputs -class TopicClassificationInputs(Inputs): +class TextDenormalizationInputs(Inputs): text: InputParam = None language: InputParam = None - script: InputParam = None - dialect: InputParam = None + lowercase_latin: InputParam = None + remove_accents: InputParam = None + remove_punctuation: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.lowercase_latin = self.create_param(code="lowercase_latin", data_type=DataType.TEXT, is_required=False) + self.remove_accents = self.create_param(code="remove_accents", data_type=DataType.TEXT, is_required=False) + self.remove_punctuation = self.create_param(code="remove_punctuation", data_type=DataType.TEXT, is_required=False) -class TopicClassificationOutputs(Outputs): +class TextDenormalizationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2727,211 +2794,192 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class TopicClassification(AssetNode[TopicClassificationInputs, TopicClassificationOutputs]): +class 
TextDenormalization(AssetNode[TextDenormalizationInputs, TextDenormalizationOutputs]): """ - Topic Classification is a natural language processing function that categorizes - text into predefined topics or subjects based on its content, enabling - efficient organization and retrieval of information. + Converts standardized or normalized text into its original, often more +readable, form. Useful in natural language generation tasks. - InputType: text - OutputType: label + InputType: text + OutputType: label """ - - function: str = "topic-classification" + function: str = "text-denormalization" input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = TopicClassificationInputs - outputs_class: Type[TO] = TopicClassificationOutputs + inputs_class: Type[TI] = TextDenormalizationInputs + outputs_class: Type[TO] = TextDenormalizationOutputs -class OffensiveLanguageIdentificationInputs(Inputs): - text: InputParam = None - language: InputParam = None - dialect: InputParam = None - script: InputParam = None +class ReferencelessAudioGenerationMetricInputs(Inputs): + hypotheses: InputParam = None + sources: InputParam = None + score_identifier: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.AUDIO, is_required=True) + self.sources = self.create_param(code="sources", data_type=DataType.AUDIO, is_required=False) + self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) -class OffensiveLanguageIdentificationOutputs(Outputs): +class ReferencelessAudioGenerationMetricOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class OffensiveLanguageIdentification(AssetNode[OffensiveLanguageIdentificationInputs, OffensiveLanguageIdentificationOutputs]): +class ReferencelessAudioGenerationMetric(BaseMetric[ReferencelessAudioGenerationMetricInputs, ReferencelessAudioGenerationMetricOutputs]): """ - Offensive Language Identification is a function that analyzes text to detect - and flag language that is abusive, harmful, or inappropriate, helping to - maintain a respectful and safe communication environment. + The Referenceless Audio Generation Metric is a tool designed to evaluate the +quality of generated audio content without the need for a reference or original +audio sample for comparison. 
- InputType: text - OutputType: label + InputType: text + OutputType: text """ - - function: str = "offensive-language-identification" + function: str = "referenceless-audio-generation-metric" input_type: str = DataType.TEXT - output_type: str = DataType.LABEL + output_type: str = DataType.TEXT - inputs_class: Type[TI] = OffensiveLanguageIdentificationInputs - outputs_class: Type[TO] = OffensiveLanguageIdentificationOutputs + inputs_class: Type[TI] = ReferencelessAudioGenerationMetricInputs + outputs_class: Type[TO] = ReferencelessAudioGenerationMetricOutputs -class SpeechTranslationInputs(Inputs): - source_audio: InputParam = None - sourcelanguage: InputParam = None - targetlanguage: InputParam = None +class AudioForcedAlignmentInputs(Inputs): + audio: InputParam = None + text: InputParam = None + language: InputParam = None dialect: InputParam = None - voice: InputParam = None script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) - self.sourcelanguage = self.create_param(code="sourcelanguage", data_type=DataType.LABEL, is_required=True) - self.targetlanguage = self.create_param(code="targetlanguage", data_type=DataType.LABEL, is_required=True) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) - self.voice = self.create_param(code="voice", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class SpeechTranslationOutputs(Outputs): - data: OutputParam = None +class AudioForcedAlignmentOutputs(Outputs): + text: OutputParam = None + audio: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.text = self.create_param(code="text", data_type=DataType.TEXT) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) -class SpeechTranslation(AssetNode[SpeechTranslationInputs, SpeechTranslationOutputs]): +class AudioForcedAlignment(AssetNode[AudioForcedAlignmentInputs, AudioForcedAlignmentOutputs]): """ - Speech Translation is a technology that converts spoken language in real-time - from one language to another, enabling seamless communication between speakers - of different languages. + Synchronizes phonetic and phonological text with the corresponding segments in +an audio file. Useful in linguistic research and detailed transcription tasks. 
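+    Sketch (this node exposes two outputs, `text` and `audio`, which can be
+routed onward independently; the `use_output` helper is assumed here):
+
+        aligner = AudioForcedAlignment(asset_id="<asset-id>", pipeline=pipeline)
+        aligner.use_output("text")   # aligned transcript
+        aligner.use_output("audio")  # aligned audio
+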
- InputType: audio - OutputType: text + InputType: audio + OutputType: audio """ - - function: str = "speech-translation" + function: str = "audio-forced-alignment" input_type: str = DataType.AUDIO - output_type: str = DataType.TEXT + output_type: str = DataType.AUDIO - inputs_class: Type[TI] = SpeechTranslationInputs - outputs_class: Type[TO] = SpeechTranslationOutputs + inputs_class: Type[TI] = AudioForcedAlignmentInputs + outputs_class: Type[TO] = AudioForcedAlignmentOutputs -class SpeakerDiarizationAudioInputs(Inputs): - audio: InputParam = None +class VideoForcedAlignmentInputs(Inputs): + video: InputParam = None + text: InputParam = None language: InputParam = None - script: InputParam = None dialect: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class SpeakerDiarizationAudioOutputs(Outputs): - data: OutputParam = None - audio: OutputParam = None - +class VideoForcedAlignmentOutputs(Outputs): + text: OutputParam = None + video: OutputParam = None + def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) + self.text = self.create_param(code="text", data_type=DataType.TEXT) + self.video = self.create_param(code="video", data_type=DataType.VIDEO) -class SpeakerDiarizationAudio(BaseSegmentor[SpeakerDiarizationAudioInputs, SpeakerDiarizationAudioOutputs]): +class VideoForcedAlignment(AssetNode[VideoForcedAlignmentInputs, VideoForcedAlignmentOutputs]): """ - Speaker Diarization Audio is a process that involves segmenting an audio - recording into distinct sections, each corresponding to a different speaker, in - order to identify and differentiate between multiple speakers within the same - audio stream. + Aligns the transcription of spoken content in a video with its corresponding +timecodes, facilitating subtitle creation. 
- InputType: audio - OutputType: label + InputType: video + OutputType: video """ + function: str = "video-forced-alignment" + input_type: str = DataType.VIDEO + output_type: str = DataType.VIDEO - function: str = "speaker-diarization-audio" - input_type: str = DataType.AUDIO - output_type: str = DataType.LABEL - - inputs_class: Type[TI] = SpeakerDiarizationAudioInputs - outputs_class: Type[TO] = SpeakerDiarizationAudioOutputs + inputs_class: Type[TI] = VideoForcedAlignmentInputs + outputs_class: Type[TO] = VideoForcedAlignmentOutputs -class AudioTranscriptImprovementInputs(Inputs): - language: InputParam = None - dialect: InputParam = None - source_supplier: InputParam = None - is_medical: InputParam = None - source_audio: InputParam = None - script: InputParam = None +class ClassificationMetricInputs(Inputs): + hypotheses: InputParam = None + references: InputParam = None + lowerIsBetter: InputParam = None + sources: InputParam = None + score_identifier: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) - self.source_supplier = self.create_param(code="source_supplier", data_type=DataType.LABEL, is_required=False) - self.is_medical = self.create_param(code="is_medical", data_type=DataType.TEXT, is_required=True) - self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.LABEL, is_required=True) + self.references = self.create_param(code="references", data_type=DataType.LABEL, is_required=True) + self.lowerIsBetter = self.create_param(code="lowerIsBetter", data_type=DataType.TEXT, is_required=False) + self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) + self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) -class AudioTranscriptImprovementOutputs(Outputs): +class ClassificationMetricOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.NUMBER) -class AudioTranscriptImprovement(AssetNode[AudioTranscriptImprovementInputs, AudioTranscriptImprovementOutputs]): +class ClassificationMetric(BaseMetric[ClassificationMetricInputs, ClassificationMetricOutputs]): """ - Audio Transcript Improvement is a function that enhances the accuracy and - clarity of transcribed audio recordings by correcting errors, refining - language, and ensuring the text faithfully represents the original spoken - content. + A Classification Metric is a quantitative measure used to evaluate the quality +and effectiveness of classification models. 
- InputType: audio - OutputType: text + InputType: text + OutputType: text """ - - function: str = "audio-transcript-improvement" - input_type: str = DataType.AUDIO + function: str = "classification-metric" + input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = AudioTranscriptImprovementInputs - outputs_class: Type[TO] = AudioTranscriptImprovementOutputs + inputs_class: Type[TI] = ClassificationMetricInputs + outputs_class: Type[TO] = ClassificationMetricOutputs -class SpeechNonSpeechClassificationInputs(Inputs): - audio: InputParam = None - language: InputParam = None - script: InputParam = None - dialect: InputParam = None +class AutoMaskGenerationInputs(Inputs): + image: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) -class SpeechNonSpeechClassificationOutputs(Outputs): +class AutoMaskGenerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2939,42 +2987,40 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class SpeechNonSpeechClassification(AssetNode[SpeechNonSpeechClassificationInputs, SpeechNonSpeechClassificationOutputs]): +class AutoMaskGeneration(AssetNode[AutoMaskGenerationInputs, AutoMaskGenerationOutputs]): """ - The function "Speech or Non-Speech Classification" is designed to analyze audio - input and determine whether the sound is human speech or non-speech noise, - enabling applications such as voice recognition systems to filter out - irrelevant background sounds. + Auto-mask generation refers to the automated process of creating masks in image +processing or computer vision, typically for segmentation tasks. A mask is a +binary or multi-class image that labels different parts of an image, usually +separating the foreground (objects of interest) from the background, or +identifying specific object classes in an image. 
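As a quick illustration of the "mask" concept described above (pure NumPy, unrelated to any aiXplain asset), a binary mask is just a per-pixel label array:

    import numpy as np

    image = np.random.rand(4, 4)           # toy grayscale image
    mask = (image > 0.5).astype(np.uint8)  # 1 = foreground, 0 = background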
- InputType: audio - OutputType: label + InputType: image + OutputType: label """ - - function: str = "speech-non-speech-classification" - input_type: str = DataType.AUDIO + function: str = "auto-mask-generation" + input_type: str = DataType.IMAGE output_type: str = DataType.LABEL - inputs_class: Type[TI] = SpeechNonSpeechClassificationInputs - outputs_class: Type[TO] = SpeechNonSpeechClassificationOutputs + inputs_class: Type[TI] = AutoMaskGenerationInputs + outputs_class: Type[TO] = AutoMaskGenerationOutputs -class TextDenormalizationInputs(Inputs): +class TextEmbeddingInputs(Inputs): text: InputParam = None language: InputParam = None - lowercase_latin: InputParam = None - remove_accents: InputParam = None - remove_punctuation: InputParam = None + dialect: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.lowercase_latin = self.create_param(code="lowercase_latin", data_type=DataType.TEXT, is_required=False) - self.remove_accents = self.create_param(code="remove_accents", data_type=DataType.TEXT, is_required=False) - self.remove_punctuation = self.create_param(code="remove_punctuation", data_type=DataType.TEXT, is_required=False) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class TextDenormalizationOutputs(Outputs): +class TextEmbeddingOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -2982,35 +3028,35 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class TextDenormalization(AssetNode[TextDenormalizationInputs, TextDenormalizationOutputs]): +class TextEmbedding(AssetNode[TextEmbeddingInputs, TextEmbeddingOutputs]): """ - Text Denormalization is the process of converting abbreviated, contracted, or - otherwise simplified text into its full, standard form, often to improve - readability and ensure consistency in natural language processing tasks. + Text embedding is a process that converts text into numerical vectors, +capturing the semantic meaning and contextual relationships of words or +phrases, enabling machines to understand and analyze natural language more +effectively. 
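Embedding vectors are typically compared with cosine similarity; the toy function below shows the idea and assumes nothing about this asset's vector size or contents.

    import math

    def cosine(a, b):
        dot = sum(x * y for x, y in zip(a, b))
        na = math.sqrt(sum(x * x for x in a))
        nb = math.sqrt(sum(y * y for y in b))
        return dot / (na * nb)

    cosine([0.2, 0.9, 0.1], [0.25, 0.8, 0.05])  # near 1.0 => similar meaning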
- InputType: text - OutputType: label + InputType: text + OutputType: text """ - - function: str = "text-denormalization" + function: str = "text-embedding" input_type: str = DataType.TEXT - output_type: str = DataType.LABEL + output_type: str = DataType.TEXT - inputs_class: Type[TI] = TextDenormalizationInputs - outputs_class: Type[TO] = TextDenormalizationOutputs + inputs_class: Type[TI] = TextEmbeddingInputs + outputs_class: Type[TO] = TextEmbeddingOutputs -class ImageContentModerationInputs(Inputs): - image: InputParam = None - min_confidence: InputParam = None +class FactCheckingInputs(Inputs): + language: InputParam = None + text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.image = self.create_param(code="image", data_type=DataType.IMAGE, is_required=True) - self.min_confidence = self.create_param(code="min_confidence", data_type=DataType.TEXT, is_required=False) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=False) -class ImageContentModerationOutputs(Outputs): +class FactCheckingOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -3018,70 +3064,62 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class ImageContentModeration(AssetNode[ImageContentModerationInputs, ImageContentModerationOutputs]): +class FactChecking(AssetNode[FactCheckingInputs, FactCheckingOutputs]): """ - Image Content Moderation is a process that involves analyzing and filtering - images to detect and manage inappropriate, harmful, or sensitive content, - ensuring compliance with community guidelines and legal standards. + Fact Checking is the process of verifying the accuracy and truthfulness of +information, statements, or claims by cross-referencing with reliable sources +and evidence. 
- InputType: image - OutputType: label + InputType: text + OutputType: label """ - - function: str = "image-content-moderation" - input_type: str = DataType.IMAGE + function: str = "fact-checking" + input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = ImageContentModerationInputs - outputs_class: Type[TO] = ImageContentModerationOutputs + inputs_class: Type[TI] = FactCheckingInputs + outputs_class: Type[TO] = FactCheckingOutputs -class ReferencelessTextGenerationMetricDefaultInputs(Inputs): - hypotheses: InputParam = None - sources: InputParam = None - score_identifier: InputParam = None +class TextToAudioInputs(Inputs): + text: InputParam = None + language: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.TEXT, is_required=True) - self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) - self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) -class ReferencelessTextGenerationMetricDefaultOutputs(Outputs): +class TextToAudioOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) -class ReferencelessTextGenerationMetricDefault( - BaseMetric[ReferencelessTextGenerationMetricDefaultInputs, ReferencelessTextGenerationMetricDefaultOutputs] -): +class TextToAudio(AssetNode[TextToAudioInputs, TextToAudioOutputs]): """ - The Referenceless Text Generation Metric Default is a function designed to - evaluate the quality of generated text without relying on reference texts for - comparison. + The Text to Audio function converts written text into spoken words, allowing +users to listen to the content instead of reading it. 
- InputType: text - OutputType: text + InputType: text + OutputType: audio """ - - function: str = "referenceless-text-generation-metric-default" + function: str = "text-to-audio" input_type: str = DataType.TEXT - output_type: str = DataType.TEXT + output_type: str = DataType.AUDIO - inputs_class: Type[TI] = ReferencelessTextGenerationMetricDefaultInputs - outputs_class: Type[TO] = ReferencelessTextGenerationMetricDefaultOutputs + inputs_class: Type[TI] = TextToAudioInputs + outputs_class: Type[TO] = TextToAudioOutputs -class NamedEntityRecognitionInputs(Inputs): +class FillTextMaskInputs(Inputs): text: InputParam = None language: InputParam = None dialect: InputParam = None script: InputParam = None - domain: InputParam = None def __init__(self, node=None): super().__init__(node=node) @@ -3089,192 +3127,203 @@ def __init__(self, node=None): self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) - self.domain = self.create_param(code="domain", data_type=DataType.LABEL, is_required=False) -class NamedEntityRecognitionOutputs(Outputs): +class FillTextMaskOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class NamedEntityRecognition(AssetNode[NamedEntityRecognitionInputs, NamedEntityRecognitionOutputs]): +class FillTextMask(AssetNode[FillTextMaskInputs, FillTextMaskOutputs]): """ - Named Entity Recognition (NER) is a natural language processing task that - involves identifying and classifying proper nouns in text into predefined - categories such as names of people, organizations, locations, dates, and other - entities. + Completes missing parts of a text based on the context, ideal for content +generation or data augmentation tasks. 
- InputType: text - OutputType: label + InputType: text + OutputType: text """ - - function: str = "named-entity-recognition" + function: str = "fill-text-mask" input_type: str = DataType.TEXT - output_type: str = DataType.LABEL + output_type: str = DataType.TEXT - inputs_class: Type[TI] = NamedEntityRecognitionInputs - outputs_class: Type[TO] = NamedEntityRecognitionOutputs + inputs_class: Type[TI] = FillTextMaskInputs + outputs_class: Type[TO] = FillTextMaskOutputs -class TextContentModerationInputs(Inputs): +class VoiceCloningInputs(Inputs): text: InputParam = None + audio: InputParam = None language: InputParam = None dialect: InputParam = None + voice: InputParam = None script: InputParam = None + type: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.voice = self.create_param(code="voice", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.type = self.create_param(code="type", data_type=DataType.LABEL, is_required=False) -class TextContentModerationOutputs(Outputs): +class VoiceCloningOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) -class TextContentModeration(AssetNode[TextContentModerationInputs, TextContentModerationOutputs]): +class VoiceCloning(AssetNode[VoiceCloningInputs, VoiceCloningOutputs]): """ - Text Content Moderation is the process of reviewing, filtering, and managing - user-generated content to ensure it adheres to community guidelines, legal - standards, and platform policies, thereby maintaining a safe and respectful - online environment. + Replicates a person's voice based on a sample, allowing for the generation of +speech in that person's tone and style. Used cautiously due to ethical +considerations. 
- InputType: text - OutputType: label + InputType: text + OutputType: audio """ - - function: str = "text-content-moderation" + function: str = "voice-cloning" input_type: str = DataType.TEXT - output_type: str = DataType.LABEL + output_type: str = DataType.AUDIO - inputs_class: Type[TI] = TextContentModerationInputs - outputs_class: Type[TO] = TextContentModerationOutputs + inputs_class: Type[TI] = VoiceCloningInputs + outputs_class: Type[TO] = VoiceCloningOutputs -class SpeakerDiarizationVideoInputs(Inputs): - video: InputParam = None +class DiacritizationInputs(Inputs): language: InputParam = None - script: InputParam = None dialect: InputParam = None + script: InputParam = None + text: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.video = self.create_param(code="video", data_type=DataType.VIDEO, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=True) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) -class SpeakerDiarizationVideoOutputs(Outputs): +class DiacritizationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.VIDEO) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class SpeakerDiarizationVideo(AssetNode[SpeakerDiarizationVideoInputs, SpeakerDiarizationVideoOutputs]): +class Diacritization(AssetNode[DiacritizationInputs, DiacritizationOutputs]): """ - The Speaker Diarization Video function identifies and segments different - speakers in a video, attributing portions of the audio to individual speakers - to facilitate analysis and understanding of multi-speaker conversations. + Adds diacritical marks to text, essential for languages where meaning can +change based on diacritics. 
- InputType: video - OutputType: label + InputType: text + OutputType: text """ + function: str = "diacritization" + input_type: str = DataType.TEXT + output_type: str = DataType.TEXT - function: str = "speaker-diarization-video" - input_type: str = DataType.VIDEO - output_type: str = DataType.LABEL - - inputs_class: Type[TI] = SpeakerDiarizationVideoInputs - outputs_class: Type[TO] = SpeakerDiarizationVideoOutputs + inputs_class: Type[TI] = DiacritizationInputs + outputs_class: Type[TO] = DiacritizationOutputs -class SplitOnSilenceInputs(Inputs): - audio: InputParam = None +class SpeechTranslationInputs(Inputs): + source_audio: InputParam = None + sourcelanguage: InputParam = None + targetlanguage: InputParam = None + dialect: InputParam = None + voice: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + self.sourcelanguage = self.create_param(code="sourcelanguage", data_type=DataType.LABEL, is_required=True) + self.targetlanguage = self.create_param(code="targetlanguage", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.voice = self.create_param(code="voice", data_type=DataType.LABEL, is_required=False) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class SplitOnSilenceOutputs(Outputs): +class SpeechTranslationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.AUDIO) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class SplitOnSilence(AssetNode[SplitOnSilenceInputs, SplitOnSilenceOutputs]): +class SpeechTranslation(AssetNode[SpeechTranslationInputs, SpeechTranslationOutputs]): """ - The "Split On Silence" function divides an audio recording into separate - segments based on periods of silence, allowing for easier editing and analysis - of individual sections. + Speech Translation is a technology that converts spoken language in real-time +from one language to another, enabling seamless communication between speakers +of different languages. 
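Speech translation systems are commonly composed as an ASR stage followed by an MT stage; the sketch below is a conceptual cascade only and says nothing about how this asset works internally. The `asr` and `mt` callables are placeholders.

    def speech_translation(audio, src_lang, tgt_lang, asr, mt):
        transcript = asr(audio, language=src_lang)                # speech recognition
        return mt(transcript, source=src_lang, target=tgt_lang)  # machine translation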
- InputType: audio - OutputType: audio + InputType: audio + OutputType: text """ - - function: str = "split-on-silence" + function: str = "speech-translation" input_type: str = DataType.AUDIO - output_type: str = DataType.AUDIO + output_type: str = DataType.TEXT - inputs_class: Type[TI] = SplitOnSilenceInputs - outputs_class: Type[TO] = SplitOnSilenceOutputs + inputs_class: Type[TI] = SpeechTranslationInputs + outputs_class: Type[TO] = SpeechTranslationOutputs -class EmotionDetectionInputs(Inputs): - text: InputParam = None +class SpeechSynthesisInputs(Inputs): + audio: InputParam = None language: InputParam = None dialect: InputParam = None + voice: InputParam = None script: InputParam = None + text: InputParam = None + type: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=False) self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.voice = self.create_param(code="voice", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.type = self.create_param(code="type", data_type=DataType.LABEL, is_required=False) -class EmotionDetectionOutputs(Outputs): +class SpeechSynthesisOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) -class EmotionDetection(AssetNode[EmotionDetectionInputs, EmotionDetectionOutputs]): +class SpeechSynthesis(AssetNode[SpeechSynthesisInputs, SpeechSynthesisOutputs]): """ - Emotion Detection is a process that involves analyzing text to identify and - categorize the emotional states or sentiments expressed by individuals, such as - happiness, sadness, anger, or fear. + Generates human-like speech from written text. Ideal for text-to-speech +applications, audiobooks, and voice assistants. 
- InputType: text - OutputType: label + InputType: text + OutputType: audio """ - - function: str = "emotion-detection" + function: str = "speech-synthesis" input_type: str = DataType.TEXT - output_type: str = DataType.LABEL + output_type: str = DataType.AUDIO - inputs_class: Type[TI] = EmotionDetectionInputs - outputs_class: Type[TO] = EmotionDetectionOutputs + inputs_class: Type[TI] = SpeechSynthesisInputs + outputs_class: Type[TO] = SpeechSynthesisOutputs -class TextSpamDetectionInputs(Inputs): +class TextContentModerationInputs(Inputs): text: InputParam = None language: InputParam = None dialect: InputParam = None @@ -3288,7 +3337,7 @@ def __init__(self, node=None): self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class TextSpamDetectionOutputs(Outputs): +class TextContentModerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -3296,48 +3345,39 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.LABEL) -class TextSpamDetection(AssetNode[TextSpamDetectionInputs, TextSpamDetectionOutputs]): +class TextContentModeration(AssetNode[TextContentModerationInputs, TextContentModerationOutputs]): """ - Text Spam Detection is a process that involves analyzing and identifying - unsolicited or irrelevant messages within text communications, typically using - algorithms and machine learning techniques to filter out spam and ensure the - integrity of the communication platform. + Scans and identifies potentially harmful, offensive, or inappropriate textual +content, ensuring safer user environments. - InputType: text - OutputType: label + InputType: text + OutputType: label """ - - function: str = "text-spam-detection" + function: str = "text-content-moderation" input_type: str = DataType.TEXT output_type: str = DataType.LABEL - inputs_class: Type[TI] = TextSpamDetectionInputs - outputs_class: Type[TO] = TextSpamDetectionOutputs + inputs_class: Type[TI] = TextContentModerationInputs + outputs_class: Type[TO] = TextContentModerationOutputs -class TranslationInputs(Inputs): +class SubtitlingTranslationInputs(Inputs): text: InputParam = None sourcelanguage: InputParam = None - targetlanguage: InputParam = None - script_in: InputParam = None - script_out: InputParam = None dialect_in: InputParam = None - dialect_out: InputParam = None - context: InputParam = None + target_supplier: InputParam = None + targetlanguages: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) self.sourcelanguage = self.create_param(code="sourcelanguage", data_type=DataType.LABEL, is_required=True) - self.targetlanguage = self.create_param(code="targetlanguage", data_type=DataType.LABEL, is_required=True) - self.script_in = self.create_param(code="script_in", data_type=DataType.LABEL, is_required=False) - self.script_out = self.create_param(code="script_out", data_type=DataType.LABEL, is_required=False) self.dialect_in = self.create_param(code="dialect_in", data_type=DataType.LABEL, is_required=False) - self.dialect_out = self.create_param(code="dialect_out", data_type=DataType.LABEL, is_required=False) - self.context = self.create_param(code="context", data_type=DataType.LABEL, is_required=False) + self.target_supplier = self.create_param(code="target_supplier", data_type=DataType.LABEL, is_required=False) + self.targetlanguages = self.create_param(code="targetlanguages", data_type=DataType.LABEL, 
is_required=False) -class TranslationOutputs(Outputs): +class SubtitlingTranslationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -3345,83 +3385,79 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class Translation(AssetNode[TranslationInputs, TranslationOutputs]): +class SubtitlingTranslation(AssetNode[SubtitlingTranslationInputs, SubtitlingTranslationOutputs]): """ - Translation is the process of converting text from one language into an - equivalent text in another language, preserving the original meaning and - context. + Converts the text of subtitles from one language to another, ensuring context +and cultural nuances are maintained. Essential for global content distribution. - InputType: text - OutputType: text + InputType: text + OutputType: text """ - - function: str = "translation" + function: str = "subtitling-translation" input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = TranslationInputs - outputs_class: Type[TO] = TranslationOutputs + inputs_class: Type[TI] = SubtitlingTranslationInputs + outputs_class: Type[TO] = SubtitlingTranslationOutputs -class VoiceActivityDetectionInputs(Inputs): - audio: InputParam = None - onset: InputParam = None - offset: InputParam = None - min_duration_on: InputParam = None - min_duration_off: InputParam = None +class AudioTranscriptAnalysisInputs(Inputs): + language: InputParam = None + dialect: InputParam = None + source_supplier: InputParam = None + source_audio: InputParam = None + script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) - self.onset = self.create_param(code="onset", data_type=DataType.TEXT, is_required=False) - self.offset = self.create_param(code="offset", data_type=DataType.TEXT, is_required=False) - self.min_duration_on = self.create_param(code="min_duration_on", data_type=DataType.TEXT, is_required=False) - self.min_duration_off = self.create_param(code="min_duration_off", data_type=DataType.TEXT, is_required=False) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) + self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.source_supplier = self.create_param(code="source_supplier", data_type=DataType.LABEL, is_required=False) + self.source_audio = self.create_param(code="source_audio", data_type=DataType.AUDIO, is_required=True) + self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class VoiceActivityDetectionOutputs(Outputs): +class AudioTranscriptAnalysisOutputs(Outputs): data: OutputParam = None - audio: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.AUDIO) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class VoiceActivityDetection(BaseSegmentor[VoiceActivityDetectionInputs, VoiceActivityDetectionOutputs]): +class AudioTranscriptAnalysis(AssetNode[AudioTranscriptAnalysisInputs, AudioTranscriptAnalysisOutputs]): """ - Voice Activity Detection (VAD) is a technology that identifies the presence or - absence of human speech within an audio signal, enabling systems to distinguish - between spoken words and background noise. 
+ Analyzes transcribed audio data for insights, patterns, or specific information +extraction. - InputType: audio - OutputType: audio + InputType: audio + OutputType: text """ - - function: str = "voice-activity-detection" + function: str = "audio-transcript-analysis" input_type: str = DataType.AUDIO - output_type: str = DataType.AUDIO + output_type: str = DataType.TEXT - inputs_class: Type[TI] = VoiceActivityDetectionInputs - outputs_class: Type[TO] = VoiceActivityDetectionOutputs + inputs_class: Type[TI] = AudioTranscriptAnalysisInputs + outputs_class: Type[TO] = AudioTranscriptAnalysisOutputs -class SpeechEmbeddingInputs(Inputs): - audio: InputParam = None +class TextGenerationInputs(Inputs): + text: InputParam = None + prompt: InputParam = None + context: InputParam = None language: InputParam = None - dialect: InputParam = None script: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=True) - self.dialect = self.create_param(code="dialect", data_type=DataType.LABEL, is_required=False) + self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) + self.prompt = self.create_param(code="prompt", data_type=DataType.TEXT, is_required=False) + self.context = self.create_param(code="context", data_type=DataType.TEXT, is_required=False) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) -class SpeechEmbeddingOutputs(Outputs): +class TextGenerationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): @@ -3429,107 +3465,100 @@ def __init__(self, node=None): self.data = self.create_param(code="data", data_type=DataType.TEXT) -class SpeechEmbedding(AssetNode[SpeechEmbeddingInputs, SpeechEmbeddingOutputs]): +class TextGeneration(AssetNode[TextGenerationInputs, TextGenerationOutputs]): """ - Speech Embedding is a process that transforms spoken language into a fixed- - dimensional vector representation, capturing essential features and - characteristics of the speech for tasks such as recognition, classification, - and analysis. + Creates coherent and contextually relevant textual content based on prompts or +certain parameters. Useful for chatbots, content creation, and data +augmentation. 
- InputType: audio - OutputType: text + InputType: text + OutputType: text """ - - function: str = "speech-embedding" - input_type: str = DataType.AUDIO + function: str = "text-generation" + input_type: str = DataType.TEXT output_type: str = DataType.TEXT - inputs_class: Type[TI] = SpeechEmbeddingInputs - outputs_class: Type[TO] = SpeechEmbeddingOutputs + inputs_class: Type[TI] = TextGenerationInputs + outputs_class: Type[TO] = TextGenerationOutputs -class SubtitlingTranslationInputs(Inputs): +class TextNormalizationInputs(Inputs): text: InputParam = None - sourcelanguage: InputParam = None - dialect_in: InputParam = None - target_supplier: InputParam = None - targetlanguages: InputParam = None + language: InputParam = None + settings: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.sourcelanguage = self.create_param(code="sourcelanguage", data_type=DataType.LABEL, is_required=True) - self.dialect_in = self.create_param(code="dialect_in", data_type=DataType.LABEL, is_required=False) - self.target_supplier = self.create_param(code="target_supplier", data_type=DataType.LABEL, is_required=False) - self.targetlanguages = self.create_param(code="targetlanguages", data_type=DataType.LABEL, is_required=False) + self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) + self.settings = self.create_param(code="settings", data_type=DataType.TEXT, is_required=False) -class SubtitlingTranslationOutputs(Outputs): +class TextNormalizationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.LABEL) -class SubtitlingTranslation(AssetNode[SubtitlingTranslationInputs, SubtitlingTranslationOutputs]): +class TextNormalization(AssetNode[TextNormalizationInputs, TextNormalizationOutputs]): """ - Subtitling Translation is the process of converting spoken dialogue from one - language into written text in another language, which is then displayed on- - screen to aid viewers in understanding the content. + Converts unstructured or non-standard textual data into a more readable and +uniform format, dealing with abbreviations, numerals, and other non-standard +words. 
- InputType: text - OutputType: text + InputType: text + OutputType: label """ - - function: str = "subtitling-translation" + function: str = "text-normalization" input_type: str = DataType.TEXT - output_type: str = DataType.TEXT + output_type: str = DataType.LABEL - inputs_class: Type[TI] = SubtitlingTranslationInputs - outputs_class: Type[TO] = SubtitlingTranslationOutputs + inputs_class: Type[TI] = TextNormalizationInputs + outputs_class: Type[TO] = TextNormalizationOutputs -class TextGenerationInputs(Inputs): - text: InputParam = None - prompt: InputParam = None - context: InputParam = None - language: InputParam = None - script: InputParam = None +class VoiceActivityDetectionInputs(Inputs): + audio: InputParam = None + onset: InputParam = None + offset: InputParam = None + min_duration_on: InputParam = None + min_duration_off: InputParam = None def __init__(self, node=None): super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.prompt = self.create_param(code="prompt", data_type=DataType.TEXT, is_required=False) - self.context = self.create_param(code="context", data_type=DataType.TEXT, is_required=False) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) - self.script = self.create_param(code="script", data_type=DataType.LABEL, is_required=False) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO, is_required=True) + self.onset = self.create_param(code="onset", data_type=DataType.TEXT, is_required=False) + self.offset = self.create_param(code="offset", data_type=DataType.TEXT, is_required=False) + self.min_duration_on = self.create_param(code="min_duration_on", data_type=DataType.TEXT, is_required=False) + self.min_duration_off = self.create_param(code="min_duration_off", data_type=DataType.TEXT, is_required=False) -class TextGenerationOutputs(Outputs): +class VoiceActivityDetectionOutputs(Outputs): data: OutputParam = None + audio: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.TEXT) + self.data = self.create_param(code="data", data_type=DataType.AUDIO) + self.audio = self.create_param(code="audio", data_type=DataType.AUDIO) -class TextGeneration(AssetNode[TextGenerationInputs, TextGenerationOutputs]): +class VoiceActivityDetection(BaseSegmentor[VoiceActivityDetectionInputs, VoiceActivityDetectionOutputs]): """ - Text Generation is a process in which artificial intelligence models, such as - neural networks, produce coherent and contextually relevant text based on a - given input or prompt, often mimicking human writing styles and patterns. + Determines when a person is speaking in an audio clip. It's an essential +preprocessing step for other audio-related tasks. 
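The onset/offset and min_duration_* inputs above suggest threshold-and-duration logic; the toy detector below sketches that idea over per-frame energies. It is illustrative only, not the asset's algorithm.

    def detect_speech(energies, threshold=0.5, min_frames=3):
        segments, start = [], None
        for i, e in enumerate(energies):
            if e >= threshold and start is None:
                start = i                       # onset
            elif e < threshold and start is not None:
                if i - start >= min_frames:     # drop too-short bursts
                    segments.append((start, i)) # offset
                start = None
        if start is not None and len(energies) - start >= min_frames:
            segments.append((start, len(energies)))
        return segments

    detect_speech([0.1, 0.8, 0.9, 0.7, 0.1, 0.2])  # -> [(1, 4)]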
- InputType: text - OutputType: text + InputType: audio + OutputType: audio """ + function: str = "voice-activity-detection" + input_type: str = DataType.AUDIO + output_type: str = DataType.AUDIO - function: str = "text-generation" - input_type: str = DataType.TEXT - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = TextGenerationInputs - outputs_class: Type[TO] = TextGenerationOutputs + inputs_class: Type[TI] = VoiceActivityDetectionInputs + outputs_class: Type[TO] = VoiceActivityDetectionOutputs class VideoUnderstandingInputs(Inputs): @@ -3558,14 +3587,13 @@ def __init__(self, node=None): class VideoUnderstanding(AssetNode[VideoUnderstandingInputs, VideoUnderstandingOutputs]): """ - Video Understanding is the process of analyzing and interpreting video content - to extract meaningful information, such as identifying objects, actions, - events, and contextual relationships within the footage. + Video Understanding is the process of analyzing and interpreting video content +to extract meaningful information, such as identifying objects, actions, +events, and contextual relationships within the footage. - InputType: video - OutputType: text + InputType: video + OutputType: text """ - function: str = "video-understanding" input_type: str = DataType.VIDEO output_type: str = DataType.TEXT @@ -3574,79 +3602,50 @@ class VideoUnderstanding(AssetNode[VideoUnderstandingInputs, VideoUnderstandingO outputs_class: Type[TO] = VideoUnderstandingOutputs -class TextToVideoGenerationInputs(Inputs): - text: InputParam = None - language: InputParam = None - - def __init__(self, node=None): - super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) - - -class TextToVideoGenerationOutputs(Outputs): - data: OutputParam = None - - def __init__(self, node=None): - super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.VIDEO) - - -class TextToVideoGeneration(AssetNode[TextToVideoGenerationInputs, TextToVideoGenerationOutputs]): - """ - Text To Video Generation is a process that converts written descriptions or - scripts into dynamic, visual video content using advanced algorithms and - artificial intelligence. 
- - InputType: text - OutputType: video - """ - - function: str = "text-to-video-generation" - input_type: str = DataType.TEXT - output_type: str = DataType.VIDEO - - inputs_class: Type[TI] = TextToVideoGenerationInputs - outputs_class: Type[TO] = TextToVideoGenerationOutputs - - -class TextNormalizationInputs(Inputs): +class TranslationInputs(Inputs): text: InputParam = None - language: InputParam = None - settings: InputParam = None + sourcelanguage: InputParam = None + targetlanguage: InputParam = None + script_in: InputParam = None + script_out: InputParam = None + dialect_in: InputParam = None + dialect_out: InputParam = None + context: InputParam = None def __init__(self, node=None): super().__init__(node=node) self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - self.language = self.create_param(code="language", data_type=DataType.LABEL, is_required=False) - self.settings = self.create_param(code="settings", data_type=DataType.TEXT, is_required=False) + self.sourcelanguage = self.create_param(code="sourcelanguage", data_type=DataType.LABEL, is_required=True) + self.targetlanguage = self.create_param(code="targetlanguage", data_type=DataType.LABEL, is_required=True) + self.script_in = self.create_param(code="script_in", data_type=DataType.LABEL, is_required=False) + self.script_out = self.create_param(code="script_out", data_type=DataType.LABEL, is_required=False) + self.dialect_in = self.create_param(code="dialect_in", data_type=DataType.LABEL, is_required=False) + self.dialect_out = self.create_param(code="dialect_out", data_type=DataType.LABEL, is_required=False) + self.context = self.create_param(code="context", data_type=DataType.LABEL, is_required=False) -class TextNormalizationOutputs(Outputs): +class TranslationOutputs(Outputs): data: OutputParam = None def __init__(self, node=None): super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.LABEL) + self.data = self.create_param(code="data", data_type=DataType.TEXT) -class TextNormalization(AssetNode[TextNormalizationInputs, TextNormalizationOutputs]): +class Translation(AssetNode[TranslationInputs, TranslationOutputs]): """ - Text normalization is the process of transforming text into a standard, - consistent format by correcting spelling errors, converting all characters to a - uniform case, removing punctuation, and expanding abbreviations to improve the - text's readability and usability for further processing or analysis. + Converts text from one language to another while maintaining the original +message's essence and context. Crucial for global communication. - InputType: text - OutputType: label + InputType: text + OutputType: text """ - - function: str = "text-normalization" + function: str = "translation" input_type: str = DataType.TEXT - output_type: str = DataType.LABEL + output_type: str = DataType.TEXT - inputs_class: Type[TI] = TextNormalizationInputs - outputs_class: Type[TO] = TextNormalizationOutputs + inputs_class: Type[TI] = TranslationInputs + outputs_class: Type[TO] = TranslationOutputs class SpeechRecognitionInputs(Inputs): @@ -3675,13 +3674,12 @@ def __init__(self, node=None): class SpeechRecognition(AssetNode[SpeechRecognitionInputs, SpeechRecognitionOutputs]): """ - Speech recognition is a technology that enables a computer or device to - identify and process spoken language, converting it into text. + Converts spoken language into written text. 
Useful for transcription services, +voice assistants, and applications requiring voice-to-text capabilities. - InputType: audio - OutputType: text + InputType: audio + OutputType: text """ - function: str = "speech-recognition" input_type: str = DataType.AUDIO output_type: str = DataType.TEXT @@ -3718,15 +3716,12 @@ def __init__(self, node=None): class Subtitling(AssetNode[SubtitlingInputs, SubtitlingOutputs]): """ - Subtitling is the process of displaying written text on a screen to represent - the spoken dialogue, narration, or other audio elements in a video, typically - to aid viewers who are deaf or hard of hearing, or to provide translations for - audiences who speak different languages. + Generates accurate subtitles for videos, enhancing accessibility for diverse +audiences. - InputType: audio - OutputType: text + InputType: audio + OutputType: text """ - function: str = "subtitling" input_type: str = DataType.AUDIO output_type: str = DataType.TEXT @@ -3735,927 +3730,800 @@ class Subtitling(AssetNode[SubtitlingInputs, SubtitlingOutputs]): outputs_class: Type[TO] = SubtitlingOutputs -class ClassificationMetricInputs(Inputs): - hypotheses: InputParam = None - references: InputParam = None - lowerIsBetter: InputParam = None - sources: InputParam = None - score_identifier: InputParam = None - - def __init__(self, node=None): - super().__init__(node=node) - self.hypotheses = self.create_param(code="hypotheses", data_type=DataType.LABEL, is_required=True) - self.references = self.create_param(code="references", data_type=DataType.LABEL, is_required=True) - self.lowerIsBetter = self.create_param(code="lowerIsBetter", data_type=DataType.TEXT, is_required=False) - self.sources = self.create_param(code="sources", data_type=DataType.TEXT, is_required=False) - self.score_identifier = self.create_param(code="score_identifier", data_type=DataType.TEXT, is_required=True) - - -class ClassificationMetricOutputs(Outputs): - data: OutputParam = None - - def __init__(self, node=None): - super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.NUMBER) - - -class ClassificationMetric(BaseMetric[ClassificationMetricInputs, ClassificationMetricOutputs]): - """ - A Classification Metric is a quantitative measure used to evaluate the quality - and effectiveness of classification models. - - InputType: text - OutputType: text - """ - - function: str = "classification-metric" - input_type: str = DataType.TEXT - output_type: str = DataType.TEXT - - inputs_class: Type[TI] = ClassificationMetricInputs - outputs_class: Type[TO] = ClassificationMetricOutputs - - -class TextToImageGenerationInputs(Inputs): - text: InputParam = None - - def __init__(self, node=None): - super().__init__(node=node) - self.text = self.create_param(code="text", data_type=DataType.TEXT, is_required=True) - - -class TextToImageGenerationOutputs(Outputs): - data: OutputParam = None - - def __init__(self, node=None): - super().__init__(node=node) - self.data = self.create_param(code="data", data_type=DataType.IMAGE) - - -class TextToImageGeneration(AssetNode[TextToImageGenerationInputs, TextToImageGenerationOutputs]): - """ - Text To Image Generation is a process where a system creates visual images - based on descriptive text input, translating written language into - corresponding graphical representations. 
- - InputType: text - OutputType: image - """ - - function: str = "text-to-image-generation" - input_type: str = DataType.TEXT - output_type: str = DataType.IMAGE - - inputs_class: Type[TI] = TextToImageGenerationInputs - outputs_class: Type[TO] = TextToImageGenerationOutputs - class Pipeline(DefaultPipeline): + def object_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ObjectDetection: """ - Object Detection is a computer vision technology that identifies and locates - objects within an image, typically by drawing bounding boxes around the - detected objects and classifying them into predefined categories. + Object Detection is a computer vision technology that identifies and locates +objects within an image, typically by drawing bounding boxes around the +detected objects and classifying them into predefined categories. """ return ObjectDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) def language_identification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> LanguageIdentification: """ - Language Identification is the process of automatically determining the - language in which a given piece of text is written. + Detects the language in which a given text is written, aiding in multilingual +platforms or content localization. """ return LanguageIdentification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def ocr(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Ocr: + def depth_estimation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DepthEstimation: """ - OCR, or Optical Character Recognition, is a technology that converts different - types of documents, such as scanned paper documents, PDFs, or images captured - by a digital camera, into editable and searchable data by recognizing and - extracting text from the images. + Depth estimation is a computational process that determines the distance of +objects from a viewpoint, typically using visual data from cameras or sensors +to create a three-dimensional understanding of a scene. """ - return Ocr(*args, asset_id=asset_id, pipeline=self, **kwargs) + return DepthEstimation(*args, asset_id=asset_id, pipeline=self, **kwargs) def script_execution(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ScriptExecution: """ - Script Execution refers to the process of running a set of programmed - instructions or code within a computing environment, enabling the automated - performance of tasks, calculations, or operations as defined by the script. + Script Execution refers to the process of running a set of programmed +instructions or code within a computing environment, enabling the automated +performance of tasks, calculations, or operations as defined by the script. """ return ScriptExecution(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_label_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageLabelDetection: + def image_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageEmbedding: """ - Image Label Detection is a function that automatically identifies and assigns - descriptive tags or labels to objects, scenes, or elements within an image, - enabling easier categorization, search, and analysis of visual content. + Image Embedding is a process that transforms an image into a fixed-dimensional +vector representation, capturing its essential features and enabling efficient +comparison, retrieval, and analysis in various machine learning and computer +vision tasks. 
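The Pipeline methods defined here are thin factories that attach a typed node to the pipeline. A minimal sketch of chaining two of them follows; the asset ids are placeholders, and the param-level link call is my reading of the designer API rather than something this hunk defines.

    def build_transcribe_then_translate(pipeline, asr_id, mt_id):
        asr = pipeline.speech_recognition(asset_id=asr_id)  # placeholder id
        mt = pipeline.translation(asset_id=mt_id)           # placeholder id
        # assumed linking shape: output param -> input param
        asr.outputs.data.link(mt.inputs.text)
        return asr, mt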
""" - return ImageLabelDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_captioning(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageCaptioning: + def image_to_video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageToVideoGeneration: """ - Image Captioning is a process that involves generating a textual description of - an image, typically using machine learning models to analyze the visual content - and produce coherent and contextually relevant sentences that describe the - objects, actions, and scenes depicted in the image. + The Image To Video Generation function transforms a series of static images +into a cohesive, dynamic video sequence, often incorporating transitions, +effects, and synchronization with audio to create a visually engaging +narrative. """ - return ImageCaptioning(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageToVideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def audio_language_identification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioLanguageIdentification: + def image_impainting(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageImpainting: """ - Audio Language Identification is a process that involves analyzing an audio - recording to determine the language being spoken. + Image inpainting is a process that involves filling in missing or damaged parts +of an image in a way that is visually coherent and seamlessly blends with the +surrounding areas, often using advanced algorithms and techniques to restore +the image to its original or intended appearance. """ - return AudioLanguageIdentification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageImpainting(*args, asset_id=asset_id, pipeline=self, **kwargs) - def asr_age_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrAgeClassification: + def style_transfer(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> StyleTransfer: """ - The ASR Age Classification function is designed to analyze audio recordings of - speech to determine the speaker's age group by leveraging automatic speech - recognition (ASR) technology and machine learning algorithms. + Style Transfer is a technique in artificial intelligence that applies the +visual style of one image (such as the brushstrokes of a famous painting) to +the content of another image, effectively blending the artistic elements of the +first image with the subject matter of the second. """ - return AsrAgeClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return StyleTransfer(*args, asset_id=asset_id, pipeline=self, **kwargs) - def benchmark_scoring_mt(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BenchmarkScoringMt: + def multi_class_text_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MultiClassTextClassification: """ - Benchmark Scoring MT is a function designed to evaluate and score machine - translation systems by comparing their output against a set of predefined - benchmarks, thereby assessing their accuracy and performance. + Multi Class Text Classification is a natural language processing task that +involves categorizing a given text into one of several predefined classes or +categories based on its content. 
""" - return BenchmarkScoringMt(*args, asset_id=asset_id, pipeline=self, **kwargs) + return MultiClassTextClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def asr_gender_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrGenderClassification: + def part_of_speech_tagging(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> PartOfSpeechTagging: """ - The ASR Gender Classification function analyzes audio recordings to determine - and classify the speaker's gender based on their voice characteristics. + Part of Speech Tagging is a natural language processing task that involves +assigning each word in a sentence its corresponding part of speech, such as +noun, verb, adjective, or adverb, based on its role and context within the +sentence. """ - return AsrGenderClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return PartOfSpeechTagging(*args, asset_id=asset_id, pipeline=self, **kwargs) - def base_model(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BaseModel: + def metric_aggregation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MetricAggregation: """ - The Base-Model function serves as a foundational framework designed to provide - essential features and capabilities upon which more specialized or advanced - models can be built and customized. + Metric Aggregation is a function that computes and summarizes numerical data by +applying statistical operations, such as averaging, summing, or finding the +minimum and maximum values, to provide insights and facilitate analysis of +large datasets. """ - return BaseModel(*args, asset_id=asset_id, pipeline=self, **kwargs) + return MetricAggregation(*args, asset_id=asset_id, pipeline=self, **kwargs) - def language_identification_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> LanguageIdentificationAudio: + def image_colorization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageColorization: """ - The Language Identification Audio function analyzes audio input to determine - and identify the language being spoken. + Image colorization is a process that involves adding color to grayscale images, +transforming them from black-and-white to full-color representations, often +using advanced algorithms and machine learning techniques to predict and apply +the appropriate hues and shades. """ - return LanguageIdentificationAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageColorization(*args, asset_id=asset_id, pipeline=self, **kwargs) - def loglikelihood(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Loglikelihood: + def intent_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> IntentClassification: """ - The Log Likelihood function measures the probability of observing the given - data under a specific statistical model by taking the natural logarithm of the - likelihood function, thereby transforming the product of probabilities into a - sum, which simplifies the process of optimization and parameter estimation. + Intent Classification is a natural language processing task that involves +analyzing and categorizing user text input to determine the underlying purpose +or goal behind the communication, such as booking a flight, asking for weather +information, or setting a reminder. 
""" - return Loglikelihood(*args, asset_id=asset_id, pipeline=self, **kwargs) + return IntentClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def video_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoEmbedding: + def audio_intent_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioIntentDetection: """ - Video Embedding is a process that transforms video content into a fixed- - dimensional vector representation, capturing essential features and patterns to - facilitate tasks such as retrieval, classification, and recommendation. + Audio Intent Detection is a process that involves analyzing audio signals to +identify and interpret the underlying intentions or purposes behind spoken +words, enabling systems to understand and respond appropriately to human +speech. """ - return VideoEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AudioIntentDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_segmenation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSegmenation: + def asr_quality_estimation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrQualityEstimation: """ - Text Segmentation is the process of dividing a continuous text into meaningful - units, such as words, sentences, or topics, to facilitate easier analysis and - understanding. + ASR Quality Estimation is a process that evaluates the accuracy and reliability +of automatic speech recognition systems by analyzing their performance in +transcribing spoken language into text. """ - return TextSegmenation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AsrQualityEstimation(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageEmbedding: + def search(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Search: """ - Image Embedding is a process that transforms an image into a fixed-dimensional - vector representation, capturing its essential features and enabling efficient - comparison, retrieval, and analysis in various machine learning and computer - vision tasks. + An algorithm that identifies and returns data or items that match particular +keywords or conditions from a dataset. A fundamental tool for databases and +websites. """ - return ImageEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) + return Search(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_manipulation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageManipulation: + def viseme_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VisemeGeneration: """ - Image Manipulation refers to the process of altering or enhancing digital - images using various techniques and tools to achieve desired visual effects, - correct imperfections, or transform the image's appearance. + Viseme Generation is the process of creating visual representations of +phonemes, which are the distinct units of sound in speech, to synchronize lip +movements with spoken words in animations or virtual avatars. 
""" - return ImageManipulation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VisemeGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_to_video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageToVideoGeneration: + def ocr(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Ocr: """ - The Image To Video Generation function transforms a series of static images - into a cohesive, dynamic video sequence, often incorporating transitions, - effects, and synchronization with audio to create a visually engaging - narrative. + Converts images of typed, handwritten, or printed text into machine-encoded +text. Used in digitizing printed texts for data retrieval. """ - return ImageToVideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) + return Ocr(*args, asset_id=asset_id, pipeline=self, **kwargs) - def audio_forced_alignment(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioForcedAlignment: + def loglikelihood(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Loglikelihood: """ - Audio Forced Alignment is a process that synchronizes a given audio recording - with its corresponding transcript by precisely aligning each spoken word or - phoneme to its exact timing within the audio. + The Log Likelihood function measures the probability of observing the given +data under a specific statistical model by taking the natural logarithm of the +likelihood function, thereby transforming the product of probabilities into a +sum, which simplifies the process of optimization and parameter estimation. """ - return AudioForcedAlignment(*args, asset_id=asset_id, pipeline=self, **kwargs) + return Loglikelihood(*args, asset_id=asset_id, pipeline=self, **kwargs) - def benchmark_scoring_asr(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BenchmarkScoringAsr: + def video_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoEmbedding: """ - Benchmark Scoring ASR is a function that evaluates and compares the performance - of automatic speech recognition systems by analyzing their accuracy, speed, and - other relevant metrics against a standardized set of benchmarks. + Video Embedding is a process that transforms video content into a fixed- +dimensional vector representation, capturing essential features and patterns to +facilitate tasks such as retrieval, classification, and recommendation. """ - return BenchmarkScoringAsr(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VideoEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) - def visual_question_answering(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VisualQuestionAnswering: + def text_segmenation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSegmenation: """ - Visual Question Answering (VQA) is a task in artificial intelligence that - involves analyzing an image and providing accurate, contextually relevant - answers to questions posed about the visual content of that image. + Text Segmentation is the process of dividing a continuous text into meaningful +units, such as words, sentences, or topics, to facilitate easier analysis and +understanding. 
""" - return VisualQuestionAnswering(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextSegmenation(*args, asset_id=asset_id, pipeline=self, **kwargs) - def document_image_parsing(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DocumentImageParsing: + def expression_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ExpressionDetection: """ - Document Image Parsing is the process of analyzing and converting scanned or - photographed images of documents into structured, machine-readable formats by - identifying and extracting text, layout, and other relevant information. + Expression Detection is the process of identifying and analyzing facial +expressions to interpret emotions or intentions using AI and computer vision +techniques. """ - return DocumentImageParsing(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ExpressionDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def document_information_extraction( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> DocumentInformationExtraction: + def speech_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechClassification: """ - Document Information Extraction is the process of automatically identifying, - extracting, and structuring relevant data from unstructured or semi-structured - documents, such as invoices, receipts, contracts, and forms, to facilitate - easier data management and analysis. + Categorizes audio clips based on their content, aiding in content organization +and targeted actions. """ - return DocumentInformationExtraction(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SpeechClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def depth_estimation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DepthEstimation: + def inverse_text_normalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> InverseTextNormalization: """ - Depth estimation is a computational process that determines the distance of - objects from a viewpoint, typically using visual data from cameras or sensors - to create a three-dimensional understanding of a scene. + Inverse Text Normalization is the process of converting spoken or written +language in its normalized form, such as numbers, dates, and abbreviations, +back into their original, more complex or detailed textual representations. """ - return DepthEstimation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return InverseTextNormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) - def video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoGeneration: + def extract_audio_from_video(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ExtractAudioFromVideo: """ - Video Generation is the process of creating video content through automated or - semi-automated means, often utilizing algorithms, artificial intelligence, or - software tools to produce visual and audio elements that can range from simple - animations to complex, realistic scenes. + Isolates and extracts audio tracks from video files, aiding in audio analysis +or transcription tasks. 
""" - return VideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ExtractAudioFromVideo(*args, asset_id=asset_id, pipeline=self, **kwargs) - def referenceless_audio_generation_metric( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> ReferencelessAudioGenerationMetric: + def image_compression(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageCompression: """ - The Referenceless Audio Generation Metric is a tool designed to evaluate the - quality of generated audio content without the need for a reference or original - audio sample for comparison. + Reduces the size of image files without significantly compromising their visual +quality. Useful for optimizing storage and improving webpage load times. """ - return ReferencelessAudioGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageCompression(*args, asset_id=asset_id, pipeline=self, **kwargs) - def multi_class_image_classification( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> MultiClassImageClassification: + def noise_removal(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> NoiseRemoval: """ - Multi Class Image Classification is a machine learning task where an algorithm - is trained to categorize images into one of several predefined classes or - categories based on their visual content. + Noise Removal is a process that involves identifying and eliminating unwanted +random variations or disturbances from an audio signal to enhance the clarity +and quality of the underlying information. """ - return MultiClassImageClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return NoiseRemoval(*args, asset_id=asset_id, pipeline=self, **kwargs) - def semantic_segmentation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SemanticSegmentation: + def text_summarization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSummarization: """ - Semantic segmentation is a computer vision process that involves classifying - each pixel in an image into a predefined category, effectively partitioning the - image into meaningful segments based on the objects or regions they represent. + Extracts the main points from a larger body of text, producing a concise +summary without losing the primary message. """ - return SemanticSegmentation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextSummarization(*args, asset_id=asset_id, pipeline=self, **kwargs) - def instance_segmentation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> InstanceSegmentation: + def text_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGenerationMetric: """ - Instance segmentation is a computer vision task that involves detecting and - delineating each distinct object within an image, assigning a unique label and - precise boundary to every individual instance of objects, even if they belong - to the same category. + A Text Generation Metric is a quantitative measure used to evaluate the quality +and effectiveness of text produced by natural language processing models, often +assessing aspects such as coherence, relevance, fluency, and adherence to given +prompts or instructions. 
""" - return InstanceSegmentation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_colorization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageColorization: + def image_captioning(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageCaptioning: """ - Image colorization is a process that involves adding color to grayscale images, - transforming them from black-and-white to full-color representations, often - using advanced algorithms and machine learning techniques to predict and apply - the appropriate hues and shades. + Image Captioning is a process that involves generating a textual description of +an image, typically using machine learning models to analyze the visual content +and produce coherent and contextually relevant sentences that describe the +objects, actions, and scenes depicted in the image. """ - return ImageColorization(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageCaptioning(*args, asset_id=asset_id, pipeline=self, **kwargs) - def audio_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioGenerationMetric: + def benchmark_scoring_mt(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BenchmarkScoringMt: """ - The Audio Generation Metric is a quantitative measure used to evaluate the - quality, accuracy, and overall performance of audio generated by artificial - intelligence systems, often considering factors such as fidelity, - intelligibility, and similarity to human-produced audio. + Benchmark Scoring MT is a function designed to evaluate and score machine +translation systems by comparing their output against a set of predefined +benchmarks, thereby assessing their accuracy and performance. """ - return AudioGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) + return BenchmarkScoringMt(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_impainting(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageImpainting: + def speaker_diarization_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeakerDiarizationAudio: """ - Image inpainting is a process that involves filling in missing or damaged parts - of an image in a way that is visually coherent and seamlessly blends with the - surrounding areas, often using advanced algorithms and techniques to restore - the image to its original or intended appearance. + Identifies individual speakers and their respective speech segments within an +audio clip. Ideal for multi-speaker recordings or conference calls. """ - return ImageImpainting(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SpeakerDiarizationAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) - def style_transfer(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> StyleTransfer: + def benchmark_scoring_asr(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BenchmarkScoringAsr: """ - Style Transfer is a technique in artificial intelligence that applies the - visual style of one image (such as the brushstrokes of a famous painting) to - the content of another image, effectively blending the artistic elements of the - first image with the subject matter of the second. + Benchmark Scoring ASR is a function that evaluates and compares the performance +of automatic speech recognition systems by analyzing their accuracy, speed, and +other relevant metrics against a standardized set of benchmarks. 
""" - return StyleTransfer(*args, asset_id=asset_id, pipeline=self, **kwargs) + return BenchmarkScoringAsr(*args, asset_id=asset_id, pipeline=self, **kwargs) - def multi_class_text_classification( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> MultiClassTextClassification: + def visual_question_answering(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VisualQuestionAnswering: """ - Multi Class Text Classification is a natural language processing task that - involves categorizing a given text into one of several predefined classes or - categories based on its content. + Visual Question Answering (VQA) is a task in artificial intelligence that +involves analyzing an image and providing accurate, contextually relevant +answers to questions posed about the visual content of that image. """ - return MultiClassTextClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VisualQuestionAnswering(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextEmbedding: + def document_image_parsing(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DocumentImageParsing: """ - Text embedding is a process that converts text into numerical vectors, - capturing the semantic meaning and contextual relationships of words or - phrases, enabling machines to understand and analyze natural language more - effectively. + Document Image Parsing is the process of analyzing and converting scanned or +photographed images of documents into structured, machine-readable formats by +identifying and extracting text, layout, and other relevant information. """ - return TextEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) + return DocumentImageParsing(*args, asset_id=asset_id, pipeline=self, **kwargs) - def multi_label_text_classification( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> MultiLabelTextClassification: + def multi_label_text_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MultiLabelTextClassification: """ - Multi Label Text Classification is a natural language processing task where a - given text is analyzed and assigned multiple relevant labels or categories from - a predefined set, allowing for the text to belong to more than one category - simultaneously. + Multi Label Text Classification is a natural language processing task where a +given text is analyzed and assigned multiple relevant labels or categories from +a predefined set, allowing for the text to belong to more than one category +simultaneously. """ return MultiLabelTextClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) def text_reconstruction(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextReconstruction: """ - Text Reconstruction is a process that involves piecing together fragmented or - incomplete text data to restore it to its original, coherent form. + Text Reconstruction is a process that involves piecing together fragmented or +incomplete text data to restore it to its original, coherent form. """ return TextReconstruction(*args, asset_id=asset_id, pipeline=self, **kwargs) - def fact_checking(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> FactChecking: + def video_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoContentModeration: + """ + Automatically reviews video content to detect and possibly remove inappropriate +or harmful material. 
Essential for user-generated content platforms. + """ + return VideoContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def multilingual_speech_recognition(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MultilingualSpeechRecognition: + """ + Multilingual Speech Recognition is a technology that enables the automatic +transcription of spoken language into text across multiple languages, allowing +for seamless communication and understanding in diverse linguistic contexts. + """ + return MultilingualSpeechRecognition(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def entity_linking(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> EntityLinking: + """ + Associates identified entities in the text with specific entries in a knowledge +base or database. + """ + return EntityLinking(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def audio_reconstruction(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioReconstruction: + """ + Audio Reconstruction is the process of restoring or recreating audio signals +from incomplete, damaged, or degraded recordings to achieve a high-quality, +accurate representation of the original sound. + """ + return AudioReconstruction(*args, asset_id=asset_id, pipeline=self, **kwargs) + + def audio_emotion_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioEmotionDetection: """ - Fact Checking is the process of verifying the accuracy and truthfulness of - information, statements, or claims by cross-referencing with reliable sources - and evidence. + Audio Emotion Detection is a technology that analyzes vocal characteristics and +patterns in audio recordings to identify and classify the emotional state of +the speaker. """ - return FactChecking(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AudioEmotionDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def speech_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechClassification: + def split_on_linebreak(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SplitOnLinebreak: """ - Speech Classification is a process that involves analyzing and categorizing - spoken language into predefined categories or classes based on various features - such as tone, pitch, and linguistic content. + The "Split On Linebreak" function divides a given string into a list of +substrings, using linebreaks (newline characters) as the points of separation. """ - return SpeechClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SplitOnLinebreak(*args, asset_id=asset_id, pipeline=self, **kwargs) - def intent_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> IntentClassification: + def keyword_spotting(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> KeywordSpotting: """ - Intent Classification is a natural language processing task that involves - analyzing and categorizing user text input to determine the underlying purpose - or goal behind the communication, such as booking a flight, asking for weather - information, or setting a reminder. + Keyword Spotting is a function that enables the detection and identification of +specific words or phrases within a stream of audio, often used in voice- +activated systems to trigger actions or commands based on recognized keywords. 
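+
+        Example (illustrative sketch; placeholder asset id, assumed factory
+        entry point):
+
+            from aixplain.factories import PipelineFactory
+
+            pipeline = PipelineFactory.init("keyword-demo")
+            node = pipeline.keyword_spotting(asset_id="<spotter-asset-id>")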
""" - return IntentClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return KeywordSpotting(*args, asset_id=asset_id, pipeline=self, **kwargs) - def part_of_speech_tagging(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> PartOfSpeechTagging: + def text_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextClassification: """ - Part of Speech Tagging is a natural language processing task that involves - assigning each word in a sentence its corresponding part of speech, such as - noun, verb, adjective, or adverb, based on its role and context within the - sentence. + Categorizes text into predefined groups or topics, facilitating content +organization and targeted actions. """ - return PartOfSpeechTagging(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def metric_aggregation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MetricAggregation: + def offensive_language_identification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> OffensiveLanguageIdentification: """ - Metric Aggregation is a function that computes and summarizes numerical data by - applying statistical operations, such as averaging, summing, or finding the - minimum and maximum values, to provide insights and facilitate analysis of - large datasets. + Detects language or phrases that might be considered offensive, aiding in +content moderation and creating respectful user interactions. """ - return MetricAggregation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return OffensiveLanguageIdentification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def dialect_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DialectDetection: + def speech_non_speech_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechNonSpeechClassification: """ - Dialect Detection is a function that identifies and classifies the specific - regional or social variations of a language spoken or written by an individual, - enabling the recognition of distinct linguistic patterns and nuances associated - with different dialects. + Differentiates between speech and non-speech audio segments. Great for editing +software and transcription services to exclude irrelevant audio. """ - return DialectDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SpeechNonSpeechClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def inverse_text_normalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> InverseTextNormalization: + def named_entity_recognition(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> NamedEntityRecognition: """ - Inverse Text Normalization is the process of converting spoken or written - language in its normalized form, such as numbers, dates, and abbreviations, - back into their original, more complex or detailed textual representations. + Identifies and classifies named entities (e.g., persons, organizations, +locations) within text. Useful for information extraction, content tagging, and +search enhancements. 
""" - return InverseTextNormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) + return NamedEntityRecognition(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_to_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToAudio: + def image_manipulation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageManipulation: """ - The Text to Audio function converts written text into spoken words, allowing - users to listen to the content instead of reading it. + Image Manipulation refers to the process of altering or enhancing digital +images using various techniques and tools to achieve desired visual effects, +correct imperfections, or transform the image's appearance. """ - return TextToAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageManipulation(*args, asset_id=asset_id, pipeline=self, **kwargs) - def fill_text_mask(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> FillTextMask: + def split_on_silence(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SplitOnSilence: """ - The "Fill Text Mask" function takes a text input with masked or placeholder - characters and replaces those placeholders with specified or contextually - appropriate characters to generate a complete and coherent text output. + The "Split On Silence" function divides an audio recording into separate +segments based on periods of silence, allowing for easier editing and analysis +of individual sections. """ - return FillTextMask(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SplitOnSilence(*args, asset_id=asset_id, pipeline=self, **kwargs) - def video_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoContentModeration: + def text_to_video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToVideoGeneration: """ - Video Content Moderation is the process of reviewing, analyzing, and filtering - video content to ensure it adheres to community guidelines, legal standards, - and platform policies, thereby preventing the dissemination of inappropriate, - harmful, or illegal material. + Text To Video Generation is a process that converts written descriptions or +scripts into dynamic, visual video content using advanced algorithms and +artificial intelligence. """ - return VideoContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextToVideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def extract_audio_from_video(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ExtractAudioFromVideo: + def document_information_extraction(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DocumentInformationExtraction: """ - The "Extract Audio From Video" function allows users to separate and save the - audio track from a video file, enabling them to obtain just the sound without - the accompanying visual content. + Document Information Extraction is the process of automatically identifying, +extracting, and structuring relevant data from unstructured or semi-structured +documents, such as invoices, receipts, contracts, and forms, to facilitate +easier data management and analysis. 
""" - return ExtractAudioFromVideo(*args, asset_id=asset_id, pipeline=self, **kwargs) + return DocumentInformationExtraction(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_compression(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageCompression: + def video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoGeneration: """ - Image compression is a process that reduces the file size of an image by - removing redundant or non-essential data, while maintaining an acceptable level - of visual quality. + Produces video content based on specific inputs or datasets. Can be used for +simulations, animations, or even deepfake detection. """ - return ImageCompression(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def multilingual_speech_recognition( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> MultilingualSpeechRecognition: + def text_to_image_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToImageGeneration: """ - Multilingual Speech Recognition is a technology that enables the automatic - transcription of spoken language into text across multiple languages, allowing - for seamless communication and understanding in diverse linguistic contexts. + Creates a visual representation based on textual input, turning descriptions +into pictorial forms. Used in creative processes and content generation. """ - return MultilingualSpeechRecognition(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextToImageGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def referenceless_text_generation_metric( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> ReferencelessTextGenerationMetric: + def referenceless_text_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ReferencelessTextGenerationMetric: """ - The Referenceless Text Generation Metric is a method for evaluating the quality - of generated text without requiring a reference text for comparison, often - leveraging models or algorithms to assess coherence, relevance, and fluency - based on intrinsic properties of the text itself. + The Referenceless Text Generation Metric is a method for evaluating the quality +of generated text without requiring a reference text for comparison, often +leveraging models or algorithms to assess coherence, relevance, and fluency +based on intrinsic properties of the text itself. """ return ReferencelessTextGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_generation_metric_default(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGenerationMetricDefault: + def other__multipurpose_(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> OtherMultipurpose: """ - The "Text Generation Metric Default" function provides a standard set of - evaluation metrics for assessing the quality and performance of text generation - models. + The "Other (Multipurpose)" function serves as a versatile category designed to +accommodate a wide range of tasks and activities that do not fit neatly into +predefined classifications, offering flexibility and adaptability for various +needs. 
""" - return TextGenerationMetricDefault(*args, asset_id=asset_id, pipeline=self, **kwargs) + return OtherMultipurpose(*args, asset_id=asset_id, pipeline=self, **kwargs) - def noise_removal(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> NoiseRemoval: + def image_label_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageLabelDetection: """ - Noise Removal is a process that involves identifying and eliminating unwanted - random variations or disturbances from an audio signal to enhance the clarity - and quality of the underlying information. + Identifies objects, themes, or topics within images, useful for image +categorization, search, and recommendation systems. """ - return NoiseRemoval(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageLabelDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def audio_reconstruction(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioReconstruction: + def speaker_diarization_video(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeakerDiarizationVideo: """ - Audio Reconstruction is the process of restoring or recreating audio signals - from incomplete, damaged, or degraded recordings to achieve a high-quality, - accurate representation of the original sound. + Segments a video based on different speakers, identifying when each individual +speaks. Useful for transcriptions and understanding multi-person conversations. """ - return AudioReconstruction(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SpeakerDiarizationVideo(*args, asset_id=asset_id, pipeline=self, **kwargs) - def voice_cloning(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VoiceCloning: + def audio_transcript_improvement(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioTranscriptImprovement: """ - Voice cloning is a technology that uses artificial intelligence to create a - digital replica of a person's voice, allowing for the generation of speech that - mimics the tone, pitch, and speaking style of the original speaker. + Refines and corrects transcriptions generated from audio data, improving +readability and accuracy. """ - return VoiceCloning(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AudioTranscriptImprovement(*args, asset_id=asset_id, pipeline=self, **kwargs) - def diacritization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Diacritization: + def dialect_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> DialectDetection: """ - Diacritization is the process of adding diacritical marks to letters in a text - to indicate pronunciation, stress, tone, or meaning, often used in languages - such as Arabic, Hebrew, and Vietnamese to provide clarity and accuracy in - written communication. + Identifies specific dialects within a language, aiding in localized content +creation or user experience personalization. """ - return Diacritization(*args, asset_id=asset_id, pipeline=self, **kwargs) + return DialectDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def audio_emotion_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioEmotionDetection: + def sentiment_analysis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SentimentAnalysis: """ - Audio Emotion Detection is a technology that analyzes vocal characteristics and - patterns in audio recordings to identify and classify the emotional state of - the speaker. 
+ Determines the sentiment or emotion (e.g., positive, negative, neutral) of a +piece of text, aiding in understanding user feedback or market sentiment. """ - return AudioEmotionDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SentimentAnalysis(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_summarization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSummarization: + def speech_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechEmbedding: """ - Text summarization is the process of condensing a large body of text into a - shorter version, capturing the main points and essential information while - maintaining coherence and meaning. + Transforms spoken content into a fixed-size vector in a high-dimensional space +that captures the content's essence. Facilitates tasks like speech recognition +and speaker verification. """ - return TextSummarization(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SpeechEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) - def entity_linking(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> EntityLinking: + def text_generation_metric_default(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGenerationMetricDefault: """ - Entity Linking is the process of identifying and connecting mentions of - entities within a text to their corresponding entries in a structured knowledge - base, thereby enabling the disambiguation of terms and enhancing the - understanding of the text's context. + The "Text Generation Metric Default" function provides a standard set of +evaluation metrics for assessing the quality and performance of text generation +models. """ - return EntityLinking(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextGenerationMetricDefault(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGenerationMetric: + def audio_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioGenerationMetric: """ - A Text Generation Metric is a quantitative measure used to evaluate the quality - and effectiveness of text produced by natural language processing models, often - assessing aspects such as coherence, relevance, fluency, and adherence to given - prompts or instructions. + The Audio Generation Metric is a quantitative measure used to evaluate the +quality, accuracy, and overall performance of audio generated by artificial +intelligence systems, often considering factors such as fidelity, +intelligibility, and similarity to human-produced audio. """ - return TextGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AudioGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) - def split_on_linebreak(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SplitOnLinebreak: + def audio_language_identification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioLanguageIdentification: """ - The "Split On Linebreak" function divides a given string into a list of - substrings, using linebreaks (newline characters) as the points of separation. + Audio Language Identification is a process that involves analyzing an audio +recording to determine the language being spoken. 
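+
+        Example (illustrative only; the asset id is a placeholder and
+        PipelineFactory.init is assumed):
+
+            from aixplain.factories import PipelineFactory
+
+            pipeline = PipelineFactory.init("audio-lid-demo")
+            node = pipeline.audio_language_identification(asset_id="<lid-id>")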
""" - return SplitOnLinebreak(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AudioLanguageIdentification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def sentiment_analysis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SentimentAnalysis: + def video_label_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoLabelDetection: """ - Sentiment Analysis is a natural language processing technique used to determine - and classify the emotional tone or subjective information expressed in a piece - of text, such as identifying whether the sentiment is positive, negative, or - neutral. + Identifies and tags objects, scenes, or activities within a video. Useful for +content indexing and recommendation systems. """ - return SentimentAnalysis(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VideoLabelDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def keyword_spotting(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> KeywordSpotting: + def topic_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TopicClassification: """ - Keyword Spotting is a function that enables the detection and identification of - specific words or phrases within a stream of audio, often used in voice- - activated systems to trigger actions or commands based on recognized keywords. + Assigns categories or topics to a piece of text based on its content, +facilitating content organization and retrieval. """ - return KeywordSpotting(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TopicClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextClassification: + def referenceless_text_generation_metric_default(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ReferencelessTextGenerationMetricDefault: """ - Text Classification is a natural language processing task that involves - categorizing text into predefined labels or classes based on its content, - enabling automated organization, filtering, and analysis of large volumes of - textual data. + The Referenceless Text Generation Metric Default is a function designed to +evaluate the quality of generated text without relying on reference texts for +comparison. """ - return TextClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ReferencelessTextGenerationMetricDefault(*args, asset_id=asset_id, pipeline=self, **kwargs) - def other__multipurpose_(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> OtherMultipurpose: + def image_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageContentModeration: """ - The "Other (Multipurpose)" function serves as a versatile category designed to - accommodate a wide range of tasks and activities that do not fit neatly into - predefined classifications, offering flexibility and adaptability for various - needs. + Detects and filters out inappropriate or harmful images, essential for +platforms with user-generated visual content. 
""" - return OtherMultipurpose(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ImageContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def speech_synthesis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechSynthesis: + def asr_age_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrAgeClassification: """ - Speech synthesis is the artificial production of human speech, typically - achieved through software or hardware systems that convert text into spoken - words, enabling machines to communicate verbally with users. + The ASR Age Classification function is designed to analyze audio recordings of +speech to determine the speaker's age group by leveraging automatic speech +recognition (ASR) technology and machine learning algorithms. """ - return SpeechSynthesis(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AsrAgeClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def audio_intent_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioIntentDetection: + def asr_gender_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrGenderClassification: """ - Audio Intent Detection is a process that involves analyzing audio signals to - identify and interpret the underlying intentions or purposes behind spoken - words, enabling systems to understand and respond appropriately to human - speech. + The ASR Gender Classification function analyzes audio recordings to determine +and classify the speaker's gender based on their voice characteristics. """ - return AudioIntentDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AsrGenderClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def video_label_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoLabelDetection: + def base_model(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> BaseModel: """ - Video Label Detection is a function that automatically identifies and tags - various objects, scenes, activities, and other relevant elements within a - video, providing descriptive labels that enhance searchability and content - organization. + The Base-Model function serves as a foundational framework designed to provide +essential features and capabilities upon which more specialized or advanced +models can be built and customized. """ - return VideoLabelDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + return BaseModel(*args, asset_id=asset_id, pipeline=self, **kwargs) - def asr_quality_estimation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AsrQualityEstimation: + def language_identification_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> LanguageIdentificationAudio: """ - ASR Quality Estimation is a process that evaluates the accuracy and reliability - of automatic speech recognition systems by analyzing their performance in - transcribing spoken language into text. + The Language Identification Audio function analyzes audio input to determine +and identify the language being spoken. 
""" - return AsrQualityEstimation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return LanguageIdentificationAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) - def audio_transcript_analysis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioTranscriptAnalysis: + def multi_class_image_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> MultiClassImageClassification: """ - Audio Transcript Analysis is a process that involves converting spoken language - from audio recordings into written text, followed by examining and interpreting - the transcribed content to extract meaningful insights, identify patterns, and - derive actionable information. + Multi Class Image Classification is a machine learning task where an algorithm +is trained to categorize images into one of several predefined classes or +categories based on their visual content. """ - return AudioTranscriptAnalysis(*args, asset_id=asset_id, pipeline=self, **kwargs) + return MultiClassImageClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) - def search(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Search: + def semantic_segmentation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SemanticSegmentation: """ - The "Search" function allows users to input keywords or phrases to quickly - locate specific information, files, or content within a database, website, or - application. + Semantic segmentation is a computer vision process that involves classifying +each pixel in an image into a predefined category, effectively partitioning the +image into meaningful segments based on the objects or regions they represent. """ - return Search(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SemanticSegmentation(*args, asset_id=asset_id, pipeline=self, **kwargs) - def video_forced_alignment(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoForcedAlignment: + def instance_segmentation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> InstanceSegmentation: """ - Video Forced Alignment is a process that synchronizes video footage with - corresponding audio tracks by precisely aligning the visual and auditory - elements, ensuring that the movements of speakers' lips match the spoken words. + Instance segmentation is a computer vision task that involves detecting and +delineating each distinct object within an image, assigning a unique label and +precise boundary to every individual instance of objects, even if they belong +to the same category. """ - return VideoForcedAlignment(*args, asset_id=asset_id, pipeline=self, **kwargs) + return InstanceSegmentation(*args, asset_id=asset_id, pipeline=self, **kwargs) - def viseme_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VisemeGeneration: + def emotion_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> EmotionDetection: """ - Viseme Generation is the process of creating visual representations of - phonemes, which are the distinct units of sound in speech, to synchronize lip - movements with spoken words in animations or virtual avatars. + Identifies human emotions from text or audio, enhancing user experience in +chatbots or customer feedback analysis. 
""" - return VisemeGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) + return EmotionDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def topic_classification(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TopicClassification: + def text_spam_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSpamDetection: """ - Topic Classification is a natural language processing function that categorizes - text into predefined topics or subjects based on its content, enabling - efficient organization and retrieval of information. + Identifies and filters out unwanted or irrelevant text content, ideal for +moderating user-generated content or ensuring quality in communication +platforms. """ - return TopicClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextSpamDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def offensive_language_identification( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> OffensiveLanguageIdentification: + def text_denormalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextDenormalization: """ - Offensive Language Identification is a function that analyzes text to detect - and flag language that is abusive, harmful, or inappropriate, helping to - maintain a respectful and safe communication environment. + Converts standardized or normalized text into its original, often more +readable, form. Useful in natural language generation tasks. """ - return OffensiveLanguageIdentification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextDenormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) - def speech_translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechTranslation: + def referenceless_audio_generation_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ReferencelessAudioGenerationMetric: """ - Speech Translation is a technology that converts spoken language in real-time - from one language to another, enabling seamless communication between speakers - of different languages. + The Referenceless Audio Generation Metric is a tool designed to evaluate the +quality of generated audio content without the need for a reference or original +audio sample for comparison. """ - return SpeechTranslation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ReferencelessAudioGenerationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) - def speaker_diarization_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeakerDiarizationAudio: + def audio_forced_alignment(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioForcedAlignment: """ - Speaker Diarization Audio is a process that involves segmenting an audio - recording into distinct sections, each corresponding to a different speaker, in - order to identify and differentiate between multiple speakers within the same - audio stream. + Synchronizes phonetic and phonological text with the corresponding segments in +an audio file. Useful in linguistic research and detailed transcription tasks. 
""" - return SpeakerDiarizationAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AudioForcedAlignment(*args, asset_id=asset_id, pipeline=self, **kwargs) - def audio_transcript_improvement(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioTranscriptImprovement: + def video_forced_alignment(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoForcedAlignment: """ - Audio Transcript Improvement is a function that enhances the accuracy and - clarity of transcribed audio recordings by correcting errors, refining - language, and ensuring the text faithfully represents the original spoken - content. + Aligns the transcription of spoken content in a video with its corresponding +timecodes, facilitating subtitle creation. """ - return AudioTranscriptImprovement(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VideoForcedAlignment(*args, asset_id=asset_id, pipeline=self, **kwargs) - def speech_non_speech_classification( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> SpeechNonSpeechClassification: + def classification_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ClassificationMetric: """ - The function "Speech or Non-Speech Classification" is designed to analyze audio - input and determine whether the sound is human speech or non-speech noise, - enabling applications such as voice recognition systems to filter out - irrelevant background sounds. + A Classification Metric is a quantitative measure used to evaluate the quality +and effectiveness of classification models. """ - return SpeechNonSpeechClassification(*args, asset_id=asset_id, pipeline=self, **kwargs) + return ClassificationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_denormalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextDenormalization: + def auto_mask_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AutoMaskGeneration: """ - Text Denormalization is the process of converting abbreviated, contracted, or - otherwise simplified text into its full, standard form, often to improve - readability and ensure consistency in natural language processing tasks. + Auto-mask generation refers to the automated process of creating masks in image +processing or computer vision, typically for segmentation tasks. A mask is a +binary or multi-class image that labels different parts of an image, usually +separating the foreground (objects of interest) from the background, or +identifying specific object classes in an image. """ - return TextDenormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AutoMaskGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def image_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ImageContentModeration: + def text_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextEmbedding: """ - Image Content Moderation is a process that involves analyzing and filtering - images to detect and manage inappropriate, harmful, or sensitive content, - ensuring compliance with community guidelines and legal standards. + Text embedding is a process that converts text into numerical vectors, +capturing the semantic meaning and contextual relationships of words or +phrases, enabling machines to understand and analyze natural language more +effectively. 
""" - return ImageContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) - def referenceless_text_generation_metric_default( - self, asset_id: Union[str, asset.Asset], *args, **kwargs - ) -> ReferencelessTextGenerationMetricDefault: + def fact_checking(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> FactChecking: """ - The Referenceless Text Generation Metric Default is a function designed to - evaluate the quality of generated text without relying on reference texts for - comparison. + Fact Checking is the process of verifying the accuracy and truthfulness of +information, statements, or claims by cross-referencing with reliable sources +and evidence. """ - return ReferencelessTextGenerationMetricDefault(*args, asset_id=asset_id, pipeline=self, **kwargs) + return FactChecking(*args, asset_id=asset_id, pipeline=self, **kwargs) - def named_entity_recognition(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> NamedEntityRecognition: + def text_to_audio(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToAudio: """ - Named Entity Recognition (NER) is a natural language processing task that - involves identifying and classifying proper nouns in text into predefined - categories such as names of people, organizations, locations, dates, and other - entities. + The Text to Audio function converts written text into spoken words, allowing +users to listen to the content instead of reading it. """ - return NamedEntityRecognition(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextToAudio(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextContentModeration: + def fill_text_mask(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> FillTextMask: """ - Text Content Moderation is the process of reviewing, filtering, and managing - user-generated content to ensure it adheres to community guidelines, legal - standards, and platform policies, thereby maintaining a safe and respectful - online environment. + Completes missing parts of a text based on the context, ideal for content +generation or data augmentation tasks. """ - return TextContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) + return FillTextMask(*args, asset_id=asset_id, pipeline=self, **kwargs) - def speaker_diarization_video(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeakerDiarizationVideo: + def voice_cloning(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VoiceCloning: """ - The Speaker Diarization Video function identifies and segments different - speakers in a video, attributing portions of the audio to individual speakers - to facilitate analysis and understanding of multi-speaker conversations. + Replicates a person's voice based on a sample, allowing for the generation of +speech in that person's tone and style. Used cautiously due to ethical +considerations. 
""" - return SpeakerDiarizationVideo(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VoiceCloning(*args, asset_id=asset_id, pipeline=self, **kwargs) - def split_on_silence(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SplitOnSilence: + def diacritization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Diacritization: """ - The "Split On Silence" function divides an audio recording into separate - segments based on periods of silence, allowing for easier editing and analysis - of individual sections. + Adds diacritical marks to text, essential for languages where meaning can +change based on diacritics. """ - return SplitOnSilence(*args, asset_id=asset_id, pipeline=self, **kwargs) + return Diacritization(*args, asset_id=asset_id, pipeline=self, **kwargs) - def emotion_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> EmotionDetection: + def speech_translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechTranslation: """ - Emotion Detection is a process that involves analyzing text to identify and - categorize the emotional states or sentiments expressed by individuals, such as - happiness, sadness, anger, or fear. + Speech Translation is a technology that converts spoken language in real-time +from one language to another, enabling seamless communication between speakers +of different languages. """ - return EmotionDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SpeechTranslation(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_spam_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextSpamDetection: + def speech_synthesis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechSynthesis: """ - Text Spam Detection is a process that involves analyzing and identifying - unsolicited or irrelevant messages within text communications, typically using - algorithms and machine learning techniques to filter out spam and ensure the - integrity of the communication platform. + Generates human-like speech from written text. Ideal for text-to-speech +applications, audiobooks, and voice assistants. """ - return TextSpamDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SpeechSynthesis(*args, asset_id=asset_id, pipeline=self, **kwargs) - def translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Translation: + def text_content_moderation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextContentModeration: """ - Translation is the process of converting text from one language into an - equivalent text in another language, preserving the original meaning and - context. + Scans and identifies potentially harmful, offensive, or inappropriate textual +content, ensuring safer user environments. """ - return Translation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextContentModeration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def voice_activity_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VoiceActivityDetection: + def subtitling_translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SubtitlingTranslation: """ - Voice Activity Detection (VAD) is a technology that identifies the presence or - absence of human speech within an audio signal, enabling systems to distinguish - between spoken words and background noise. + Converts the text of subtitles from one language to another, ensuring context +and cultural nuances are maintained. 
Essential for global content distribution. """ - return VoiceActivityDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) + return SubtitlingTranslation(*args, asset_id=asset_id, pipeline=self, **kwargs) - def speech_embedding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechEmbedding: + def audio_transcript_analysis(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> AudioTranscriptAnalysis: """ - Speech Embedding is a process that transforms spoken language into a fixed- - dimensional vector representation, capturing essential features and - characteristics of the speech for tasks such as recognition, classification, - and analysis. + Analyzes transcribed audio data for insights, patterns, or specific information +extraction. """ - return SpeechEmbedding(*args, asset_id=asset_id, pipeline=self, **kwargs) + return AudioTranscriptAnalysis(*args, asset_id=asset_id, pipeline=self, **kwargs) - def subtitling_translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SubtitlingTranslation: + def text_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGeneration: """ - Subtitling Translation is the process of converting spoken dialogue from one - language into written text in another language, which is then displayed on- - screen to aid viewers in understanding the content. + Creates coherent and contextually relevant textual content based on prompts or +certain parameters. Useful for chatbots, content creation, and data +augmentation. """ - return SubtitlingTranslation(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextGeneration: + def text_normalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextNormalization: """ - Text Generation is a process in which artificial intelligence models, such as - neural networks, produce coherent and contextually relevant text based on a - given input or prompt, often mimicking human writing styles and patterns. + Converts unstructured or non-standard textual data into a more readable and +uniform format, dealing with abbreviations, numerals, and other non-standard +words. """ - return TextGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) + return TextNormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) - def video_understanding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoUnderstanding: + def voice_activity_detection(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VoiceActivityDetection: """ - Video Understanding is the process of analyzing and interpreting video content - to extract meaningful information, such as identifying objects, actions, - events, and contextual relationships within the footage. + Determines when a person is speaking in an audio clip. It's an essential +preprocessing step for other audio-related tasks. 
""" - return VideoUnderstanding(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VoiceActivityDetection(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_to_video_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToVideoGeneration: + def video_understanding(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> VideoUnderstanding: """ - Text To Video Generation is a process that converts written descriptions or - scripts into dynamic, visual video content using advanced algorithms and - artificial intelligence. + Video Understanding is the process of analyzing and interpreting video content +to extract meaningful information, such as identifying objects, actions, +events, and contextual relationships within the footage. """ - return TextToVideoGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs) + return VideoUnderstanding(*args, asset_id=asset_id, pipeline=self, **kwargs) - def text_normalization(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextNormalization: + def translation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Translation: """ - Text normalization is the process of transforming text into a standard, - consistent format by correcting spelling errors, converting all characters to a - uniform case, removing punctuation, and expanding abbreviations to improve the - text's readability and usability for further processing or analysis. + Converts text from one language to another while maintaining the original +message's essence and context. Crucial for global communication. """ - return TextNormalization(*args, asset_id=asset_id, pipeline=self, **kwargs) + return Translation(*args, asset_id=asset_id, pipeline=self, **kwargs) def speech_recognition(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> SpeechRecognition: """ - Speech recognition is a technology that enables a computer or device to - identify and process spoken language, converting it into text. + Converts spoken language into written text. Useful for transcription services, +voice assistants, and applications requiring voice-to-text capabilities. """ return SpeechRecognition(*args, asset_id=asset_id, pipeline=self, **kwargs) def subtitling(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> Subtitling: """ - Subtitling is the process of displaying written text on a screen to represent - the spoken dialogue, narration, or other audio elements in a video, typically - to aid viewers who are deaf or hard of hearing, or to provide translations for - audiences who speak different languages. + Generates accurate subtitles for videos, enhancing accessibility for diverse +audiences. """ return Subtitling(*args, asset_id=asset_id, pipeline=self, **kwargs) - def classification_metric(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> ClassificationMetric: - """ - A Classification Metric is a quantitative measure used to evaluate the quality - and effectiveness of classification models. - """ - return ClassificationMetric(*args, asset_id=asset_id, pipeline=self, **kwargs) - - def text_to_image_generation(self, asset_id: Union[str, asset.Asset], *args, **kwargs) -> TextToImageGeneration: - """ - Text To Image Generation is a process where a system creates visual images - based on descriptive text input, translating written language into - corresponding graphical representations. 
-        """
-        return TextToImageGeneration(*args, asset_id=asset_id, pipeline=self, **kwargs)

From 4096c7f98f48366eedcc762626721627b90196fe Mon Sep 17 00:00:00 2001
From: Kadir Pekel
Date: Thu, 19 Dec 2024 18:21:07 +0100
Subject: [PATCH 4/5] ENG-1235 Only input params are taken from node spec for
 utility functions

---
 aixplain/modules/pipeline/designer/nodes.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/aixplain/modules/pipeline/designer/nodes.py b/aixplain/modules/pipeline/designer/nodes.py
index 8be8350b..7e6e1803 100644
--- a/aixplain/modules/pipeline/designer/nodes.py
+++ b/aixplain/modules/pipeline/designer/nodes.py
@@ -97,7 +97,9 @@ def populate_asset(self):
     def _auto_populate_params(self):
         from aixplain.enums.function import FunctionInputOutput
 
-        # When the node is a utility, we need to create it's parameters
+        spec = FunctionInputOutput[self.function]["spec"]
+
+        # When the node is a utility, we need to create its input parameters
         # dynamically by referring the node data.
         if self.function == Function.UTILITIES:
             for param in self.asset.input_params.values():
@@ -106,12 +108,7 @@ def _auto_populate_params(self):
                         data_type=param["dataType"],
                         is_required=param["required"],
                     )
-                for param in self.asset.output_params.values():
-                    self.outputs.create_param(
-                        code=param["code"], data_type=param["dataType"]
-                    )
         else:
-            spec = FunctionInputOutput[self.function]["spec"]
             for item in spec["params"]:
                 self.inputs.create_param(
                     code=item["code"],
@@ -119,11 +116,11 @@ def _auto_populate_params(self):
                     is_required=item["required"],
                 )
 
-            for item in spec["output"]:
-                self.outputs.create_param(
-                    code=item["code"],
-                    data_type=item["dataType"],
-                )
+        for item in spec["output"]:
+            self.outputs.create_param(
+                code=item["code"],
+                data_type=item["dataType"],
+            )
 
     def _auto_set_params(self):
         for k, v in self.asset.additional_info["parameters"].items():

From 6087fe934b1b4f24265c3a55b136452d4147627b Mon Sep 17 00:00:00 2001
From: Kadir Pekel
Date: Thu, 19 Dec 2024 18:25:01 +0100
Subject: [PATCH 5/5] ENG-1235 minor

---
 aixplain/modules/pipeline/designer/pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aixplain/modules/pipeline/designer/pipeline.py b/aixplain/modules/pipeline/designer/pipeline.py
index ed12016c..58c46112 100644
--- a/aixplain/modules/pipeline/designer/pipeline.py
+++ b/aixplain/modules/pipeline/designer/pipeline.py
@@ -262,7 +262,7 @@ def utility(
         self, asset_id: str, *args, asset_class: Type[T] = Utility, **kwargs
     ) -> T:
         """
-        Shortcut to create an asset node for the current pipeline.
+        Shortcut to create a utility node for the current pipeline.
 
         All params will be passed as keyword arguments to the node
         constructor.
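
Reviewer note: a minimal usage sketch of what this series enables, for context
only. The designer entry points used below (`PipelineFactory.init`,
`pipeline.input()`, `link`, `use_output`, `save`) are the documented designer
API as I understand it, and the asset id and the "text" param code are
placeholders, not values from this series — after PATCH 1/5 and PATCH 4/5, a
utility node's input params are populated dynamically from the asset's own
`input_params` rather than from the static UTILITIES function spec, so the
real param codes depend on the chosen utility asset.

```python
from aixplain.factories import PipelineFactory

# All names and ids below are illustrative placeholders.
pipeline = PipelineFactory.init("utility-params-demo")

# Pipeline input node that will feed the utility.
input_node = pipeline.input()

# New shortcut from PATCH 5/5: builds a Utility asset node whose input
# params were created from the asset's own spec at construction time
# (PATCH 4/5 keeps the outputs on the generic UTILITIES function spec).
utility_node = pipeline.utility("<utility-asset-id>")

# "text" stands in for whichever input param code the asset declares.
input_node.link(utility_node, "input", "text")
utility_node.use_output("data")

pipeline.save()
```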