Skip to content

Crypto Function Migration #12840

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 47 additions & 1 deletion datafusion/functions/src/crypto/digest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@
use super::basic::{digest, utf8_or_binary_to_binary_type};
use arrow::datatypes::DataType;
use datafusion_common::Result;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
use datafusion_expr::{
ColumnarValue, ScalarUDFImpl, Signature, TypeSignature::*, Volatility,
ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature::*, Volatility,
};
use std::any::Any;
use std::sync::OnceLock;

#[derive(Debug)]
pub struct DigestFunc {
Expand Down Expand Up @@ -69,4 +71,48 @@ impl ScalarUDFImpl for DigestFunc {
/// Evaluates the function by forwarding `args` unchanged to the `digest`
/// helper imported from `super::basic`, returning whatever it returns.
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
digest(args)
}

/// Returns the user-facing documentation for `digest`.
///
/// Always `Some`: the value comes from `get_digest_doc`, which hands out a
/// `'static` reference to the lazily-built `Documentation`.
fn documentation(&self) -> Option<&Documentation> {
Some(get_digest_doc())
}
}

/// Process-wide cache holding the `digest` documentation once it has been built.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared documentation for the `digest` function.
///
/// The `Documentation` is assembled on the first call and stored in
/// [`DOCUMENTATION`]; later calls return the same `'static` reference.
/// The builder result is unwrapped, so a failed build panics.
fn get_digest_doc() -> &'static Documentation {
    const DESCRIPTION: &str =
        "Computes the binary hash of an expression using the specified algorithm.";
    const SYNTAX: &str = "digest(expression, algorithm)";
    const SQL_EXAMPLE: &str = r#"```sql
> select digest('foo', 'sha256');
+------------------------------------------+
| digest(Utf8("foo"), Utf8("sha256")) |
+------------------------------------------+
| <binary_hash_result> |
+------------------------------------------+
```"#;
    // Free-form description of the second argument, listing the accepted names.
    const ALGORITHM_HELP: &str =
        "String expression specifying algorithm to use. Must be one of:

- md5
- sha224
- sha256
- sha384
- sha512
- blake2s
- blake2b
- blake3";

    DOCUMENTATION.get_or_init(|| {
        Documentation::builder()
            .with_doc_section(DOC_SECTION_HASHING)
            .with_description(DESCRIPTION)
            .with_syntax_example(SYNTAX)
            .with_sql_example(SQL_EXAMPLE)
            .with_standard_argument("expression", "String")
            .with_argument("algorithm", ALGORITHM_HELP)
            .build()
            .unwrap()
    })
}
34 changes: 33 additions & 1 deletion datafusion/functions/src/crypto/md5.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@
use crate::crypto::basic::md5;
use arrow::datatypes::DataType;
use datafusion_common::{plan_err, Result};
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use std::any::Any;
use std::sync::OnceLock;

#[derive(Debug)]
pub struct Md5Func {
Expand Down Expand Up @@ -84,4 +88,32 @@ impl ScalarUDFImpl for Md5Func {
/// Evaluates the function by forwarding `args` unchanged to the `md5`
/// helper imported from `crate::crypto::basic`, returning whatever it returns.
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
md5(args)
}

/// Returns the user-facing documentation for `md5`.
///
/// Always `Some`: the value comes from `get_md5_doc`, which hands out a
/// `'static` reference to the lazily-built `Documentation`.
fn documentation(&self) -> Option<&Documentation> {
Some(get_md5_doc())
}
}

/// Process-wide cache holding the `md5` documentation once it has been built.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared documentation for the `md5` function.
///
/// The `Documentation` is assembled on the first call and stored in
/// [`DOCUMENTATION`]; later calls return the same `'static` reference.
/// The builder result is unwrapped, so a failed build panics.
fn get_md5_doc() -> &'static Documentation {
    const DESCRIPTION: &str = "Computes an MD5 128-bit checksum for a string expression.";
    const SYNTAX: &str = "md5(expression)";
    const SQL_EXAMPLE: &str = r#"```sql
> select md5('foo');
+-------------------------------------+
| md5(Utf8("foo")) |
+-------------------------------------+
| <md5_checksum_result> |
+-------------------------------------+
```"#;

    DOCUMENTATION.get_or_init(|| {
        Documentation::builder()
            .with_doc_section(DOC_SECTION_HASHING)
            .with_description(DESCRIPTION)
            .with_syntax_example(SYNTAX)
            .with_sql_example(SQL_EXAMPLE)
            .with_standard_argument("expression", "String")
            .build()
            .unwrap()
    })
}
10 changes: 10 additions & 0 deletions datafusion/functions/src/crypto/sha224.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ fn get_sha224_doc() -> &'static Documentation {
.with_doc_section(DOC_SECTION_HASHING)
.with_description("Computes the SHA-224 hash of a binary string.")
.with_syntax_example("sha224(expression)")
.with_sql_example(
r#"```sql
> select sha224('foo');
+------------------------------------------+
| sha224(Utf8("foo")) |
+------------------------------------------+
| <sha224_hash_result> |
+------------------------------------------+
```"#,
)
.with_standard_argument("expression", "String")
.build()
.unwrap()
Expand Down
35 changes: 34 additions & 1 deletion datafusion/functions/src/crypto/sha256.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@
use super::basic::{sha256, utf8_or_binary_to_binary_type};
use arrow::datatypes::DataType;
use datafusion_common::Result;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use std::any::Any;
use std::sync::OnceLock;

#[derive(Debug)]
pub struct SHA256Func {
Expand Down Expand Up @@ -60,7 +64,36 @@ impl ScalarUDFImpl for SHA256Func {
/// Determines the output type from the type of the first argument by
/// delegating to `utf8_or_binary_to_binary_type`, passing this function's
/// name along.
///
/// NOTE(review): `arg_types[0]` is indexed without a length check and would
/// panic on an empty slice — presumably the declared signature guarantees
/// exactly one argument; confirm against the signature definition.
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
utf8_or_binary_to_binary_type(&arg_types[0], self.name())
}

/// Evaluates the function by forwarding `args` unchanged to the `sha256`
/// helper imported from `super::basic`, returning whatever it returns.
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
sha256(args)
}

/// Returns the user-facing documentation for `sha256`.
///
/// Always `Some`: the value comes from `get_sha256_doc`, which hands out a
/// `'static` reference to the lazily-built `Documentation`.
fn documentation(&self) -> Option<&Documentation> {
Some(get_sha256_doc())
}
}

/// Process-wide cache holding the `sha256` documentation once it has been built.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared documentation for the `sha256` function.
///
/// The `Documentation` is assembled on the first call and stored in
/// [`DOCUMENTATION`]; later calls return the same `'static` reference.
/// The builder result is unwrapped, so a failed build panics.
fn get_sha256_doc() -> &'static Documentation {
    const DESCRIPTION: &str = "Computes the SHA-256 hash of a binary string.";
    const SYNTAX: &str = "sha256(expression)";
    const SQL_EXAMPLE: &str = r#"```sql
> select sha256('foo');
+--------------------------------------+
| sha256(Utf8("foo")) |
+--------------------------------------+
| <sha256_hash_result> |
+--------------------------------------+
```"#;

    DOCUMENTATION.get_or_init(|| {
        Documentation::builder()
            .with_doc_section(DOC_SECTION_HASHING)
            .with_description(DESCRIPTION)
            .with_syntax_example(SYNTAX)
            .with_sql_example(SQL_EXAMPLE)
            .with_standard_argument("expression", "String")
            .build()
            .unwrap()
    })
}
35 changes: 34 additions & 1 deletion datafusion/functions/src/crypto/sha384.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@
use super::basic::{sha384, utf8_or_binary_to_binary_type};
use arrow::datatypes::DataType;
use datafusion_common::Result;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use std::any::Any;
use std::sync::OnceLock;

#[derive(Debug)]
pub struct SHA384Func {
Expand Down Expand Up @@ -60,7 +64,36 @@ impl ScalarUDFImpl for SHA384Func {
/// Determines the output type from the type of the first argument by
/// delegating to `utf8_or_binary_to_binary_type`, passing this function's
/// name along.
///
/// NOTE(review): `arg_types[0]` is indexed without a length check and would
/// panic on an empty slice — presumably the declared signature guarantees
/// exactly one argument; confirm against the signature definition.
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
utf8_or_binary_to_binary_type(&arg_types[0], self.name())
}

/// Evaluates the function by forwarding `args` unchanged to the `sha384`
/// helper imported from `super::basic`, returning whatever it returns.
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
sha384(args)
}

/// Returns the user-facing documentation for `sha384`.
///
/// Always `Some`: the value comes from `get_sha384_doc`, which hands out a
/// `'static` reference to the lazily-built `Documentation`.
fn documentation(&self) -> Option<&Documentation> {
Some(get_sha384_doc())
}
}

/// Process-wide cache holding the `sha384` documentation once it has been built.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared documentation for the `sha384` function.
///
/// The `Documentation` is assembled on the first call and stored in
/// [`DOCUMENTATION`]; later calls return the same `'static` reference.
/// The builder result is unwrapped, so a failed build panics.
fn get_sha384_doc() -> &'static Documentation {
    const DESCRIPTION: &str = "Computes the SHA-384 hash of a binary string.";
    const SYNTAX: &str = "sha384(expression)";
    const SQL_EXAMPLE: &str = r#"```sql
> select sha384('foo');
+-----------------------------------------+
| sha384(Utf8("foo")) |
+-----------------------------------------+
| <sha384_hash_result> |
+-----------------------------------------+
```"#;

    DOCUMENTATION.get_or_init(|| {
        Documentation::builder()
            .with_doc_section(DOC_SECTION_HASHING)
            .with_description(DESCRIPTION)
            .with_syntax_example(SYNTAX)
            .with_sql_example(SQL_EXAMPLE)
            .with_standard_argument("expression", "String")
            .build()
            .unwrap()
    })
}
35 changes: 34 additions & 1 deletion datafusion/functions/src/crypto/sha512.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@
use super::basic::{sha512, utf8_or_binary_to_binary_type};
use arrow::datatypes::DataType;
use datafusion_common::Result;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use std::any::Any;
use std::sync::OnceLock;

#[derive(Debug)]
pub struct SHA512Func {
Expand Down Expand Up @@ -60,7 +64,36 @@ impl ScalarUDFImpl for SHA512Func {
/// Determines the output type from the type of the first argument by
/// delegating to `utf8_or_binary_to_binary_type`, passing this function's
/// name along.
///
/// NOTE(review): `arg_types[0]` is indexed without a length check and would
/// panic on an empty slice — presumably the declared signature guarantees
/// exactly one argument; confirm against the signature definition.
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
utf8_or_binary_to_binary_type(&arg_types[0], self.name())
}

/// Evaluates the function by forwarding `args` unchanged to the `sha512`
/// helper imported from `super::basic`, returning whatever it returns.
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
sha512(args)
}

/// Returns the user-facing documentation for `sha512`.
///
/// Always `Some`: the value comes from `get_sha512_doc`, which hands out a
/// `'static` reference to the lazily-built `Documentation`.
fn documentation(&self) -> Option<&Documentation> {
Some(get_sha512_doc())
}
}

/// Process-wide cache holding the `sha512` documentation once it has been built.
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared documentation for the `sha512` function.
///
/// The `Documentation` is assembled on the first call and stored in
/// [`DOCUMENTATION`]; later calls return the same `'static` reference.
/// The builder result is unwrapped, so a failed build panics.
fn get_sha512_doc() -> &'static Documentation {
    DOCUMENTATION.get_or_init(|| {
        Documentation::builder()
            .with_doc_section(DOC_SECTION_HASHING)
            .with_description("Computes the SHA-512 hash of a binary string.")
            .with_syntax_example("sha512(expression)")
            .with_sql_example(
                r#"```sql
> select sha512('foo');
+-------------------------------------------+
| sha512(Utf8("foo")) |
+-------------------------------------------+
| <sha512_hash_result> |
+-------------------------------------------+
```"#,
            )
            // Use the standard-argument helper like the other hashing functions
            // (digest, md5, sha224, sha256, sha384). Plain `with_argument` would
            // render the description as the literal text "String".
            .with_standard_argument("expression", "String")
            .build()
            .unwrap()
    })
}
84 changes: 0 additions & 84 deletions docs/source/user-guide/sql/scalar_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -3058,90 +3058,6 @@ select map_values(map([100, 5], [42,43]));
[42, 43]
```

## Hashing Functions

- [digest](#digest)
- [md5](#md5)
- [sha256](#sha256)
- [sha384](#sha384)
- [sha512](#sha512)

### `digest`

Computes the binary hash of an expression using the specified algorithm.

```
digest(expression, algorithm)
```

#### Arguments

- **expression**: String expression to operate on.
Can be a constant, column, or function, and any combination of string operators.
- **algorithm**: String expression specifying algorithm to use.
Must be one of:

- md5
- sha224
- sha256
- sha384
- sha512
- blake2s
- blake2b
- blake3

### `md5`

Computes an MD5 128-bit checksum for a string expression.

```
md5(expression)
```

#### Arguments

- **expression**: String expression to operate on.
Can be a constant, column, or function, and any combination of string operators.

### `sha256`

Computes the SHA-256 hash of a binary string.

```
sha256(expression)
```

#### Arguments

- **expression**: String expression to operate on.
Can be a constant, column, or function, and any combination of string operators.

### `sha384`

Computes the SHA-384 hash of a binary string.

```
sha384(expression)
```

#### Arguments

- **expression**: String expression to operate on.
Can be a constant, column, or function, and any combination of string operators.

### `sha512`

Computes the SHA-512 hash of a binary string.

```
sha512(expression)
```

#### Arguments

- **expression**: String expression to operate on.
Can be a constant, column, or function, and any combination of string operators.

## Other Functions

- [arrow_cast](#arrow_cast)
Expand Down
Loading
Loading