|
15 | 15 | # specific language governing permissions and limitations
|
16 | 16 | # under the License.
|
17 | 17 |
|
18 |
| -from abc import ABCMeta, abstractmethod |
19 |
| -from typing import List |
| 18 | +"""DataFusion python package. |
| 19 | +
|
| 20 | +This is a Python library that binds to Apache Arrow in-memory query engine DataFusion. |
| 21 | +See https://datafusion.apache.org/python for more information. |
| 22 | +""" |
20 | 23 |
|
21 | 24 | try:
|
22 | 25 | import importlib.metadata as importlib_metadata
|
23 | 26 | except ImportError:
|
24 | 27 | import importlib_metadata
|
25 | 28 |
|
26 |
| -import pyarrow as pa |
27 |
| - |
28 |
| -from ._internal import ( |
29 |
| - AggregateUDF, |
30 |
| - Config, |
31 |
| - DataFrame, |
| 29 | +from .context import ( |
32 | 30 | SessionContext,
|
33 | 31 | SessionConfig,
|
34 | 32 | RuntimeConfig,
|
35 |
| - ScalarUDF, |
36 | 33 | SQLOptions,
|
37 | 34 | )
|
38 | 35 |
|
| 36 | +# The following imports are okay to remain as opaque to the user. |
| 37 | +from ._internal import Config |
| 38 | + |
| 39 | +from .udf import ScalarUDF, AggregateUDF, Accumulator |
| 40 | + |
39 | 41 | from .common import (
|
40 | 42 | DFSchema,
|
41 | 43 | )
|
42 | 44 |
|
| 45 | +from .dataframe import DataFrame |
| 46 | + |
43 | 47 | from .expr import (
|
44 |
| - Alias, |
45 |
| - Analyze, |
46 | 48 | Expr,
|
47 |
| - Filter, |
48 |
| - Limit, |
49 |
| - Like, |
50 |
| - ILike, |
51 |
| - Projection, |
52 |
| - SimilarTo, |
53 |
| - ScalarVariable, |
54 |
| - Sort, |
55 |
| - TableScan, |
56 |
| - Not, |
57 |
| - IsNotNull, |
58 |
| - IsTrue, |
59 |
| - IsFalse, |
60 |
| - IsUnknown, |
61 |
| - IsNotTrue, |
62 |
| - IsNotFalse, |
63 |
| - IsNotUnknown, |
64 |
| - Negative, |
65 |
| - InList, |
66 |
| - Exists, |
67 |
| - Subquery, |
68 |
| - InSubquery, |
69 |
| - ScalarSubquery, |
70 |
| - GroupingSet, |
71 |
| - Placeholder, |
72 |
| - Case, |
73 |
| - Cast, |
74 |
| - TryCast, |
75 |
| - Between, |
76 |
| - Explain, |
77 |
| - CreateMemoryTable, |
78 |
| - SubqueryAlias, |
79 |
| - Extension, |
80 |
| - CreateView, |
81 |
| - Distinct, |
82 |
| - DropTable, |
83 |
| - Repartition, |
84 |
| - Partitioning, |
85 |
| - Window, |
86 | 49 | WindowFrame,
|
87 | 50 | )
|
88 | 51 |
|
89 | 52 | __version__ = importlib_metadata.version(__name__)
|
90 | 53 |
|
91 | 54 | __all__ = [
|
| 55 | + "Accumulator", |
92 | 56 | "Config",
|
93 | 57 | "DataFrame",
|
94 | 58 | "SessionContext",
|
95 | 59 | "SessionConfig",
|
96 | 60 | "SQLOptions",
|
97 | 61 | "RuntimeConfig",
|
98 | 62 | "Expr",
|
99 |
| - "AggregateUDF", |
100 | 63 | "ScalarUDF",
|
101 |
| - "Window", |
102 | 64 | "WindowFrame",
|
103 | 65 | "column",
|
104 | 66 | "literal",
|
105 |
| - "TableScan", |
106 |
| - "Projection", |
107 | 67 | "DFSchema",
|
108 |
| - "DFField", |
109 |
| - "Analyze", |
110 |
| - "Sort", |
111 |
| - "Limit", |
112 |
| - "Filter", |
113 |
| - "Like", |
114 |
| - "ILike", |
115 |
| - "SimilarTo", |
116 |
| - "ScalarVariable", |
117 |
| - "Alias", |
118 |
| - "Not", |
119 |
| - "IsNotNull", |
120 |
| - "IsTrue", |
121 |
| - "IsFalse", |
122 |
| - "IsUnknown", |
123 |
| - "IsNotTrue", |
124 |
| - "IsNotFalse", |
125 |
| - "IsNotUnknown", |
126 |
| - "Negative", |
127 |
| - "ScalarFunction", |
128 |
| - "BuiltinScalarFunction", |
129 |
| - "InList", |
130 |
| - "Exists", |
131 |
| - "Subquery", |
132 |
| - "InSubquery", |
133 |
| - "ScalarSubquery", |
134 |
| - "GroupingSet", |
135 |
| - "Placeholder", |
136 |
| - "Case", |
137 |
| - "Cast", |
138 |
| - "TryCast", |
139 |
| - "Between", |
140 |
| - "Explain", |
141 |
| - "SubqueryAlias", |
142 |
| - "Extension", |
143 |
| - "CreateMemoryTable", |
144 |
| - "CreateView", |
145 |
| - "Distinct", |
146 |
| - "DropTable", |
147 |
| - "Repartition", |
148 |
| - "Partitioning", |
149 | 68 | ]
|
150 | 69 |
|
151 | 70 |
|
152 |
| -class Accumulator(metaclass=ABCMeta): |
153 |
| - @abstractmethod |
154 |
| - def state(self) -> List[pa.Scalar]: |
155 |
| - pass |
156 |
| - |
157 |
| - @abstractmethod |
158 |
| - def update(self, values: pa.Array) -> None: |
159 |
| - pass |
160 |
| - |
161 |
| - @abstractmethod |
162 |
| - def merge(self, states: pa.Array) -> None: |
163 |
| - pass |
164 |
| - |
165 |
| - @abstractmethod |
166 |
| - def evaluate(self) -> pa.Scalar: |
167 |
| - pass |
168 |
| - |
169 |
| - |
170 |
| -def column(value): |
| 71 | +def column(value: str): |
| 72 | + """Create a column expression.""" |
171 | 73 | return Expr.column(value)
|
172 | 74 |
|
173 | 75 |
|
174 | 76 | col = column
|
175 | 77 |
|
176 | 78 |
|
177 | 79 | def literal(value):
|
178 |
| - if not isinstance(value, pa.Scalar): |
179 |
| - value = pa.scalar(value) |
| 80 | + """Create a literal expression.""" |
180 | 81 | return Expr.literal(value)
|
181 | 82 |
|
182 | 83 |
|
183 | 84 | lit = literal
|
184 | 85 |
|
| 86 | +udf = ScalarUDF.udf |
185 | 87 |
|
186 |
| -def udf(func, input_types, return_type, volatility, name=None): |
187 |
| - """ |
188 |
| - Create a new User Defined Function |
189 |
| - """ |
190 |
| - if not callable(func): |
191 |
| - raise TypeError("`func` argument must be callable") |
192 |
| - if name is None: |
193 |
| - name = func.__qualname__.lower() |
194 |
| - return ScalarUDF( |
195 |
| - name=name, |
196 |
| - func=func, |
197 |
| - input_types=input_types, |
198 |
| - return_type=return_type, |
199 |
| - volatility=volatility, |
200 |
| - ) |
201 |
| - |
202 |
| - |
203 |
| -def udaf(accum, input_type, return_type, state_type, volatility, name=None): |
204 |
| - """ |
205 |
| - Create a new User Defined Aggregate Function |
206 |
| - """ |
207 |
| - if not issubclass(accum, Accumulator): |
208 |
| - raise TypeError("`accum` must implement the abstract base class Accumulator") |
209 |
| - if name is None: |
210 |
| - name = accum.__qualname__.lower() |
211 |
| - if isinstance(input_type, pa.lib.DataType): |
212 |
| - input_type = [input_type] |
213 |
| - return AggregateUDF( |
214 |
| - name=name, |
215 |
| - accumulator=accum, |
216 |
| - input_type=input_type, |
217 |
| - return_type=return_type, |
218 |
| - state_type=state_type, |
219 |
| - volatility=volatility, |
220 |
| - ) |
| 88 | +udaf = AggregateUDF.udaf |
0 commit comments