You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
When I try to pass a dict of columns to setPredefinedTypes(column_dict), pydeequ raises "An error occurred while calling o97.run. java.lang.String cannot be cast to scala.Enumeration$Value"
dict example = {'code': 'string', 'filler': 'int'}
Deequ version: 1.0.3
Pydeequ: 0.1.7
.setKLLParameters(KLLParameters(spark, 2, 0.64, 4))
File "/tmp/pydeequ.zip/pydeequ/profiles.py", line 103, in run
run = self._ColumnProfilerRunBuilder.run()
File "/opt/amazon/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1257, in call
answer, self.gateway_client, self.target_id, self.name)
File "/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 63, in deco
return f(*a, **kw)
File "/opt/amazon/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py", line 328, in get_return_value
format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o97.run.
: java.lang.ClassCastException: java.lang.String cannot be cast to scala.Enumeration$Value
at com.amazon.deequ.profiles.GenericColumnStatistics.typeOf(ColumnProfiler.scala:41)
at com.amazon.deequ.profiles.ColumnProfiler$$anonfun$castNumericStringColumns$1.apply(ColumnProfiler.scala:454)
at com.amazon.deequ.profiles.ColumnProfiler$$anonfun$castNumericStringColumns$1.apply(ColumnProfiler.scala:452)
at scala.collection.mutable.ArraySeq.foreach(ArraySeq.scala:74)
at com.amazon.deequ.profiles.ColumnProfiler$.castNumericStringColumns(ColumnProfiler.scala:452)
at com.amazon.deequ.profiles.ColumnProfiler$.profile(ColumnProfiler.scala:155)
at com.amazon.deequ.profiles.ColumnProfilerRunner.run(ColumnProfilerRunner.scala:61)
at com.amazon.deequ.profiles.ColumnProfilerRunBuilder.run(ColumnProfilerRunBuilder.scala:168)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
The text was updated successfully, but these errors were encountered:
Hi @costalauro! The method setPredefinedTypes is now implemented and is used to baseline the data type for the columns.
An example of the dictionary would be: {'columnName': DataTypeInstances.String}, where the key is the column name and the value is a member of the DataTypeInstances class (not a plain string such as 'string'). Try it out and let me know if you have any questions!
When I try to pass a dict of columns to setPredefinedTypes(column_dict), pydeequ raises "An error occurred while calling o97.run. java.lang.String cannot be cast to scala.Enumeration$Value"
dict example = {'code': 'string', 'filler': 'int'}
Deequ version: 1.0.3
Pydeequ: 0.1.7
.setKLLParameters(KLLParameters(spark, 2, 0.64, 4))
File "/tmp/pydeequ.zip/pydeequ/profiles.py", line 103, in run
run = self._ColumnProfilerRunBuilder.run()
File "/opt/amazon/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1257, in call
answer, self.gateway_client, self.target_id, self.name)
File "/opt/amazon/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 63, in deco
return f(*a, **kw)
File "/opt/amazon/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py", line 328, in get_return_value
format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o97.run.
: java.lang.ClassCastException: java.lang.String cannot be cast to scala.Enumeration$Value
at com.amazon.deequ.profiles.GenericColumnStatistics.typeOf(ColumnProfiler.scala:41)
at com.amazon.deequ.profiles.ColumnProfiler$$anonfun$castNumericStringColumns$1.apply(ColumnProfiler.scala:454)
at com.amazon.deequ.profiles.ColumnProfiler$$anonfun$castNumericStringColumns$1.apply(ColumnProfiler.scala:452)
at scala.collection.mutable.ArraySeq.foreach(ArraySeq.scala:74)
at com.amazon.deequ.profiles.ColumnProfiler$.castNumericStringColumns(ColumnProfiler.scala:452)
at com.amazon.deequ.profiles.ColumnProfiler$.profile(ColumnProfiler.scala:155)
at com.amazon.deequ.profiles.ColumnProfilerRunner.run(ColumnProfilerRunner.scala:61)
at com.amazon.deequ.profiles.ColumnProfilerRunBuilder.run(ColumnProfilerRunBuilder.scala:168)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
The text was updated successfully, but these errors were encountered: