=================================== FAILURES ===================================
_____________________ test_regexp_extract_all_idx_positive _____________________
def test_regexp_extract_all_idx_positive():
    gen = mk_str_gen('[abcd]{0,3}[0-9]{0,3}-[0-9]{0,3}[abcd]{1,3}')
>   assert_gpu_and_cpu_are_equal_collect(
        lambda spark: unary_op_df(spark, gen).selectExpr(
            'regexp_extract_all(a, "([a-d]+).*([0-9])", 1)',
            'regexp_extract_all(a, "(a)(b)", 2)',
            'regexp_extract_all(a, "([a-z0-9]((([abcd](\\\\d?)))))", 3)',
            'regexp_extract_all(a, "(\\\\d+)-(\\\\d+)", 2)',
        ),
        conf=_regexp_conf)
../../src/main/python/regexp_test.py:818:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../../src/main/python/asserts.py:595: in assert_gpu_and_cpu_are_equal_collect
_assert_gpu_and_cpu_are_equal(func, 'COLLECT', conf=conf, is_cpu_first=is_cpu_first, result_canonicalize_func_before_compare=result_canonicalize_func_before_compare)
../../src/main/python/asserts.py:503: in _assert_gpu_and_cpu_are_equal
from_gpu = run_on_gpu()
../../src/main/python/asserts.py:496: in run_on_gpu
from_gpu = with_gpu_session(bring_back, conf=conf)
../../src/main/python/spark_session.py:164: in with_gpu_session
return with_spark_session(func, conf=copy)
/opt/miniconda3/lib/python3.8/contextlib.py:75: in inner
return func(*args, **kwds)
../../src/main/python/spark_session.py:131: in with_spark_session
ret = func(_spark)
../../src/main/python/asserts.py:205: in <lambda>
bring_back = lambda spark: limit_func(spark).collect()
/var/lib/jenkins/spark/spark-3.3.0-bin-hadoop3.2/python/lib/pyspark.zip/pyspark/sql/dataframe.py:817: in collect
sock_info = self._jdf.collectToPython()
/var/lib/jenkins/spark/spark-3.3.0-bin-hadoop3.2/python/lib/py4j-0.10.9.5-src.zip/py4j/java_gateway.py:1321: in __call__
return_value = get_return_value(
/var/lib/jenkins/spark/spark-3.3.0-bin-hadoop3.2/python/lib/pyspark.zip/pyspark/sql/utils.py:190: in deco
return f(*a, **kw)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
answer = 'xro6619237'
gateway_client = <py4j.clientserver.JavaClient object at 0x7f89722fa460>
target_id = 'o6619236', name = 'collectToPython'
def get_return_value(answer, gateway_client, target_id=None, name=None):
    """Converts an answer received from the Java gateway into a Python object.
    For example, string representation of integers are converted to Python
    integer, string representation of objects are converted to JavaObject
    instances, etc.
    :param answer: the string returned by the Java gateway
    :param gateway_client: the gateway client used to communicate with the Java
        Gateway. Only necessary if the answer is a reference (e.g., object,
        list, map)
    :param target_id: the name of the object from which the answer comes from
        (e.g., *object1* in `object1.hello()`). Optional.
    :param name: the name of the member from which the answer comes from
        (e.g., *hello* in `object1.hello()`). Optional.
    """
    if is_error(answer)[0]:
        if len(answer) > 1:
            type = answer[1]
            value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
            if answer[1] == REFERENCE_TYPE:
>               raise Py4JJavaError(
                    "An error occurred while calling {0}{1}{2}.\n".
                    format(target_id, ".", name), value)
E py4j.protocol.Py4JJavaError: An error occurred while calling o6619236.collectToPython.
E : org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 102675.0 failed 1 times, most recent failure: Lost task 0.0 in stage 102675.0 (TID 2662435) (10.136.6.4 executor 0): java.lang.IllegalArgumentException: At least one column is needed to get the row count
E at ai.rapids.cudf.ColumnVector.makeList(ColumnVector.java:472)
E at org.apache.spark.sql.rapids.GpuRegExpExtractAll.$anonfun$doColumnar$71(stringFunctions.scala:1485)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:48)
E at org.apache.spark.sql.rapids.GpuRegExpExtractAll.$anonfun$doColumnar$67(stringFunctions.scala:1484)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:30)
E at org.apache.spark.sql.rapids.GpuRegExpExtractAll.$anonfun$doColumnar$66(stringFunctions.scala:1474)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:30)
E at org.apache.spark.sql.rapids.GpuRegExpExtractAll.$anonfun$doColumnar$65(stringFunctions.scala:1473)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:30)
E at org.apache.spark.sql.rapids.GpuRegExpExtractAll.doColumnar(stringFunctions.scala:1472)
E at com.nvidia.spark.rapids.GpuTernaryExpression.$anonfun$columnarEval$8(GpuExpressions.scala:496)
E at com.nvidia.spark.rapids.Arm$.withResourceIfAllowed(Arm.scala:84)
E at com.nvidia.spark.rapids.GpuTernaryExpression.$anonfun$columnarEval$7(GpuExpressions.scala:481)
E at com.nvidia.spark.rapids.Arm$.withResourceIfAllowed(Arm.scala:84)
E at com.nvidia.spark.rapids.GpuTernaryExpression.$anonfun$columnarEval$6(GpuExpressions.scala:480)
E at com.nvidia.spark.rapids.Arm$.withResourceIfAllowed(Arm.scala:84)
E at com.nvidia.spark.rapids.GpuTernaryExpression.columnarEval(GpuExpressions.scala:479)
E at com.nvidia.spark.rapids.GpuTernaryExpression.columnarEval$(GpuExpressions.scala:477)
E at org.apache.spark.sql.rapids.GpuRegExpTernaryBase.columnarEval(stringFunctions.scala:1112)
E at com.nvidia.spark.rapids.RapidsPluginImplicits$ReallyAGpuExpression.columnarEval(implicits.scala:35)
E at com.nvidia.spark.rapids.GpuAlias.columnarEval(namedExpressions.scala:110)
E at com.nvidia.spark.rapids.RapidsPluginImplicits$ReallyAGpuExpression.columnarEval(implicits.scala:35)
E at com.nvidia.spark.rapids.GpuProjectExec$.$anonfun$project$1(basicPhysicalOperators.scala:110)
E at com.nvidia.spark.rapids.RapidsPluginImplicits$MapsSafely.$anonfun$safeMap$1(implicits.scala:221)
E at com.nvidia.spark.rapids.RapidsPluginImplicits$MapsSafely.$anonfun$safeMap$1$adapted(implicits.scala:218)
E at scala.collection.immutable.List.foreach(List.scala:431)
E at com.nvidia.spark.rapids.RapidsPluginImplicits$MapsSafely.safeMap(implicits.scala:218)
E at com.nvidia.spark.rapids.RapidsPluginImplicits$AutoCloseableProducingSeq.safeMap(implicits.scala:253)
E at com.nvidia.spark.rapids.GpuProjectExec$.project(basicPhysicalOperators.scala:110)
E at com.nvidia.spark.rapids.GpuTieredProject.$anonfun$project$2(basicPhysicalOperators.scala:619)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:30)
E at com.nvidia.spark.rapids.GpuTieredProject.recurse$2(basicPhysicalOperators.scala:618)
E at com.nvidia.spark.rapids.GpuTieredProject.project(basicPhysicalOperators.scala:631)
E at com.nvidia.spark.rapids.GpuTieredProject.$anonfun$projectWithRetrySingleBatchInternal$5(basicPhysicalOperators.scala:567)
E at com.nvidia.spark.rapids.RmmRapidsRetryIterator$.withRestoreOnRetry(RmmRapidsRetryIterator.scala:272)
E at com.nvidia.spark.rapids.GpuTieredProject.$anonfun$projectWithRetrySingleBatchInternal$4(basicPhysicalOperators.scala:567)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:30)
E at com.nvidia.spark.rapids.GpuTieredProject.$anonfun$projectWithRetrySingleBatchInternal$3(basicPhysicalOperators.scala:565)
E at com.nvidia.spark.rapids.RmmRapidsRetryIterator$NoInputSpliterator.next(RmmRapidsRetryIterator.scala:395)
E at com.nvidia.spark.rapids.RmmRapidsRetryIterator$RmmRapidsRetryIterator.next(RmmRapidsRetryIterator.scala:613)
E at com.nvidia.spark.rapids.RmmRapidsRetryIterator$RmmRapidsRetryAutoCloseableIterator.next(RmmRapidsRetryIterator.scala:517)
E at com.nvidia.spark.rapids.RmmRapidsRetryIterator$.drainSingleWithVerification(RmmRapidsRetryIterator.scala:291)
E at com.nvidia.spark.rapids.RmmRapidsRetryIterator$.withRetryNoSplit(RmmRapidsRetryIterator.scala:185)
E at com.nvidia.spark.rapids.GpuTieredProject.$anonfun$projectWithRetrySingleBatchInternal$1(basicPhysicalOperators.scala:565)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:39)
E at com.nvidia.spark.rapids.GpuTieredProject.projectWithRetrySingleBatchInternal(basicPhysicalOperators.scala:562)
E at com.nvidia.spark.rapids.GpuTieredProject.projectAndCloseWithRetrySingleBatch(basicPhysicalOperators.scala:601)
E at com.nvidia.spark.rapids.GpuProjectExec.$anonfun$internalDoExecuteColumnar$2(basicPhysicalOperators.scala:384)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:30)
E at com.nvidia.spark.rapids.GpuProjectExec.$anonfun$internalDoExecuteColumnar$1(basicPhysicalOperators.scala:380)
E at scala.collection.Iterator$$anon$10.next(Iterator.scala:461)
E at com.nvidia.spark.rapids.ColumnarToRowIterator.$anonfun$fetchNextBatch$3(GpuColumnarToRowExec.scala:290)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:30)
E at com.nvidia.spark.rapids.ColumnarToRowIterator.fetchNextBatch(GpuColumnarToRowExec.scala:287)
E at com.nvidia.spark.rapids.ColumnarToRowIterator.loadNextBatch(GpuColumnarToRowExec.scala:257)
E at com.nvidia.spark.rapids.ColumnarToRowIterator.hasNext(GpuColumnarToRowExec.scala:304)
E at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
E at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:364)
E at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:890)
E at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:890)
E at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
E at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)
E at org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
E at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
E at org.apache.spark.scheduler.Task.run(Task.scala:136)
E at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
E at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
E at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
E at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
E at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
E at java.base/java.lang.Thread.run(Thread.java:833)
E Suppressed: com.nvidia.spark.rapids.jni.GpuRetryOOM: injected RetryOOM
E at ai.rapids.cudf.ColumnView.extractAllRecord(Native Method)
E at ai.rapids.cudf.ColumnView.extractAllRecord(ColumnView.java:3573)
E ... 62 more
E
E Driver stacktrace:
E at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2672)
E at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2608)
E at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2607)
E at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
E at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
E at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
E at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2607)
E at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1182)
E at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1182)
E at scala.Option.foreach(Option.scala:407)
E at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1182)
E at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2860)
E at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2802)
E at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2791)
E at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
E at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:952)
E at org.apache.spark.SparkContext.runJob(SparkContext.scala:2228)
E at org.apache.spark.SparkContext.runJob(SparkContext.scala:2249)
E at org.apache.spark.SparkContext.runJob(SparkContext.scala:2268)
E at org.apache.spark.SparkContext.runJob(SparkContext.scala:2293)
E at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1021)
E at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
E at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
E at org.apache.spark.rdd.RDD.withScope(RDD.scala:406)
E at org.apache.spark.rdd.RDD.collect(RDD.scala:1020)
E at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:424)
E at org.apache.spark.sql.Dataset.$anonfun$collectToPython$1(Dataset.scala:3688)
E at org.apache.spark.sql.Dataset.$anonfun$withAction$2(Dataset.scala:3858)
E at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:510)
E at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3856)
E at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:109)
E at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:169)
E at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:95)
E at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
E at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
E at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3856)
E at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3685)
E at jdk.internal.reflect.GeneratedMethodAccessor100.invoke(Unknown Source)
E at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
E at java.base/java.lang.reflect.Method.invoke(Method.java:568)
E at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
E at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
E at py4j.Gateway.invoke(Gateway.java:282)
E at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
E at py4j.commands.CallCommand.execute(CallCommand.java:79)
E at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
E at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
E at java.base/java.lang.Thread.run(Thread.java:833)
E Caused by: java.lang.IllegalArgumentException: At least one column is needed to get the row count
E at ai.rapids.cudf.ColumnVector.makeList(ColumnVector.java:472)
E at org.apache.spark.sql.rapids.GpuRegExpExtractAll.$anonfun$doColumnar$71(stringFunctions.scala:1485)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:48)
E at org.apache.spark.sql.rapids.GpuRegExpExtractAll.$anonfun$doColumnar$67(stringFunctions.scala:1484)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:30)
E at org.apache.spark.sql.rapids.GpuRegExpExtractAll.$anonfun$doColumnar$66(stringFunctions.scala:1474)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:30)
E at org.apache.spark.sql.rapids.GpuRegExpExtractAll.$anonfun$doColumnar$65(stringFunctions.scala:1473)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:30)
E at org.apache.spark.sql.rapids.GpuRegExpExtractAll.doColumnar(stringFunctions.scala:1472)
E at com.nvidia.spark.rapids.GpuTernaryExpression.$anonfun$columnarEval$8(GpuExpressions.scala:496)
E at com.nvidia.spark.rapids.Arm$.withResourceIfAllowed(Arm.scala:84)
E at com.nvidia.spark.rapids.GpuTernaryExpression.$anonfun$columnarEval$7(GpuExpressions.scala:481)
E at com.nvidia.spark.rapids.Arm$.withResourceIfAllowed(Arm.scala:84)
E at com.nvidia.spark.rapids.GpuTernaryExpression.$anonfun$columnarEval$6(GpuExpressions.scala:480)
E at com.nvidia.spark.rapids.Arm$.withResourceIfAllowed(Arm.scala:84)
E at com.nvidia.spark.rapids.GpuTernaryExpression.columnarEval(GpuExpressions.scala:479)
E at com.nvidia.spark.rapids.GpuTernaryExpression.columnarEval$(GpuExpressions.scala:477)
E at org.apache.spark.sql.rapids.GpuRegExpTernaryBase.columnarEval(stringFunctions.scala:1112)
E at com.nvidia.spark.rapids.RapidsPluginImplicits$ReallyAGpuExpression.columnarEval(implicits.scala:35)
E at com.nvidia.spark.rapids.GpuAlias.columnarEval(namedExpressions.scala:110)
E at com.nvidia.spark.rapids.RapidsPluginImplicits$ReallyAGpuExpression.columnarEval(implicits.scala:35)
E at com.nvidia.spark.rapids.GpuProjectExec$.$anonfun$project$1(basicPhysicalOperators.scala:110)
E at com.nvidia.spark.rapids.RapidsPluginImplicits$MapsSafely.$anonfun$safeMap$1(implicits.scala:221)
E at com.nvidia.spark.rapids.RapidsPluginImplicits$MapsSafely.$anonfun$safeMap$1$adapted(implicits.scala:218)
E at scala.collection.immutable.List.foreach(List.scala:431)
E at com.nvidia.spark.rapids.RapidsPluginImplicits$MapsSafely.safeMap(implicits.scala:218)
E at com.nvidia.spark.rapids.RapidsPluginImplicits$AutoCloseableProducingSeq.safeMap(implicits.scala:253)
E at com.nvidia.spark.rapids.GpuProjectExec$.project(basicPhysicalOperators.scala:110)
E at com.nvidia.spark.rapids.GpuTieredProject.$anonfun$project$2(basicPhysicalOperators.scala:619)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:30)
E at com.nvidia.spark.rapids.GpuTieredProject.recurse$2(basicPhysicalOperators.scala:618)
E at com.nvidia.spark.rapids.GpuTieredProject.project(basicPhysicalOperators.scala:631)
E at com.nvidia.spark.rapids.GpuTieredProject.$anonfun$projectWithRetrySingleBatchInternal$5(basicPhysicalOperators.scala:567)
E at com.nvidia.spark.rapids.RmmRapidsRetryIterator$.withRestoreOnRetry(RmmRapidsRetryIterator.scala:272)
E at com.nvidia.spark.rapids.GpuTieredProject.$anonfun$projectWithRetrySingleBatchInternal$4(basicPhysicalOperators.scala:567)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:30)
E at com.nvidia.spark.rapids.GpuTieredProject.$anonfun$projectWithRetrySingleBatchInternal$3(basicPhysicalOperators.scala:565)
E at com.nvidia.spark.rapids.RmmRapidsRetryIterator$NoInputSpliterator.next(RmmRapidsRetryIterator.scala:395)
E at com.nvidia.spark.rapids.RmmRapidsRetryIterator$RmmRapidsRetryIterator.next(RmmRapidsRetryIterator.scala:613)
E at com.nvidia.spark.rapids.RmmRapidsRetryIterator$RmmRapidsRetryAutoCloseableIterator.next(RmmRapidsRetryIterator.scala:517)
E at com.nvidia.spark.rapids.RmmRapidsRetryIterator$.drainSingleWithVerification(RmmRapidsRetryIterator.scala:291)
E at com.nvidia.spark.rapids.RmmRapidsRetryIterator$.withRetryNoSplit(RmmRapidsRetryIterator.scala:185)
E at com.nvidia.spark.rapids.GpuTieredProject.$anonfun$projectWithRetrySingleBatchInternal$1(basicPhysicalOperators.scala:565)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:39)
E at com.nvidia.spark.rapids.GpuTieredProject.projectWithRetrySingleBatchInternal(basicPhysicalOperators.scala:562)
E at com.nvidia.spark.rapids.GpuTieredProject.projectAndCloseWithRetrySingleBatch(basicPhysicalOperators.scala:601)
E at com.nvidia.spark.rapids.GpuProjectExec.$anonfun$internalDoExecuteColumnar$2(basicPhysicalOperators.scala:384)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:30)
E at com.nvidia.spark.rapids.GpuProjectExec.$anonfun$internalDoExecuteColumnar$1(basicPhysicalOperators.scala:380)
E at scala.collection.Iterator$$anon$10.next(Iterator.scala:461)
E at com.nvidia.spark.rapids.ColumnarToRowIterator.$anonfun$fetchNextBatch$3(GpuColumnarToRowExec.scala:290)
E at com.nvidia.spark.rapids.Arm$.withResource(Arm.scala:30)
E at com.nvidia.spark.rapids.ColumnarToRowIterator.fetchNextBatch(GpuColumnarToRowExec.scala:287)
E at com.nvidia.spark.rapids.ColumnarToRowIterator.loadNextBatch(GpuColumnarToRowExec.scala:257)
E at com.nvidia.spark.rapids.ColumnarToRowIterator.hasNext(GpuColumnarToRowExec.scala:304)
E at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
E at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:364)
E at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:890)
E at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:890)
E at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
E at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)
E at org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
E at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
E at org.apache.spark.scheduler.Task.run(Task.scala:136)
E at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
E at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
E at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
E at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
E at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
E ... 1 more
E Suppressed: com.nvidia.spark.rapids.jni.GpuRetryOOM: injected RetryOOM
E at ai.rapids.cudf.ColumnView.extractAllRecord(Native Method)
E at ai.rapids.cudf.ColumnView.extractAllRecord(ColumnView.java:3573)
E ... 62 more
/var/lib/jenkins/spark/spark-3.3.0-bin-hadoop3.2/python/lib/py4j-0.10.9.5-src.zip/py4j/protocol.py:326: Py4JJavaError
----------------------------- Captured stdout call -----------------------------
### CPU RUN ###
I am stealing this from @jlowe because I got bored in a meeting and made a patch for it. It happens when no rows in the batch match the pattern we want to extract. When that happens we end up with no string columns to make a list out of, and we get the error shown here.
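For illustration, here is a minimal sketch of the failure mode. The input rows below are made up, and it assumes the RAPIDS plugin is enabled (spark.rapids.sql.enabled=true); on the CPU, Spark returns an empty array per row for the zero-match case, which is what the GPU path should produce instead of failing.

# Hypothetical repro: no row contains "digits-digits", so the regex matches
# nothing in the batch and the GPU side ends up with zero extracted group
# columns to turn into a list column. Input values are invented.
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("abc",), ("xyz",)], "a string")

# On the CPU this collects empty arrays; with the plugin enabled it reached
# ColumnVector.makeList with no columns and raised IllegalArgumentException.
df.selectExpr('regexp_extract_all(a, "(\\\\d+)-(\\\\d+)", 2)').collect()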
Describe the bug
FAILED ../../src/main/python/regexp_test.py::test_regexp_extract_all_idx_positive[DATAGEN_SEED=1709054829, INJECT_OOM]
(DATAGEN_SEED pins the random input data for the test; INJECT_OOM means retry-OOM injection was active, which matches the "Suppressed: com.nvidia.spark.rapids.jni.GpuRetryOOM: injected RetryOOM" frames in the trace above.)
rapids_it-MT-egx06-standalone/299
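To reproduce locally, the failing seed can be pinned when running the integration tests. A sketch, assuming the standard spark-rapids integration_tests layout and that run_pyspark_from_build.sh forwards extra arguments to pytest:

import os
import subprocess

# Pin the datagen seed from the failing parametrization. The script path and
# argument forwarding are assumptions about the repo layout, not verified here.
env = dict(os.environ, DATAGEN_SEED="1709054829")
subprocess.run(
    ["./integration_tests/run_pyspark_from_build.sh",
     "-k", "test_regexp_extract_all_idx_positive"],
    env=env, check=True)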