forked from apache/spark
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-49846][SS] Add numUpdatedStateRows and numRemovedStateRows metrics for use with transformWithState operator

### What changes were proposed in this pull request?
Add numUpdatedStateRows and numRemovedStateRows metrics for use with transformWithState operator

### Why are the changes needed?
Without this change, metrics around these operations are not available in the query progress metrics

### Does this PR introduce _any_ user-facing change?
No

Metrics updated as part of the streaming query progress
```
"operatorName" : "transformWithStateExec",
"numRowsTotal" : 1,
"numRowsUpdated" : 1,
"numRowsRemoved" : 1,
```

### How was this patch tested?
Added unit tests
```
[info] Run completed in 25 seconds, 697 milliseconds.
[info] Total number of tests run: 2
[info] Suites: completed 1, aborted 0
[info] Tests: succeeded 2, failed 0, canceled 0, ignored 0, pending 0
[info] All tests passed.
```

### Was this patch authored or co-authored using generative AI tooling?
No

Closes apache#48317 from anishshri-db/task/SPARK-49846.

Authored-by: Anish Shrigondekar <anish.shrigondekar@databricks.com>
Signed-off-by: Jungtaek Lim <kabhwan.opensource@gmail.com>
- Loading branch information
1 parent
3d2623e
commit d31a474
Showing
14 changed files
with
266 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
86 changes: 86 additions & 0 deletions
86
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ListStateMetricsImpl.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.spark.sql.execution.streaming | ||
|
||
import org.apache.spark.sql.catalyst.InternalRow | ||
import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection, UnsafeRow} | ||
import org.apache.spark.sql.execution.streaming.state.{NoPrefixKeyStateEncoderSpec, StateStore} | ||
import org.apache.spark.sql.types._ | ||
|
||
/**
 * Mixin with helpers for tracking how many entries a list state holds per grouping key.
 *
 * The per-key entry count lives in its own internal column family of the state store, keyed
 * by the encoded grouping key. Reading the count from there gives an accurate number of
 * entries updated/removed from the list, which is reported as part of the query progress
 * metrics, without having to scan the list itself.
 *
 * Mixing in this trait creates the counter column family (if it does not already exist) as
 * part of trait initialization, so `stateStore`, `baseStateName` and `exprEncSchema` must be
 * usable at that point.
 */
trait ListStateMetricsImpl {
  def stateStore: StateStore

  def baseStateName: String

  def exprEncSchema: StructType

  // Value schema of the counter column family: a single non-nullable long holding the
  // number of entries in the list.
  private val countValueSchema: StructType =
    StructType(Seq(StructField("count", LongType, nullable = false)))

  // Converts the reusable count row below into the UnsafeRow written to the state store.
  private val countProjection = UnsafeProjection.create(countValueSchema)

  // Single-field row reused across updates; its only field is overwritten on every update.
  private val reusableCountRow = new GenericInternalRow(1)

  // Name of the counter column family, derived from the base state name.
  private def rowCounterCFName: String = "$rowCounter_" + baseStateName

  // Register the counter column family as an internal column family keyed by the
  // (unprefixed) encoded grouping key.
  stateStore.createColFamilyIfAbsent(
    rowCounterCFName,
    exprEncSchema,
    countValueSchema,
    NoPrefixKeyStateEncoderSpec(exprEncSchema),
    isInternal = true)

  /**
   * Looks up the number of entries currently recorded for the list state of a grouping key.
   *
   * @param encodedKey - encoded grouping key
   * @return - recorded number of entries, or 0 if no count is stored for the key
   */
  def getEntryCount(encodedKey: UnsafeRow): Long =
    Option(stateStore.get(encodedKey, rowCounterCFName)).fold(0L)(_.getLong(0))

  /**
   * Records a new entry count for the list state of a grouping key, replacing any
   * previously stored count.
   *
   * @param encodedKey - encoded grouping key
   * @param updatedCount - updated count of entries in the list state
   */
  def updateEntryCount(encodedKey: UnsafeRow, updatedCount: Long): Unit = {
    reusableCountRow.setLong(0, updatedCount)
    val encodedCount = countProjection(reusableCountRow)
    stateStore.put(encodedKey, encodedCount, rowCounterCFName)
  }

  /**
   * Deletes the stored entry count for the list state of a grouping key.
   *
   * @param encodedKey - encoded grouping key
   */
  def removeEntryCount(encodedKey: UnsafeRow): Unit =
    stateStore.remove(encodedKey, rowCounterCFName)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.