Skip to content

Commit

Permalink
AVRO-4065: Do Not Copy Array Contents when Expanding UTF-8 Arrays (#3181)
Browse files: browse the repository at this point in the history
  • Loading branch information
belugabehr authored Oct 7, 2024
1 parent ec2378b commit 67263a3
Showing 1 changed file with 22 additions and 24 deletions.
46 changes: 22 additions & 24 deletions lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,46 +69,44 @@ public Utf8(byte[] bytes) {
}

/**
* Return UTF-8 encoded bytes. Only valid through {@link #getByteLength()}.
* Return UTF-8 encoded bytes. Only valid through {@link #getByteLength()}
* assuming the bytes have been fully copied into the underlying buffer from the
* source.
*
* @see #setByteLength(int)
* @return a reference to the underlying byte array
*/
public byte[] getBytes() {
  // Hands out the backing array itself rather than a copy.
  return this.bytes;
}

/**
* Return length in bytes.
*
* @deprecated call {@link #getByteLength()} instead.
*/
@Deprecated
public int getLength() {
  // Legacy accessor: reports the same field that getByteLength() returns.
  return this.length;
}

/**
 * Return the byte length currently recorded for this object.
 *
 * @return the value of the internal length field, in bytes
 */
public int getByteLength() {
  return this.length;
}

/**
* Set length in bytes. Should called whenever byte content changes, even if the
* length does not change, as this also clears the cached String.
* Set length in bytes. When calling this method, even if the new length is the
* same as the current length, the cached contents of this Utf8 object will be
* wiped out. After calling this method, no assumptions should be made about the
* internal state (e.g., contents, hashcode, equality, etc.) of this Utf8 String
* other than the internal buffer being large enough to accommodate a String of
* the new length. This should be called immediately before reading a String
* from the underlying data source.
*
* @deprecated call {@link #setByteLength(int)} instead.
*/
@Deprecated
public Utf8 setLength(int newLength) {
  // Legacy entry point: all of the work is done by setByteLength(int).
  final Utf8 result = this.setByteLength(newLength);
  return result;
}

/**
* Set length in bytes. Should called whenever byte content changes, even if the
* length does not change, as this also clears the cached String.
* @param newLength the new length of the underlying buffer
* @return a reference to this object.
* @see org.apache.avro.io.BinaryDecoder#readString(Utf8)
*/
public Utf8 setByteLength(int newLength) {
SystemLimitException.checkMaxStringLength(newLength);

// Note that if the buffer size increases, the internal buffer is zero-ed out.
// If the buffer is large enough, just the length pointer moves and the old
// contents remain. For consistency's sake, we could zero-out the buffer in
// both cases, but would be a perf hit.
if (this.bytes.length < newLength) {
this.bytes = Arrays.copyOf(this.bytes, newLength);
this.bytes = new byte[newLength];
}
this.length = newLength;
this.string = null;
Expand Down

0 comments on commit 67263a3

Please sign in to comment.