Skip to content

Adds support for fragmented mp4 with multiple sidx boxes #2186

New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions RELEASENOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
variable bitrate metadata when falling back to constant bitrate seeking
due to `FLAG_ENABLE_CONSTANT_BITRATE_SEEKING(_ALWAYS)`
([#2194](https://github.com/androidx/media/issues/2194)).
* Add support for seeking in fragmented MP4 with multiple `sidx` atoms.
This behavior can be enabled using the `FLAG_MERGE_FRAGMENTED_SIDX` flag
on `FragmentedMp4Extractor`
([#9373](https://github.com/google/ExoPlayer/issues/9373)).
* DataSource:
* Audio:
* Allow constant power upmixing/downmixing in DefaultAudioMixer.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Copyright 2025 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package androidx.media3.extractor;

import androidx.media3.common.util.UnstableApi;
import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Combines several {@link ChunkIndex} instances into one {@link ChunkIndex}.
 *
 * <p>Intended for media whose index is spread over multiple segments or sources, where seeking or
 * playback needs a single unified index.
 */
@UnstableApi
public final class ChunkIndexMerger {

  /**
   * Added chunk indices, keyed by their first start time in microseconds. Iteration follows
   * insertion order.
   */
  private final Map<Long, ChunkIndex> chunkMap;

  /** Creates an empty merger. */
  public ChunkIndexMerger() {
    chunkMap = new LinkedHashMap<>();
  }

  /**
   * Registers a {@link ChunkIndex} for merging.
   *
   * <p>A chunk index without any entries is ignored, as is one whose first timestamp equals the
   * first timestamp of a chunk index that was added earlier.
   *
   * @param chunk The {@link ChunkIndex} to add.
   */
  public void add(ChunkIndex chunk) {
    if (chunk.timesUs.length == 0) {
      return;
    }
    long startTimeUs = chunk.timesUs[0];
    if (chunkMap.containsKey(startTimeUs)) {
      return;
    }
    chunkMap.put(startTimeUs, chunk);
  }

  /**
   * Returns a new {@link ChunkIndex} built by concatenating the sizes, offsets, durations and
   * times of every added chunk index, in the order in which they were added.
   */
  public ChunkIndex merge() {
    List<ChunkIndex> orderedChunks = new ArrayList<>(chunkMap.values());
    int chunkCount = orderedChunks.size();

    int[][] sizes = new int[chunkCount][];
    long[][] offsets = new long[chunkCount][];
    long[][] durationsUs = new long[chunkCount][];
    long[][] timesUs = new long[chunkCount][];

    for (int i = 0; i < chunkCount; i++) {
      ChunkIndex current = orderedChunks.get(i);
      sizes[i] = current.sizes;
      offsets[i] = current.offsets;
      durationsUs[i] = current.durationsUs;
      timesUs[i] = current.timesUs;
    }

    return new ChunkIndex(
        Ints.concat(sizes),
        Longs.concat(offsets),
        Longs.concat(durationsUs),
        Longs.concat(timesUs));
  }

  /** Removes every chunk index that has been added. */
  public void clear() {
    chunkMap.clear();
  }

  /** Returns how many chunk indices are currently held for merging. */
  public int size() {
    return chunkMap.size();
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import androidx.media3.extractor.Ac4Util;
import androidx.media3.extractor.CeaUtil;
import androidx.media3.extractor.ChunkIndex;
import androidx.media3.extractor.ChunkIndexMerger;
import androidx.media3.extractor.Extractor;
import androidx.media3.extractor.ExtractorInput;
import androidx.media3.extractor.ExtractorOutput;
Expand Down Expand Up @@ -91,8 +92,8 @@ public static ExtractorsFactory newFactory(SubtitleParser.Factory subtitleParser
* Flags controlling the behavior of the extractor. Possible flag values are {@link
* #FLAG_WORKAROUND_EVERY_VIDEO_FRAME_IS_SYNC_FRAME}, {@link #FLAG_WORKAROUND_IGNORE_TFDT_BOX},
* {@link #FLAG_ENABLE_EMSG_TRACK}, {@link #FLAG_WORKAROUND_IGNORE_EDIT_LISTS}, {@link
* #FLAG_EMIT_RAW_SUBTITLE_DATA}, {@link #FLAG_READ_WITHIN_GOP_SAMPLE_DEPENDENCIES} and {@link
* #FLAG_READ_WITHIN_GOP_SAMPLE_DEPENDENCIES_H265}.
* #FLAG_EMIT_RAW_SUBTITLE_DATA}, {@link #FLAG_READ_WITHIN_GOP_SAMPLE_DEPENDENCIES}, {@link
* #FLAG_READ_WITHIN_GOP_SAMPLE_DEPENDENCIES_H265} and {@link #FLAG_MERGE_FRAGMENTED_SIDX}.
*/
@Documented
@Retention(RetentionPolicy.SOURCE)
Expand All @@ -106,7 +107,8 @@ public static ExtractorsFactory newFactory(SubtitleParser.Factory subtitleParser
FLAG_WORKAROUND_IGNORE_EDIT_LISTS,
FLAG_EMIT_RAW_SUBTITLE_DATA,
FLAG_READ_WITHIN_GOP_SAMPLE_DEPENDENCIES,
FLAG_READ_WITHIN_GOP_SAMPLE_DEPENDENCIES_H265
FLAG_READ_WITHIN_GOP_SAMPLE_DEPENDENCIES_H265,
FLAG_MERGE_FRAGMENTED_SIDX
})
public @interface Flags {}

Expand Down Expand Up @@ -159,6 +161,9 @@ public static ExtractorsFactory newFactory(SubtitleParser.Factory subtitleParser
*/
public static final int FLAG_READ_WITHIN_GOP_SAMPLE_DEPENDENCIES_H265 = 1 << 7;

/** Flag to enable reading and merging of all sidx boxes before continuing extraction. */
public static final int FLAG_MERGE_FRAGMENTED_SIDX = 1 << 8;

/**
* @deprecated Use {@link #newFactory(SubtitleParser.Factory)} instead.
*/
Expand Down Expand Up @@ -219,6 +224,8 @@ public static ExtractorsFactory newFactory(SubtitleParser.Factory subtitleParser
private final ReorderingBufferQueue reorderingBufferQueue;
@Nullable private final TrackOutput additionalEmsgTrackOutput;

private final ChunkIndexMerger chunkIndexMerger;

private ImmutableList<SniffFailure> lastSniffFailures;
private int parserState;
private int atomType;
Expand Down Expand Up @@ -246,6 +253,12 @@ public static ExtractorsFactory newFactory(SubtitleParser.Factory subtitleParser
// Whether extractorOutput.seekMap has been called.
private boolean haveOutputSeekMap;

// Whether we've encountered and merged multiple sidx boxes with different start times and
// extractorOutput.seekMap has been called.
private boolean haveOutputSeekMapFromMultipleSidx;

private long seekPositionBeforeSidxProcessing;

/**
* @deprecated Use {@link #FragmentedMp4Extractor(SubtitleParser.Factory)} instead
*/
Expand Down Expand Up @@ -428,6 +441,8 @@ public FragmentedMp4Extractor(
new ReorderingBufferQueue(
(presentationTimeUs, buffer) ->
CeaUtil.consume(presentationTimeUs, buffer, ceaTrackOutputs));
chunkIndexMerger = new ChunkIndexMerger();
seekPositionBeforeSidxProcessing = C.INDEX_UNSET;
}

/**
Expand Down Expand Up @@ -511,24 +526,36 @@ public void release() {

@Override
public int read(ExtractorInput input, PositionHolder seekPosition) throws IOException {
while (true) {
switch (parserState) {
case STATE_READING_ATOM_HEADER:
if (!readAtomHeader(input)) {
reorderingBufferQueue.flush();
return Extractor.RESULT_END_OF_INPUT;
}
break;
case STATE_READING_ATOM_PAYLOAD:
readAtomPayload(input);
break;
case STATE_READING_ENCRYPTION_DATA:
readEncryptionData(input);
break;
default:
if (readSample(input)) {
return RESULT_CONTINUE;
}
try {
while (true) {
switch (parserState) {
case STATE_READING_ATOM_HEADER:
if (!readAtomHeader(input, /* skipPayloadParsing= */ false)) {
if (seekPositionBeforeSidxProcessing != C.INDEX_UNSET) {
seekPosition.position = seekPositionBeforeSidxProcessing;
seekPositionBeforeSidxProcessing = C.INDEX_UNSET;
return Extractor.RESULT_SEEK;
} else {
reorderingBufferQueue.flush();
return Extractor.RESULT_END_OF_INPUT;
}
}
break;
case STATE_READING_ATOM_PAYLOAD:
readAtomPayload(input);
break;
case STATE_READING_ENCRYPTION_DATA:
readEncryptionData(input);
break;
default:
if (readSample(input)) {
return RESULT_CONTINUE;
}
}
}
} finally {
if (seekPositionBeforeSidxProcessing != C.INDEX_UNSET) {
seekPositionBeforeSidxProcessing = C.INDEX_UNSET;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're still investigating the use of finally block here. Extractor should be able to restart from an IOException without having this. Removing this block results in test failures with IO Errors simulation.

Copy link
Author

@anthonybajoua anthonybajoua May 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is due to the reset of the seek position being set and not cleared during an IO Error.

https://gist.github.com/anthonybajoua/c85a786a30f9876c82849544f848f190

See the above revision for a much cleaner way to achieve this.

}
}
Expand All @@ -538,7 +565,8 @@ private void enterReadingAtomHeaderState() {
atomHeaderBytesRead = 0;
}

private boolean readAtomHeader(ExtractorInput input) throws IOException {
private boolean readAtomHeader(ExtractorInput input, boolean skipPayloadParsing)
throws IOException {
if (atomHeaderBytesRead == 0) {
// Read the standard length atom header.
if (!input.readFully(atomHeader.getData(), 0, Mp4Box.HEADER_SIZE, true)) {
Expand Down Expand Up @@ -573,6 +601,10 @@ private boolean readAtomHeader(ExtractorInput input) throws IOException {
"Atom size less than header length (unsupported).");
}

if (skipPayloadParsing) {
return true;
}

long atomPosition = input.getPosition() - atomHeaderBytesRead;
if (atomType == Mp4Box.TYPE_moof || atomType == Mp4Box.TYPE_mdat) {
if (!haveOutputSeekMap) {
Expand Down Expand Up @@ -639,7 +671,7 @@ private void readAtomPayload(ExtractorInput input) throws IOException {
@Nullable ParsableByteArray atomData = this.atomData;
if (atomData != null) {
input.readFully(atomData.getData(), Mp4Box.HEADER_SIZE, atomPayloadSize);
onLeafAtomRead(new LeafBox(atomType, atomData), input.getPosition());
onLeafAtomRead(new LeafBox(atomType, atomData), input);
} else {
input.skipFully(atomPayloadSize);
}
Expand All @@ -653,19 +685,52 @@ private void processAtomEnded(long atomEndPosition) throws ParserException {
enterReadingAtomHeaderState();
}

private void onLeafAtomRead(LeafBox leaf, long inputPosition) throws ParserException {
private void onLeafAtomRead(LeafBox leaf, ExtractorInput input) throws IOException {
if (!containerAtoms.isEmpty()) {
containerAtoms.peek().add(leaf);
} else if (leaf.type == Mp4Box.TYPE_sidx) {
long inputPosition = input.getPosition();
Pair<Long, ChunkIndex> result = parseSidx(leaf.data, inputPosition);
segmentIndexEarliestPresentationTimeUs = result.first;
extractorOutput.seekMap(result.second);
haveOutputSeekMap = true;
chunkIndexMerger.add(result.second);
if (!haveOutputSeekMap) {
segmentIndexEarliestPresentationTimeUs = result.first;
extractorOutput.seekMap(result.second);
haveOutputSeekMap = true;
} else if ((flags & FLAG_MERGE_FRAGMENTED_SIDX) != 0
&& !haveOutputSeekMapFromMultipleSidx
&& chunkIndexMerger.size() > 1) {
seekPositionBeforeSidxProcessing = inputPosition;
try {
processRemainingSidxAtoms(input);
haveOutputSeekMapFromMultipleSidx = true;
} finally {
extractorOutput.seekMap(chunkIndexMerger.merge());
}
}
} else if (leaf.type == Mp4Box.TYPE_emsg) {
onEmsgLeafAtomRead(leaf.data);
}
}

/**
 * Scans forward through {@code input} one box header at a time, adding the {@link ChunkIndex}
 * parsed from every {@code sidx} box to {@link #chunkIndexMerger} and skipping the payload of
 * every other box, until {@link #readAtomHeader} reports that no further header could be read.
 *
 * @param input The {@link ExtractorInput} to read from.
 * @throws IOException If reading from, or skipping within, {@code input} fails.
 */
private void processRemainingSidxAtoms(ExtractorInput input) throws IOException {
  enterReadingAtomHeaderState();
  while (readAtomHeader(input, /* skipPayloadParsing= */ true)) {
    if (atomType == Mp4Box.TYPE_sidx) {
      // Rebuild the full box (header followed by payload) in the scratch buffer so that it can be
      // handed to parseSidx in the same form as a regular leaf box.
      // NOTE(review): this copies only Mp4Box.HEADER_SIZE header bytes, so it presumably relies on
      // the sidx box not using an extended (64-bit) size header - confirm.
      scratch.reset((int) atomSize);
      System.arraycopy(atomHeader.getData(), 0, scratch.getData(), 0, Mp4Box.HEADER_SIZE);
      input.readFully(
          scratch.getData(), Mp4Box.HEADER_SIZE, (int) (atomSize - atomHeaderBytesRead));

      LeafBox sidxBox = new LeafBox(Mp4Box.TYPE_sidx, scratch);
      Pair<Long, ChunkIndex> result = parseSidx(sidxBox.data, input.getPeekPosition());
      chunkIndexMerger.add(result.second);
    } else {
      // The payload size is a long. Narrowing it straight to int would produce a wrong (possibly
      // negative) length for boxes larger than Integer.MAX_VALUE, such as a big mdat, and the
      // scan would then lose box alignment. Skip in int-sized steps instead.
      long bytesToSkip = atomSize - atomHeaderBytesRead;
      while (bytesToSkip > 0) {
        int step = (int) Math.min(bytesToSkip, Integer.MAX_VALUE);
        if (!input.skipFully(step, /* allowEndOfInput= */ true)) {
          // End of input reached before anything was skipped; the next header read ends the scan.
          break;
        }
        bytesToSkip -= step;
      }
    }
    enterReadingAtomHeaderState();
  }
}

private void onContainerAtomRead(ContainerBox container) throws ParserException {
if (container.type == Mp4Box.TYPE_moov) {
onMoovContainerAtomRead(container);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,22 @@ public void sampleSeekable() throws Exception {
/* closedCaptionFormats= */ ImmutableList.of(), "media/mp4/sample_fragmented_seekable.mp4");
}

@Test
public void sampleSeekableWithMultipleSidx() throws Exception {
  // Fragmented MP4 asset that carries more than one sidx box.
  String multipleSidxAsset = "media/mp4/sample_fragmented_seekable_multiple_sidx.mp4";
  // Build the extractor with sidx merging switched on; every other option is left empty or null.
  ExtractorAsserts.assertBehavior(
      () -> {
        return new FragmentedMp4Extractor(
            /* subtitleParserFactory= */ new DefaultSubtitleParserFactory(),
            /* flags= */ FragmentedMp4Extractor.FLAG_MERGE_FRAGMENTED_SIDX,
            /* timestampAdjuster= */ null,
            /* sideloadedTrack= */ null,
            /* closedCaptionFormats= */ ImmutableList.of(),
            /* additionalEmsgTrackOutput= */ null);
      },
      multipleSidxAsset,
      simulationConfig);
}

@Test
public void sampleWithSeiPayloadInputHasNoCaptions() throws Exception {
// Enabling the CEA-608 track enables SEI payload parsing.
Expand Down
Loading