androidx · anthonybajoua · Feb 25, 2025 · Mar 25, 2025 · Apr 24, 2025 · Apr 28, 2025
diff --git a/RELEASENOTES.md b/RELEASENOTES.md
@@ -25,6 +25,10 @@
         variable bitrate metadata when falling back to constant bitrate seeking
         due to `FLAG_ENABLE_CONSTANT_BITRATE_SEEKING(_ALWAYS)`
         ([#2194](https://github.com/androidx/media/issues/2194)).
+    *   Add support for seeking in fragmented MP4 with multiple `sidx` atoms.
+        This behavior can be enabled using the `FLAG_MERGE_FRAGMENTED_SIDX` flag
+        on `FragmentedMp4Extractor`
+        ([#9373](https://github.com/google/ExoPlayer/issues/9373)).
 *   DataSource:
 *   Audio:
     *   Allow constant power upmixing/downmixing in DefaultAudioMixer.

diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/ChunkIndexMerger.java b/libraries/extractor/src/main/java/androidx/media3/extractor/ChunkIndexMerger.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package androidx.media3.extractor;
+
+import androidx.media3.common.util.UnstableApi;
+import com.google.common.primitives.Ints;
+import com.google.common.primitives.Longs;
+import java.util.Map;
+import java.util.TreeMap;
+
+/**
+ * A utility class for merging multiple {@link ChunkIndex} instances into a single {@link
+ * ChunkIndex}.
+ *
+ * <p>This is useful in scenarios where media is split across multiple segments or sources, and a
+ * unified index is needed for seeking or playback.
+ *
+ * <p>Added indices are keyed by the start time of their first chunk, and are merged in ascending
+ * order of that start time regardless of the order in which they were added.
+ */
+@UnstableApi
+public final class ChunkIndexMerger {
+
+  /**
+   * Maps the start time in microseconds of each added {@link ChunkIndex}'s first chunk to that
+   * index. Iterates in ascending start time order.
+   */
+  private final Map<Long, ChunkIndex> chunkMap;
+
+  /** Creates an instance. */
+  public ChunkIndexMerger() {
+    this.chunkMap = new TreeMap<>();
+  }
+
+  /**
+   * Adds a {@link ChunkIndex} to be merged.
+   *
+   * <p>Empty chunk indices are ignored, as are chunk indices whose first start time was already
+   * added, to avoid redundant data.
+   *
+   * @param chunk The {@link ChunkIndex} to add.
+   */
+  public void add(ChunkIndex chunk) {
+    if (chunk.timesUs.length == 0) {
+      return;
+    }
+    long startTimeUs = chunk.timesUs[0];
+    if (!chunkMap.containsKey(startTimeUs)) {
+      chunkMap.put(startTimeUs, chunk);
+    }
+  }
+
+  /**
+   * Returns a single {@link ChunkIndex} that merges all added chunk indices, ordered by the
+   * start time of their first chunk.
+   */
+  public ChunkIndex merge() {
+    int count = chunkMap.size();
+    int[][] sizes = new int[count][];
+    long[][] offsets = new long[count][];
+    long[][] durationsUs = new long[count][];
+    long[][] timesUs = new long[count][];
+
+    int i = 0;
+    for (ChunkIndex chunk : chunkMap.values()) {
+      sizes[i] = chunk.sizes;
+      offsets[i] = chunk.offsets;
+      durationsUs[i] = chunk.durationsUs;
+      timesUs[i] = chunk.timesUs;
+      i++;
+    }
+
+    return new ChunkIndex(
+        Ints.concat(sizes),
+        Longs.concat(offsets),
+        Longs.concat(durationsUs),
+        Longs.concat(timesUs));
+  }
+
+  /** Clears all added chunk indices and internal state. */
+  public void clear() {
+    chunkMap.clear();
+  }
+
+  /** Returns the number of chunk indices retained so far, not counting ignored ones. */
+  public int size() {
+    return chunkMap.size();
+  }
+}
diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/mp4/FragmentedMp4Extractor.java b/libraries/extractor/src/main/java/androidx/media3/extractor/mp4/FragmentedMp4Extractor.java
@@ -47,6 +47,7 @@
 import androidx.media3.extractor.Ac4Util;
 import androidx.media3.extractor.CeaUtil;
 import androidx.media3.extractor.ChunkIndex;
+import androidx.media3.extractor.ChunkIndexMerger;
 import androidx.media3.extractor.Extractor;
 import androidx.media3.extractor.ExtractorInput;
 import androidx.media3.extractor.ExtractorOutput;
@@ -91,8 +92,8 @@ public static ExtractorsFactory newFactory(SubtitleParser.Factory subtitleParser
    * Flags controlling the behavior of the extractor. Possible flag values are {@link
    * #FLAG_WORKAROUND_EVERY_VIDEO_FRAME_IS_SYNC_FRAME}, {@link #FLAG_WORKAROUND_IGNORE_TFDT_BOX},
    * {@link #FLAG_ENABLE_EMSG_TRACK}, {@link #FLAG_WORKAROUND_IGNORE_EDIT_LISTS}, {@link
-   * #FLAG_EMIT_RAW_SUBTITLE_DATA}, {@link #FLAG_READ_WITHIN_GOP_SAMPLE_DEPENDENCIES} and {@link
-   * #FLAG_READ_WITHIN_GOP_SAMPLE_DEPENDENCIES_H265}.
+   * #FLAG_EMIT_RAW_SUBTITLE_DATA}, {@link #FLAG_READ_WITHIN_GOP_SAMPLE_DEPENDENCIES}, {@link
+   * #FLAG_READ_WITHIN_GOP_SAMPLE_DEPENDENCIES_H265} and {@link #FLAG_MERGE_FRAGMENTED_SIDX}.
    */
   @Documented
   @Retention(RetentionPolicy.SOURCE)
@@ -106,7 +107,8 @@ public static ExtractorsFactory newFactory(SubtitleParser.Factory subtitleParser
         FLAG_WORKAROUND_IGNORE_EDIT_LISTS,
         FLAG_EMIT_RAW_SUBTITLE_DATA,
         FLAG_READ_WITHIN_GOP_SAMPLE_DEPENDENCIES,
-        FLAG_READ_WITHIN_GOP_SAMPLE_DEPENDENCIES_H265
+        FLAG_READ_WITHIN_GOP_SAMPLE_DEPENDENCIES_H265,
+        FLAG_MERGE_FRAGMENTED_SIDX
       })
   public @interface Flags {}
 
@@ -159,6 +161,9 @@ public static ExtractorsFactory newFactory(SubtitleParser.Factory subtitleParser
    */
   public static final int FLAG_READ_WITHIN_GOP_SAMPLE_DEPENDENCIES_H265 = 1 << 7;
 
+  /** Flag that enables reading and merging of all sidx boxes before continuing extraction. */
+  public static final int FLAG_MERGE_FRAGMENTED_SIDX = 1 << 8;
+
   /**
    * @deprecated Use {@link #newFactory(SubtitleParser.Factory)} instead.
    */
@@ -219,6 +224,8 @@ public static ExtractorsFactory newFactory(SubtitleParser.Factory subtitleParser
   private final ReorderingBufferQueue reorderingBufferQueue;
   @Nullable private final TrackOutput additionalEmsgTrackOutput;
 
+  private final ChunkIndexMerger chunkIndexMerger;
+
   private ImmutableList<SniffFailure> lastSniffFailures;
   private int parserState;
   private int atomType;
@@ -246,6 +253,12 @@ public static ExtractorsFactory newFactory(SubtitleParser.Factory subtitleParser
   // Whether extractorOutput.seekMap has been called.
   private boolean haveOutputSeekMap;
 
+  // Whether we've encountered and merged multiple sidx boxes with different start times and
+  // extractorOutput.seekMap has been called.
+  private boolean haveOutputSeekMapFromMultipleSidx;
+
+  private long seekPositionBeforeSidxProcessing;
+
   /**
    * @deprecated Use {@link #FragmentedMp4Extractor(SubtitleParser.Factory)} instead
    */
@@ -428,6 +441,8 @@ public FragmentedMp4Extractor(
         new ReorderingBufferQueue(
             (presentationTimeUs, buffer) ->
                 CeaUtil.consume(presentationTimeUs, buffer, ceaTrackOutputs));
+    chunkIndexMerger = new ChunkIndexMerger();
+    seekPositionBeforeSidxProcessing = C.INDEX_UNSET;
   }
 
   /**
@@ -511,24 +526,36 @@ public void release() {
 
   @Override
   public int read(ExtractorInput input, PositionHolder seekPosition) throws IOException {
-    while (true) {
-      switch (parserState) {
-        case STATE_READING_ATOM_HEADER:
-          if (!readAtomHeader(input)) {
-            reorderingBufferQueue.flush();
-            return Extractor.RESULT_END_OF_INPUT;
-          }
-          break;
-        case STATE_READING_ATOM_PAYLOAD:
-          readAtomPayload(input);
-          break;
-        case STATE_READING_ENCRYPTION_DATA:
-          readEncryptionData(input);
-          break;
-        default:
-          if (readSample(input)) {
-            return RESULT_CONTINUE;
-          }
+    try {
+      while (true) {
+        switch (parserState) {
+          case STATE_READING_ATOM_HEADER:
+            if (!readAtomHeader(input, /* skipPayloadParsing= */ false)) {
+              if (seekPositionBeforeSidxProcessing != C.INDEX_UNSET) {
+                // The end of input was reached while scanning ahead for sidx boxes, so request
+                // a seek back to the position at which that scan started.
+                seekPosition.position = seekPositionBeforeSidxProcessing;
+                return Extractor.RESULT_SEEK;
+              }
+              reorderingBufferQueue.flush();
+              return Extractor.RESULT_END_OF_INPUT;
+            }
+            break;
+          case STATE_READING_ATOM_PAYLOAD:
+            readAtomPayload(input);
+            break;
+          case STATE_READING_ENCRYPTION_DATA:
+            readEncryptionData(input);
+            break;
+          default:
+            if (readSample(input)) {
+              return RESULT_CONTINUE;
+            }
+        }
+      }
+    } finally {
+      // Never carry a pending sidx seek target into the next read() call, whether this call
+      // returns normally or throws.
+      seekPositionBeforeSidxProcessing = C.INDEX_UNSET;
-      }
     }
   }
@@ -538,7 +565,8 @@ private void enterReadingAtomHeaderState() {
     atomHeaderBytesRead = 0;
   }
 
-  private boolean readAtomHeader(ExtractorInput input) throws IOException {
+  private boolean readAtomHeader(ExtractorInput input, boolean skipPayloadParsing)
+      throws IOException {
     if (atomHeaderBytesRead == 0) {
       // Read the standard length atom header.
       if (!input.readFully(atomHeader.getData(), 0, Mp4Box.HEADER_SIZE, true)) {
@@ -573,6 +601,10 @@ private boolean readAtomHeader(ExtractorInput input) throws IOException {
           "Atom size less than header length (unsupported).");
     }
 
+    if (skipPayloadParsing) {
+      return true;
+    }
+
     long atomPosition = input.getPosition() - atomHeaderBytesRead;
     if (atomType == Mp4Box.TYPE_moof || atomType == Mp4Box.TYPE_mdat) {
       if (!haveOutputSeekMap) {
@@ -639,7 +671,7 @@ private void readAtomPayload(ExtractorInput input) throws IOException {
     @Nullable ParsableByteArray atomData = this.atomData;
     if (atomData != null) {
       input.readFully(atomData.getData(), Mp4Box.HEADER_SIZE, atomPayloadSize);
-      onLeafAtomRead(new LeafBox(atomType, atomData), input.getPosition());
+      onLeafAtomRead(new LeafBox(atomType, atomData), input);
     } else {
       input.skipFully(atomPayloadSize);
     }
@@ -653,19 +685,73 @@ private void processAtomEnded(long atomEndPosition) throws ParserException {
     enterReadingAtomHeaderState();
   }
 
-  private void onLeafAtomRead(LeafBox leaf, long inputPosition) throws ParserException {
+  private void onLeafAtomRead(LeafBox leaf, ExtractorInput input) throws IOException {
     if (!containerAtoms.isEmpty()) {
       containerAtoms.peek().add(leaf);
     } else if (leaf.type == Mp4Box.TYPE_sidx) {
+      long inputPosition = input.getPosition();
       Pair<Long, ChunkIndex> result = parseSidx(leaf.data, inputPosition);
-      segmentIndexEarliestPresentationTimeUs = result.first;
-      extractorOutput.seekMap(result.second);
-      haveOutputSeekMap = true;
+      chunkIndexMerger.add(result.second);
+      if (!haveOutputSeekMap) {
+        segmentIndexEarliestPresentationTimeUs = result.first;
+        extractorOutput.seekMap(result.second);
+        haveOutputSeekMap = true;
+      } else if ((flags & FLAG_MERGE_FRAGMENTED_SIDX) != 0
+          && !haveOutputSeekMapFromMultipleSidx
+          && chunkIndexMerger.size() > 1) {
+        // A further sidx with a distinct start time was found: scan the rest of the input for
+        // more sidx boxes, then have read() seek back to this position to resume extraction.
+        seekPositionBeforeSidxProcessing = inputPosition;
+        try {
+          processRemainingSidxAtoms(input);
+          haveOutputSeekMapFromMultipleSidx = true;
+        } finally {
+          // Output whatever was merged, even if the scan failed part way through.
+          extractorOutput.seekMap(chunkIndexMerger.merge());
+        }
+      }
     } else if (leaf.type == Mp4Box.TYPE_emsg) {
       onEmsgLeafAtomRead(leaf.data);
     }
   }
 
+  /**
+   * Scans forward from the current input position until no further box header can be read,
+   * adding every sidx box found at the top level to {@link #chunkIndexMerger} and skipping all
+   * other boxes.
+   *
+   * @param input The {@link ExtractorInput} to scan.
+   * @throws IOException If an error occurs reading from the input.
+   */
+  private void processRemainingSidxAtoms(ExtractorInput input) throws IOException {
+    enterReadingAtomHeaderState();
+    while (readAtomHeader(input, /* skipPayloadParsing= */ true)) {
+      long payloadSize = atomSize - atomHeaderBytesRead;
+      // A sidx box with an extended size header, or one too large to buffer, cannot be parsed
+      // here and is skipped like any other box.
+      if (atomType == Mp4Box.TYPE_sidx
+          && atomHeaderBytesRead == Mp4Box.HEADER_SIZE
+          && atomSize <= Integer.MAX_VALUE) {
+        scratch.reset((int) atomSize);
+        System.arraycopy(atomHeader.getData(), 0, scratch.getData(), 0, Mp4Box.HEADER_SIZE);
+        input.readFully(scratch.getData(), Mp4Box.HEADER_SIZE, (int) payloadSize);
+        // Pass the position just after the box, as is done for a sidx read by onLeafAtomRead.
+        chunkIndexMerger.add(parseSidx(scratch, input.getPosition()).second);
+      } else {
+        // Skip in int-sized steps because a box (for example mdat) may exceed Integer.MAX_VALUE
+        // bytes, which a single skipFully call cannot express.
+        while (payloadSize > 0) {
+          int skipLength = (int) Math.min(payloadSize, Integer.MAX_VALUE);
+          if (!input.skipFully(skipLength, /* allowEndOfInput= */ true)) {
+            break;
+          }
+          payloadSize -= skipLength;
+        }
+      }
+      enterReadingAtomHeaderState();
+    }
+  }
+
   private void onContainerAtomRead(ContainerBox container) throws ParserException {
     if (container.type == Mp4Box.TYPE_moov) {
       onMoovContainerAtomRead(container);

diff --git a/.../src/test/java/androidx/media3/extractor/mp4/FragmentedMp4ExtractorParameterizedTest.java b/.../src/test/java/androidx/media3/extractor/mp4/FragmentedMp4ExtractorParameterizedTest.java
@@ -88,6 +88,22 @@ public void sampleSeekable() throws Exception {
         /* closedCaptionFormats= */ ImmutableList.of(), "media/mp4/sample_fragmented_seekable.mp4");
   }
 
+  @Test
+  public void sampleSeekableWithMultipleSidx() throws Exception {
+    // Runs with FLAG_MERGE_FRAGMENTED_SIDX so that the sidx merging path is exercised.
+    ExtractorAsserts.assertBehavior(
+        () ->
+            new FragmentedMp4Extractor(
+                /* subtitleParserFactory= */ new DefaultSubtitleParserFactory(),
+                /* flags= */ FragmentedMp4Extractor.FLAG_MERGE_FRAGMENTED_SIDX,
+                /* timestampAdjuster= */ null,
+                /* sideloadedTrack= */ null,
+                /* closedCaptionFormats= */ ImmutableList.of(),
+                /* additionalEmsgTrackOutput= */ null),
+        "media/mp4/sample_fragmented_seekable_multiple_sidx.mp4",
+        simulationConfig);
+  }
+
   @Test
   public void sampleWithSeiPayloadInputHasNoCaptions() throws Exception {
     // Enabling the CEA-608 track enables SEI payload parsing.