RtpH264Reader.java
/*
* Copyright 2020 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package androidx.media3.exoplayer.rtsp.reader;
import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.common.util.Assertions.checkStateNotNull;
import static androidx.media3.common.util.Util.castNonNull;
import static androidx.media3.exoplayer.rtsp.reader.RtpReaderUtils.toSampleTimeUs;
import androidx.media3.common.C;
import androidx.media3.common.ParserException;
import androidx.media3.common.util.Log;
import androidx.media3.common.util.ParsableByteArray;
import androidx.media3.common.util.Util;
import androidx.media3.container.NalUnitUtil;
import androidx.media3.exoplayer.rtsp.RtpPacket;
import androidx.media3.exoplayer.rtsp.RtpPayloadFormat;
import androidx.media3.extractor.ExtractorOutput;
import androidx.media3.extractor.TrackOutput;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
import org.checkerframework.checker.nullness.qual.RequiresNonNull;
/** Parses an H264 byte stream carried on RTP packets, and extracts H264 Access Units. */
/* package */ final class RtpH264Reader implements RtpPayloadReader {
private static final String TAG = "RtpH264Reader";
private static final int MEDIA_CLOCK_FREQUENCY = 90_000;
/** Offset of payload data within a FU type A payload. */
private static final int FU_PAYLOAD_OFFSET = 2;
/** Single Time Aggregation Packet type A. */
private static final int RTP_PACKET_TYPE_STAP_A = 24;
/** Fragmentation Unit type A. */
private static final int RTP_PACKET_TYPE_FU_A = 28;
/** IDR NAL unit type. */
private static final int NAL_UNIT_TYPE_IDR = 5;
/** Scratch for Fragmentation Unit RTP packets. */
private final ParsableByteArray fuScratchBuffer;
private final ParsableByteArray nalStartCodeArray =
new ParsableByteArray(NalUnitUtil.NAL_START_CODE);
private final RtpPayloadFormat payloadFormat;
private @MonotonicNonNull TrackOutput trackOutput;
private @C.BufferFlags int bufferFlags;
private long firstReceivedTimestamp;
private int previousSequenceNumber;
/** The combined size of a sample that is fragmented into multiple RTP packets. */
private int fragmentedSampleSizeBytes;
private long startTimeOffsetUs;
/** Creates an instance. */
public RtpH264Reader(RtpPayloadFormat payloadFormat) {
this.payloadFormat = payloadFormat;
fuScratchBuffer = new ParsableByteArray();
firstReceivedTimestamp = C.TIME_UNSET;
previousSequenceNumber = C.INDEX_UNSET;
}
@Override
public void createTracks(ExtractorOutput extractorOutput, int trackId) {
trackOutput = extractorOutput.track(trackId, C.TRACK_TYPE_VIDEO);
castNonNull(trackOutput).format(payloadFormat.format);
}
@Override
public void onReceivingFirstPacket(long timestamp, int sequenceNumber) {}
@Override
public void consume(ParsableByteArray data, long timestamp, int sequenceNumber, boolean rtpMarker)
throws ParserException {
int rtpH264PacketMode;
try {
// RFC6184 Section 5.6, 5.7 and 5.8.
rtpH264PacketMode = data.getData()[0] & 0x1F;
} catch (IndexOutOfBoundsException e) {
throw ParserException.createForMalformedManifest(/* message= */ null, e);
}
checkStateNotNull(trackOutput);
if (rtpH264PacketMode > 0 && rtpH264PacketMode < 24) {
processSingleNalUnitPacket(data);
} else if (rtpH264PacketMode == RTP_PACKET_TYPE_STAP_A) {
processSingleTimeAggregationPacket(data);
} else if (rtpH264PacketMode == RTP_PACKET_TYPE_FU_A) {
processFragmentationUnitPacket(data, sequenceNumber);
} else {
throw ParserException.createForMalformedManifest(
String.format("RTP H264 packetization mode [%d] not supported.", rtpH264PacketMode),
/* cause= */ null);
}
if (rtpMarker) {
if (firstReceivedTimestamp == C.TIME_UNSET) {
firstReceivedTimestamp = timestamp;
}
long timeUs =
toSampleTimeUs(
startTimeOffsetUs, timestamp, firstReceivedTimestamp, MEDIA_CLOCK_FREQUENCY);
trackOutput.sampleMetadata(
timeUs, bufferFlags, fragmentedSampleSizeBytes, /* offset= */ 0, /* cryptoData= */ null);
fragmentedSampleSizeBytes = 0;
}
previousSequenceNumber = sequenceNumber;
}
@Override
public void seek(long nextRtpTimestamp, long timeUs) {
firstReceivedTimestamp = nextRtpTimestamp;
fragmentedSampleSizeBytes = 0;
startTimeOffsetUs = timeUs;
}
// Internal methods.
/**
* Processes Single NAL Unit packet (RFC6184 Section 5.6).
*
* <p>Outputs the single NAL Unit (with start code prepended) to {@link #trackOutput}. Sets {@link
* #bufferFlags} and {@link #fragmentedSampleSizeBytes} accordingly.
*/
@RequiresNonNull("trackOutput")
private void processSingleNalUnitPacket(ParsableByteArray data) {
// Example of a Single Nal Unit packet
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |F|NRI| Type | |
// +-+-+-+-+-+-+-+-+ |
// | |
// | Bytes 2..n of a single NAL unit |
// | |
// | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | :...OPTIONAL RTP padding |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
int numBytesInData = data.bytesLeft();
fragmentedSampleSizeBytes += writeStartCode();
trackOutput.sampleData(data, numBytesInData);
fragmentedSampleSizeBytes += numBytesInData;
int nalHeaderType = data.getData()[0] & 0x1F;
bufferFlags = getBufferFlagsFromNalType(nalHeaderType);
}
/**
* Processes STAP Type A packet (RFC6184 Section 5.7).
*
* <p>Outputs the received aggregation units (with start code prepended) to {@link #trackOutput}.
* Sets {@link #bufferFlags} and {@link #fragmentedSampleSizeBytes} accordingly.
*/
@RequiresNonNull("trackOutput")
private void processSingleTimeAggregationPacket(ParsableByteArray data) {
// Example of an STAP-A packet.
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | RTP Header |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |STAP-A NAL HDR | NALU 1 Size | NALU 1 HDR |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | NALU 1 Data |
// : :
// + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | | NALU 2 Size | NALU 2 HDR |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | NALU 2 Data |
// : :
// | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | :...OPTIONAL RTP padding |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// Skips STAP-A NAL HDR that has the NAL format |F|NRI|Type|, but with Type replaced by the
// STAP-A type id (RTP_PACKET_TYPE_STAP_A).
data.readUnsignedByte();
// Gets all NAL units until the remaining bytes are only enough to store an RTP padding.
int nalUnitLength;
while (data.bytesLeft() > 4) {
nalUnitLength = data.readUnsignedShort();
fragmentedSampleSizeBytes += writeStartCode();
trackOutput.sampleData(data, nalUnitLength);
fragmentedSampleSizeBytes += nalUnitLength;
}
// Treat Aggregated NAL units as non key frames.
bufferFlags = 0;
}
/**
* Processes Fragmentation Unit Type A packet (RFC6184 Section 5.8).
*
* <p>This method will be invoked multiple times to receive a single frame that is broken down
* into a series of fragmentation units in multiple RTP packets.
*
* <p>Outputs the received fragmentation units (with start code prepended) to {@link
* #trackOutput}. Sets {@link #bufferFlags} and {@link #fragmentedSampleSizeBytes} accordingly.
*/
@RequiresNonNull("trackOutput")
private void processFragmentationUnitPacket(ParsableByteArray data, int packetSequenceNumber) {
// FU-A mode packet layout.
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | FU indicator | FU header | |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
// | |
// | FU payload |
// | |
// | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | :...OPTIONAL RTP padding |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//
// FU Indicator FU Header
// 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |F|NRI| Type |S|E|R| Type |
// +---------------+---------------+
// Indicator: Upper 3 bits are the same as NALU header, Type = 28 (FU-A type).
// Header: Start/End/Reserved/Type. Type is same as NALU type.
int fuIndicator = data.getData()[0];
int fuHeader = data.getData()[1];
int nalHeader = (fuIndicator & 0xE0) | (fuHeader & 0x1F);
boolean isFirstFuPacket = (fuHeader & 0x80) > 0;
boolean isLastFuPacket = (fuHeader & 0x40) > 0;
if (isFirstFuPacket) {
// Prepends starter code.
fragmentedSampleSizeBytes += writeStartCode();
// The bytes needed is 1 (NALU header) + payload size. The original data array has size 2 (FU
// indicator/header) + payload size. Thus setting the correct header and set position to 1.
data.getData()[1] = (byte) nalHeader;
fuScratchBuffer.reset(data.getData());
fuScratchBuffer.setPosition(1);
} else {
// Check that this packet is in the sequence of the previous packet.
int expectedSequenceNumber = RtpPacket.getNextSequenceNumber(previousSequenceNumber);
if (packetSequenceNumber != expectedSequenceNumber) {
Log.w(
TAG,
Util.formatInvariant(
"Received RTP packet with unexpected sequence number. Expected: %d; received: %d."
+ " Dropping packet.",
expectedSequenceNumber, packetSequenceNumber));
return;
}
// Setting position to ignore FU indicator and header.
fuScratchBuffer.reset(data.getData());
fuScratchBuffer.setPosition(FU_PAYLOAD_OFFSET);
}
int fragmentSize = fuScratchBuffer.bytesLeft();
trackOutput.sampleData(fuScratchBuffer, fragmentSize);
fragmentedSampleSizeBytes += fragmentSize;
if (isLastFuPacket) {
bufferFlags = getBufferFlagsFromNalType(nalHeader & 0x1F);
}
}
private int writeStartCode() {
nalStartCodeArray.setPosition(/* position= */ 0);
int bytesWritten = nalStartCodeArray.bytesLeft();
checkNotNull(trackOutput).sampleData(nalStartCodeArray, bytesWritten);
return bytesWritten;
}
private static @C.BufferFlags int getBufferFlagsFromNalType(int nalType) {
return nalType == NAL_UNIT_TYPE_IDR ? C.BUFFER_FLAG_KEY_FRAME : 0;
}
}