/*
* Copyright 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package androidx.media3.muxer;
import static androidx.media3.common.util.Assertions.checkArgument;
import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.common.util.Assertions.checkState;
import static androidx.media3.muxer.ColorUtils.MEDIAFORMAT_STANDARD_TO_PRIMARIES_AND_MATRIX;
import static androidx.media3.muxer.ColorUtils.MEDIAFORMAT_TRANSFER_TO_MP4_TRANSFER;
import static androidx.media3.muxer.Mp4Utils.MVHD_TIMEBASE;
import android.media.MediaCodec;
import androidx.annotation.Nullable;
import androidx.media3.common.C;
import androidx.media3.common.ColorInfo;
import androidx.media3.common.Format;
import androidx.media3.common.MimeTypes;
import androidx.media3.common.util.Util;
import androidx.media3.container.NalUnitUtil;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.primitives.Bytes;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
/**
* Writes out various types of boxes as per MP4 (ISO/IEC 14496-12) standards.
*
* <p>Boxes do not construct their sub-boxes but take them as input {@linkplain ByteBuffer byte
* buffers}.
*/
/* package */ final class Boxes {
private Boxes() {}
public static final ImmutableList<Byte> XMP_UUID =
ImmutableList.of(
(byte) 0xBE,
(byte) 0x7A,
(byte) 0xCF,
(byte) 0xCB,
(byte) 0x97,
(byte) 0xA9,
(byte) 0x42,
(byte) 0xE8,
(byte) 0x9C,
(byte) 0x71,
(byte) 0x99,
(byte) 0x94,
(byte) 0x91,
(byte) 0xE3,
(byte) 0xAF,
(byte) 0xAC);
/**
* Returns the tkhd box.
*
* <p>This is a per-track header box.
*/
public static ByteBuffer tkhd(
int trackId,
int trackDurationVu,
long modificationDateUnixMs,
int orientation,
Format format) {
ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.putInt(0x00000007); // version and flags; allow presentation, etc.
contents.putInt(toMp4Time(modificationDateUnixMs)); // creation_time
contents.putInt(toMp4Time(modificationDateUnixMs)); // modification_time
contents.putInt(trackId);
contents.putInt(0); // reserved
contents.putInt(trackDurationVu);
contents.putInt(0); // reserved
contents.putInt(0); // reserved
contents.putInt(0); // layer = 0 and alternate_group = 0
contents.putShort(MimeTypes.isAudio(format.sampleMimeType) ? (short) 0x0100 : 0); // volume
contents.putShort((short) 0); // reserved
contents.put(rotationMatrixFromOrientation(orientation));
int width = format.width != Format.NO_VALUE ? format.width : 0;
int height = format.height != Format.NO_VALUE ? format.height : 0;
contents.putInt(width << 16);
contents.putInt(height << 16);
contents.flip();
return BoxUtils.wrapIntoBox("tkhd", contents);
}
/**
* Returns the mvhd box.
*
* <p>This is the movie header for the entire MP4 file.
*/
public static ByteBuffer mvhd(
int nextEmptyTrackId, long modificationDateUnixMs, long videoDurationUs) {
ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.putInt(0); // version and flags
contents.putInt(toMp4Time(modificationDateUnixMs)); // creation_time
contents.putInt(toMp4Time(modificationDateUnixMs)); // modification_time
contents.putInt((int) MVHD_TIMEBASE); // The per-track timescales might be different.
contents.putInt(
(int) Mp4Utils.vuFromUs(videoDurationUs, MVHD_TIMEBASE)); // Duration of the entire video.
contents.putInt(0x00010000); // rate = 1.0
contents.putShort((short) 0x0100); // volume = full volume
contents.putShort((short) 0); // reserved
contents.putInt(0); // reserved
contents.putInt(0); // reserved
// Default values (unity matrix). It looks like that this needs to be an identity matrix, since
// some players will apply both this and the per-track transformation, while some only go with
// the per-track one.
int[] matrix = {0x00010000, 0, 0, 0, 0x00010000, 0, 0, 0, 0x40000000};
for (int i = 0; i < matrix.length; i++) {
contents.putInt(matrix[i]);
}
for (int i = 0; i < 6; i++) {
contents.putInt(0); // pre_defined
}
// Next empty track id.
contents.putInt(nextEmptyTrackId);
contents.flip();
return BoxUtils.wrapIntoBox("mvhd", contents);
}
/**
* Returns the mdhd box.
*
* <p>This is a per-track (media) header.
*/
public static ByteBuffer mdhd(
long trackDurationVu,
int videoUnitTimebase,
long modificationDateUnixMs,
@Nullable String languageCode) {
ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.putInt(0x0); // version and flags
contents.putInt(toMp4Time(modificationDateUnixMs)); // creation_time
contents.putInt(toMp4Time(modificationDateUnixMs)); // modification_time
contents.putInt(videoUnitTimebase);
contents.putInt((int) trackDurationVu);
contents.putShort(languageCodeFromString(languageCode));
contents.putShort((short) 0);
contents.flip();
return BoxUtils.wrapIntoBox("mdhd", contents);
}
/**
* Returns the vmhd box.
*
* <p>This is a header for video tracks.
*/
public static ByteBuffer vmhd() {
ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.putInt(0x0); // version and flags
contents.putShort((short) 0); // graphicsmode
// opcolor (red, green, blue)
contents.putShort((short) 0);
contents.putShort((short) 0);
contents.putShort((short) 0);
contents.flip();
return BoxUtils.wrapIntoBox("vmhd", contents);
}
/**
* Returns the smhd box.
*
* <p>This is a header for audio tracks.
*/
public static ByteBuffer smhd() {
ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.putInt(0x0); // version and flags
contents.putShort((short) 0); // balance
contents.putShort((short) 0); // reserved
contents.flip();
return BoxUtils.wrapIntoBox("smhd", contents);
}
/**
* Returns the nmhd box.
*
* <p>This is a header for metadata tracks.
*/
public static ByteBuffer nmhd() {
ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.putInt(0x0); // version and flags
contents.flip();
return BoxUtils.wrapIntoBox("nmhd", contents);
}
/**
* Returns a text metadata sample entry box as per ISO/IEC 14496-12: 8.5.2.2.
*
* <p>This contains the sample entry (to be placed within the sample description box) for the text
* metadata tracks.
*/
public static ByteBuffer textMetaDataSampleEntry(Format format) {
ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
String mimeType = checkNotNull(format.sampleMimeType);
byte[] mimeBytes = Util.getUtf8Bytes(mimeType);
contents.put(mimeBytes); // content_encoding
contents.put((byte) 0x00);
contents.put(mimeBytes); // mime_format
contents.put((byte) 0x00);
contents.flip();
return BoxUtils.wrapIntoBox("mett", contents);
}
/** Returns the minf (media info) box. */
public static ByteBuffer minf(ByteBuffer... subBoxes) {
return BoxUtils.wrapBoxesIntoBox("minf", Arrays.asList(subBoxes));
}
/** Returns the dref (data references) box. */
public static ByteBuffer dref(ByteBuffer... dataLocationBoxes) {
// We have a "number of contained boxes" field; let's pretend this is also a box so that
// wrapBoxesIntoBoxes() can concatenate it with the rest.
ByteBuffer header = ByteBuffer.allocate(8);
header.putInt(0);
header.putInt(dataLocationBoxes.length);
header.flip();
List<ByteBuffer> contents = new ArrayList<>();
contents.add(header);
Collections.addAll(contents, dataLocationBoxes);
return BoxUtils.wrapBoxesIntoBox("dref", contents);
}
/** Returns the dinf (data information) box. */
public static ByteBuffer dinf(ByteBuffer dref) {
return BoxUtils.wrapIntoBox("dinf", dref);
}
/**
* Returns the url box.
*
* <p>This box declares the location of media data (whether it is in this file or in some other
* remote file).
*/
public static ByteBuffer localUrl() {
ByteBuffer contents = ByteBuffer.allocate(4);
// Flag indicating that the data is in fact in this very file instead of a remote
// URL. Accordingly, no actual URL string is present.
contents.putInt(1);
// Since we set the flag to 1, no actual URL needs to follow.
contents.flip();
return BoxUtils.wrapIntoBox("url ", contents);
}
/**
* Returns the hdlr box.
*
* <p>This box includes tha handler specification for a track (signals whether this is video,
* audio or metadata).
*
* @param handlerType The handle type, as defined in ISO/IEC 14496-12: 8.4.3.3.
* @param handlerName The handler name, a human-readable name to identify track type for debugging
* and inspection purposes.
* @return {@link ByteBuffer} containing the hdlr box.
*/
public static ByteBuffer hdlr(String handlerType, String handlerName) {
ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.putInt(0x0); // version and flags.
contents.putInt(0); // pre_defined.
contents.put(Util.getUtf8Bytes(handlerType)); // handler_type.
contents.putInt(0); // reserved.
contents.putInt(0); // reserved.
contents.putInt(0); // reserved.
contents.put(Util.getUtf8Bytes(handlerName)); // name.
contents.put((byte) 0); // The null terminator for name.
contents.flip();
return BoxUtils.wrapIntoBox("hdlr", contents);
}
/**
* Returns the mdia box.
*
* <p>This box describes the media format of a track.
*/
public static ByteBuffer mdia(ByteBuffer... subBoxes) {
return BoxUtils.wrapBoxesIntoBox("mdia", Arrays.asList(subBoxes));
}
/**
* Returns the trak box.
*
* <p>This is a top level track descriptor box; each track has one.
*/
public static ByteBuffer trak(ByteBuffer... subBoxes) {
return BoxUtils.wrapBoxesIntoBox("trak", Arrays.asList(subBoxes));
}
/**
* Returns the udta box.
*
* <p>This box contains user data like location info.
*/
public static ByteBuffer udta(@Nullable Mp4Location location) {
// We can just omit the entire box if there is no location info available.
if (location == null) {
return ByteBuffer.allocate(0);
}
String locationString =
String.format(Locale.US, "%+.4f%+.4f/", location.latitude, location.longitude);
ByteBuffer xyzBoxContents = ByteBuffer.allocate(locationString.length() + 2 + 2);
xyzBoxContents.putShort((short) (xyzBoxContents.capacity() - 4));
xyzBoxContents.putShort((short) 0x15C7); // language code?
xyzBoxContents.put(Util.getUtf8Bytes(locationString));
checkState(xyzBoxContents.limit() == xyzBoxContents.capacity());
xyzBoxContents.flip();
return BoxUtils.wrapIntoBox(
"udta",
BoxUtils.wrapIntoBox(
new byte[] {
(byte) 0xA9, // copyright symbol
'x',
'y',
'z'
},
xyzBoxContents));
}
/**
* Returns the keys box.
*
* <p>This box contains a list of metadata keys.
*/
public static ByteBuffer keys(List<String> keyNames) {
// This should be an adaptive size here; we don't yet care since it's usually small.
ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.putInt(0x0); // version and flags.
contents.putInt(keyNames.size()); // num of entries
for (int i = 0; i < keyNames.size(); i++) {
ByteBuffer keyNameBuffer = ByteBuffer.wrap(Util.getUtf8Bytes(keyNames.get(i)));
contents.put(BoxUtils.wrapIntoBox("mdta", keyNameBuffer));
}
contents.flip();
return BoxUtils.wrapIntoBox("keys", contents);
}
/**
* Returns the ilst box.
*
* <p>This box contains a list of metadata values.
*/
public static ByteBuffer ilst(List<Object> values) {
// This should be an adaptive size here; we don't yet care since it's usually small.
ByteBuffer contents = ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
for (int i = 0; i < values.size(); i++) {
int keyId = i + 1;
Object value = values.get(i);
ByteBuffer valueContents;
if (value instanceof String) {
String valueString = (String) value;
byte[] valueBytes = Util.getUtf8Bytes(valueString);
valueContents = ByteBuffer.allocate(valueBytes.length + 8);
valueContents.putInt(1); // type code for UTF-8 string
valueContents.putInt(0); // default country / language
valueContents.put(valueBytes);
} else if (value instanceof Float) {
valueContents = ByteBuffer.allocate(12);
valueContents.putInt(23); // float32
valueContents.putInt(0); // language / country
valueContents.putFloat((float) value);
} else {
throw new IllegalArgumentException("Unknown metadata type: " + value.getClass());
}
valueContents.flip();
ByteBuffer valueBox = BoxUtils.wrapIntoBox("data", valueContents);
contents.putInt(valueBox.remaining() + 8);
contents.putInt(keyId);
contents.put(valueBox);
}
contents.flip();
return BoxUtils.wrapIntoBox("ilst", contents);
}
/** Returns the meta (metadata) box. */
public static ByteBuffer meta(ByteBuffer... subBoxes) {
return BoxUtils.wrapBoxesIntoBox("meta", Arrays.asList(subBoxes));
}
/**
* Returns the uuid box.
*
* <p>This box is used for XMP and other metadata.
*/
public static ByteBuffer uuid(List<Byte> uuid, ByteBuffer contents) {
checkArgument(contents.remaining() > 0);
return BoxUtils.wrapBoxesIntoBox(
"uuid", ImmutableList.of(ByteBuffer.wrap(Bytes.toArray(uuid)), contents));
}
/**
* Returns the moov box.
*
* <p>This box is a top level movie descriptor box (there is a single one of this per Mp4 file).
*/
public static ByteBuffer moov(
ByteBuffer mvhdBox,
ByteBuffer udtaBox,
ByteBuffer metaBox,
List<ByteBuffer> trakBoxes,
ByteBuffer mvexBox) {
List<ByteBuffer> subBoxes = new ArrayList<>();
subBoxes.add(mvhdBox);
subBoxes.add(udtaBox);
subBoxes.add(metaBox);
subBoxes.addAll(trakBoxes);
subBoxes.add(mvexBox);
return BoxUtils.wrapBoxesIntoBox("moov", subBoxes);
}
/** Returns an audio sample entry box based on the MIME type. */
public static ByteBuffer audioSampleEntry(Format format) {
String mimeType = checkNotNull(format.sampleMimeType);
checkArgument(mimeType.equals(MimeTypes.AUDIO_AAC), "Unsupported audio format: " + mimeType);
String fourcc = "mp4a";
checkArgument(!format.initializationData.isEmpty(), "csd-0 not found in the format.");
ByteBuffer csd0 = ByteBuffer.wrap(format.initializationData.get(0));
ByteBuffer contents = ByteBuffer.allocate(csd0.limit() + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.putInt(0x00); // reserved
contents.putShort((short) 0x0); // reserved
contents.putShort((short) 0x1); // data ref index
contents.putInt(0x00); // reserved
contents.putInt(0x00); // reserved
int channelCount = format.channelCount;
contents.putShort((short) channelCount);
contents.putShort((short) 16); // sample size
contents.putShort((short) 0x0); // predefined
contents.putShort((short) 0x0); // reserved
int sampleRate = format.sampleRate;
contents.putInt(sampleRate << 16);
contents.put(audioEsdsBox(format));
contents.flip();
return BoxUtils.wrapIntoBox(fourcc, contents);
}
/** Returns a codec specific box. */
public static ByteBuffer codecSpecificBox(Format format) {
String mimeType = checkNotNull(format.sampleMimeType);
switch (mimeType) {
case "video/avc":
return avcCBox(format);
case "video/hevc":
return hvcCBox(format);
case "video/av01":
return av1CBox(format);
default:
throw new IllegalArgumentException("Unsupported video format: " + mimeType);
}
}
/**
* Returns a {@code VisualSampleEntry} box based upon the MIME type.
*
* <p>The {@code VisualSampleEntry} schema is defined in ISO/IEC 14496-12: 8.5.2.2.
*/
public static ByteBuffer videoSampleEntry(Format format) {
ByteBuffer codecSpecificBox = codecSpecificBox(format);
String fourcc = codecSpecificFourcc(format);
ByteBuffer contents =
ByteBuffer.allocate(Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE + codecSpecificBox.limit());
// reserved = 0 (6 bytes)
contents.putInt(0);
contents.putShort((short) 0);
contents.putShort((short) 1); // data_reference_index
contents.putShort((short) 0); // pre_defined
contents.putShort((short) 0); // reserved
// pre_defined
contents.putInt(0);
contents.putInt(0);
contents.putInt(0);
contents.putShort(format.width != Format.NO_VALUE ? (short) format.width : 0);
contents.putShort(format.height != Format.NO_VALUE ? (short) format.height : 0);
contents.putInt(0x00480000); // horizresolution = 72 dpi
contents.putInt(0x00480000); // vertresolution = 72 dpi
contents.putInt(0); // reserved
contents.putShort((short) 1); // frame_count
// compressorname
contents.putLong(0);
contents.putLong(0);
contents.putLong(0);
contents.putLong(0);
contents.putShort((short) 0x0018); // depth
contents.putShort((short) -1); // pre_defined
contents.put(codecSpecificBox);
contents.put(paspBox());
// Put in a "colr" box if any of the three color format parameters has a non-default (0) value.
// TODO: b/278101856 - Only null check should be enough once we disallow invalid values.
if (format.colorInfo != null
&& (format.colorInfo.colorSpace != 0
|| format.colorInfo.colorTransfer != 0
|| format.colorInfo.colorRange != 0)) {
contents.put(colrBox(format.colorInfo));
}
contents.flip();
return BoxUtils.wrapIntoBox(fourcc, contents);
}
/**
* Converts sample presentation times (in microseconds) to sample durations (in timebase units)
* that will go into the stts box.
*
* <p>ISO/IEC 14496-12: 8.6.1.3.1 recommends each track starts at 0. Therefore, the first sample
* presentation timestamp is set to 0 and the duration of that sample may be larger as a result.
*
* @param writtenSamples All the written samples.
* @param minInputPresentationTimestampUs The global minimum presentation timestamp which needs to
* be subtracted from each sample's presentation timestamp.
* @param videoUnitTimescale The timescale of the track.
* @param lastDurationBehavior The behaviour for the last sample duration.
* @return A list of all the sample durations.
*/
// TODO: b/280084657 - Add support for setting last sample duration.
public static List<Long> durationsVuForStts(
List<MediaCodec.BufferInfo> writtenSamples,
long minInputPresentationTimestampUs,
int videoUnitTimescale,
@Mp4Muxer.LastFrameDurationBehavior int lastDurationBehavior) {
List<Long> durationsVu = new ArrayList<>();
long currentTimeVu = 0L;
for (int sampleId = 0; sampleId < writtenSamples.size(); sampleId++) {
long samplePtsUs = writtenSamples.get(sampleId).presentationTimeUs;
long sampleSpanEndsAtUs =
sampleId == writtenSamples.size() - 1
? samplePtsUs
: writtenSamples.get(sampleId + 1).presentationTimeUs;
sampleSpanEndsAtUs -= minInputPresentationTimestampUs;
long sampleSpanEndsAtVu = Mp4Utils.vuFromUs(sampleSpanEndsAtUs, videoUnitTimescale);
long durationVu = sampleSpanEndsAtVu - currentTimeVu;
currentTimeVu = sampleSpanEndsAtVu;
if (durationVu >= Integer.MAX_VALUE) {
throw new IllegalArgumentException(
String.format(Locale.US, "Timestamp delta %d doesn't fit into an int", durationVu));
}
durationsVu.add(durationVu);
}
adjustLastSampleDuration(durationsVu, lastDurationBehavior);
return durationsVu;
}
/** Generates the stts (decoding time to sample) box. */
public static ByteBuffer stts(List<Long> durationsVu) {
ByteBuffer contents =
ByteBuffer.allocate(durationsVu.size() * 8 + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.putInt(0x0); // version and flags.
// We will know total entry count only after processing all the sample durations, so put in a
// placeholder for total entry count and store its index.
int totalEntryCountIndex = contents.position();
contents.putInt(0x0); // entry_count.
int totalEntryCount = 0;
long lastDurationVu = -1L;
int lastSampleCountIndex = -1;
// Note that the framework MediaMuxer adjust time deltas within plus-minus 100 us, so that
// samples have repeating duration values. It saves few entries in the table.
for (int i = 0; i < durationsVu.size(); i++) {
long durationVu = durationsVu.get(i);
if (lastDurationVu != durationVu) {
lastDurationVu = durationVu;
lastSampleCountIndex = contents.position();
// sample_count; this will be updated instead of adding a new entry if the next sample has
// the same duration.
contents.putInt(1);
contents.putInt((int) durationVu); // sample_delta.
totalEntryCount++;
} else {
contents.putInt(lastSampleCountIndex, contents.getInt(lastSampleCountIndex) + 1);
}
}
contents.putInt(totalEntryCountIndex, totalEntryCount);
contents.flip();
return BoxUtils.wrapIntoBox("stts", contents);
}
/** Returns the stsz (sample size) box. */
public static ByteBuffer stsz(List<MediaCodec.BufferInfo> writtenSamples) {
ByteBuffer contents =
ByteBuffer.allocate(writtenSamples.size() * 4 + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.putInt(0x0); // version and flags.
// TODO: b/270583563 - Consider optimizing for identically-sized samples.
// sample_size; specifying the default sample size. Set to zero to indicate that the samples
// have different sizes and they are stored in the sample size table.
contents.putInt(0);
contents.putInt(writtenSamples.size()); // sample_count.
for (int i = 0; i < writtenSamples.size(); i++) {
contents.putInt(writtenSamples.get(i).size);
}
contents.flip();
return BoxUtils.wrapIntoBox("stsz", contents);
}
/** Returns the stsc (sample to chunk) box. */
public static ByteBuffer stsc(List<Integer> writtenChunkSampleCounts) {
ByteBuffer contents =
ByteBuffer.allocate(
writtenChunkSampleCounts.size() * 12 + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.putInt(0x0); // version and flags.
contents.putInt(writtenChunkSampleCounts.size()); // entry_count.
int currentChunk = 1;
// TODO: b/270583563 - Consider optimizing for consecutive chunks having same number of samples.
for (int i = 0; i < writtenChunkSampleCounts.size(); i++) {
int samplesInChunk = writtenChunkSampleCounts.get(i);
contents.putInt(currentChunk); // first_chunk.
contents.putInt(samplesInChunk); // samples_per_chunk.
// sample_description_index; we have only one sample description in each track.
contents.putInt(1);
currentChunk += 1;
}
contents.flip();
return BoxUtils.wrapIntoBox("stsc", contents);
}
/** Returns the co64 (chunk offset) box. */
public static ByteBuffer co64(List<Long> writtenChunkOffsets) {
ByteBuffer contents =
ByteBuffer.allocate(writtenChunkOffsets.size() * 8 + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.putInt(0x0); // version.
contents.putInt(writtenChunkOffsets.size()); // entry_count.
for (int i = 0; i < writtenChunkOffsets.size(); i++) {
contents.putLong(writtenChunkOffsets.get(i)); // chunk_offset.
}
contents.flip();
return BoxUtils.wrapIntoBox("co64", contents);
}
/** Returns the stss (sync sample) box. */
public static ByteBuffer stss(List<MediaCodec.BufferInfo> writtenSamples) {
ByteBuffer contents =
ByteBuffer.allocate(writtenSamples.size() * 4 + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.putInt(0x0); // version and flags.
// We will know total entry count only after processing all the sample, so put in a placeholder
// for total entry count and store its index.
int totalEntryCountIndex = contents.position();
contents.putInt(writtenSamples.size()); // entry_count.
int currentSampleNumber = 1;
int totalKeyFrames = 0;
for (int i = 0; i < writtenSamples.size(); i++) {
MediaCodec.BufferInfo info = writtenSamples.get(i);
if ((info.flags & MediaCodec.BUFFER_FLAG_KEY_FRAME) > 0) {
contents.putInt(currentSampleNumber); // sample_number.
totalKeyFrames++;
}
currentSampleNumber++;
}
contents.putInt(totalEntryCountIndex, totalKeyFrames);
contents.flip();
return BoxUtils.wrapIntoBox("stss", contents);
}
/** Returns the stsd (sample description) box. */
public static ByteBuffer stsd(ByteBuffer sampleEntryBox) {
ByteBuffer contents =
ByteBuffer.allocate(sampleEntryBox.limit() + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.putInt(0x0); // version and flags.
contents.putInt(1); // entry_count, We have only one sample description in each track.
contents.put(sampleEntryBox);
contents.flip();
return BoxUtils.wrapIntoBox("stsd", contents);
}
/** Returns the stbl (sample table) box. */
public static ByteBuffer stbl(ByteBuffer... subBoxes) {
return BoxUtils.wrapBoxesIntoBox("stbl", Arrays.asList(subBoxes));
}
/** Creates the ftyp box. */
public static ByteBuffer ftyp() {
List<ByteBuffer> boxBytes = new ArrayList<>();
String majorVersion = "isom";
boxBytes.add(ByteBuffer.wrap(Util.getUtf8Bytes(majorVersion)));
int minorVersion = 0x020000;
ByteBuffer minorBytes = ByteBuffer.allocate(4);
minorBytes.putInt(minorVersion);
minorBytes.flip();
boxBytes.add(minorBytes);
String[] compatibleBrands = {"isom", "iso2", "mp41"};
for (String compatibleBrand : compatibleBrands) {
boxBytes.add(ByteBuffer.wrap(Util.getUtf8Bytes(compatibleBrand)));
}
return BoxUtils.wrapBoxesIntoBox("ftyp", boxBytes);
}
/** Adjusts the duration of the very last sample if needed. */
private static void adjustLastSampleDuration(
List<Long> durationsToBeAdjustedVu, @Mp4Muxer.LastFrameDurationBehavior int behavior) {
// Technically, MP4 files store not timestamps but frame durations. Thus, if we interpret
// timestamps as the start of frames then it's not obvious what's the duration of the very
// last frame should be. If our samples follow each other in roughly regular intervals (e.g. in
// a normal, 30 fps video), it makes sense to assume that the last sample will last the same ~33
// ms as the all the other ones before. On the other hand, if we have just a few, irregularly
// spaced frames, with duplication, the entire duration of the video will increase, creating
// abnormal gaps.
if (durationsToBeAdjustedVu.size() <= 2) {
// Nothing to duplicate if there are 0 or 1 entries.
return;
}
switch (behavior) {
case Mp4Muxer.LAST_FRAME_DURATION_BEHAVIOR_DUPLICATE_PREV_DURATION:
// This is the default MediaMuxer behavior: the last sample duration is a copy of the
// previous sample duration.
durationsToBeAdjustedVu.set(
durationsToBeAdjustedVu.size() - 1,
durationsToBeAdjustedVu.get(durationsToBeAdjustedVu.size() - 2));
break;
case Mp4Muxer.LAST_FRAME_DURATION_BEHAVIOR_INSERT_SHORT_FRAME:
// Keep the last sample duration as short as possible.
checkState(Iterables.getLast(durationsToBeAdjustedVu) == 0L);
break;
default:
throw new IllegalArgumentException(
"Unexpected value for the last frame duration behavior " + behavior);
}
}
/** Returns the avcC box as per ISO/IEC 14496-15: 5.3.3.1.2. */
private static ByteBuffer avcCBox(Format format) {
checkArgument(
format.initializationData.size() >= 2, "csd-0 and/or csd-1 not found in the format.");
ByteBuffer csd0 = ByteBuffer.wrap(format.initializationData.get(0));
ByteBuffer csd1 = ByteBuffer.wrap(format.initializationData.get(1));
ByteBuffer contents =
ByteBuffer.allocate(csd0.limit() + csd1.limit() + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.put((byte) 0x01); // configurationVersion
ImmutableList<ByteBuffer> csd0NalUnits = AnnexBUtils.findNalUnits(csd0);
checkArgument(csd0NalUnits.size() == 1, "SPS data not found in csd0.");
ByteBuffer sps = csd0NalUnits.get(0);
byte[] spsData = new byte[sps.remaining()];
sps.get(spsData);
sps.rewind();
NalUnitUtil.SpsData h264SpsData =
NalUnitUtil.parseSpsNalUnit(spsData, /* nalOffset= */ 0, spsData.length);
contents.put((byte) h264SpsData.profileIdc); // AVCProfileIndication
contents.put((byte) h264SpsData.constraintsFlagsAndReservedZero2Bits); // profile_compatibility
contents.put((byte) h264SpsData.levelIdc); // AVCLevelIndication
contents.put((byte) 0xFF); // 6 bits reserved ('0b111111') + 2 bits lengthSizeMinusOne (3)
contents.put((byte) 0xE1); // 3 bits reserved ('0b111') + 5 bits numOfSequenceParameterSets (1)
contents.putShort((short) sps.remaining()); // sequenceParameterSetLength
contents.put(sps); // sequenceParameterSetNALUnit
sps.rewind();
ImmutableList<ByteBuffer> csd1NalUnits = AnnexBUtils.findNalUnits(csd1);
checkState(csd1NalUnits.size() == 1, "PPS data not found in csd1.");
contents.put((byte) 0x01); // numOfPictureParameterSets
ByteBuffer pps = csd1NalUnits.get(0);
contents.putShort((short) pps.remaining()); // pictureParameterSetLength
contents.put(pps); // pictureParameterSetNALUnit
pps.rewind();
contents.flip();
return BoxUtils.wrapIntoBox("avcC", contents);
}
/** Returns the hvcC box as per ISO/IEC 14496-15: 8.3.3.1.2. */
private static ByteBuffer hvcCBox(Format format) {
// For H.265, all three codec-specific NALUs (VPS, SPS, PPS) are packed into csd-0.
checkArgument(!format.initializationData.isEmpty(), "csd-0 not found in the format.");
ByteBuffer csd0 = ByteBuffer.wrap(format.initializationData.get(0));
ByteBuffer contents = ByteBuffer.allocate(csd0.limit() + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
ImmutableList<ByteBuffer> nalusWithEmulationPrevention = AnnexBUtils.findNalUnits(csd0);
// Remove emulation prevention bytes to parse the actual csd-0 data.
// For storing the csd-0 data into MP4 file, use original NALUs with emulation prevention bytes.
List<ByteBuffer> nalusWithoutEmulationPrevention = new ArrayList<>();
for (int i = 0; i < nalusWithEmulationPrevention.size(); i++) {
nalusWithoutEmulationPrevention.add(
AnnexBUtils.stripEmulationPrevention(nalusWithEmulationPrevention.get(i)));
}
contents.put((byte) 0x01); // configurationVersion
// Assuming that VPS, SPS and PPS are in this order in csd-0.
ByteBuffer vps = nalusWithoutEmulationPrevention.get(0);
if (vps.get(vps.position()) != 0x40) {
throw new IllegalArgumentException("First NALU in csd-0 is not the VPS.");
}
// general_profile_space (2 bits) + general_tier_flag (1 bit) + general_profile_idc (5 bits)
contents.put(vps.get(6));
contents.putInt(vps.getInt(7)); // general_profile_compatibility_flags
// general_constraint_indicator_flags (6 bytes)
contents.putInt(vps.getInt(11));
contents.putShort(vps.getShort(15));
contents.put(vps.get(17)); // general_level_idc
// First 4 bits reserved + min_spatial_segmentation_idc (12 bits)
contents.putShort((short) 0xF000);
// First 6 bits reserved + parallelismType (2 bits)
contents.put((byte) 0xFC);
ByteBuffer sps = nalusWithEmulationPrevention.get(1);
byte[] spsArray = new byte[sps.remaining()];
sps.get(spsArray);
sps.rewind();
NalUnitUtil.H265SpsData h265SpsData =
NalUnitUtil.parseH265SpsNalUnit(
spsArray, /* nalOffset= */ 0, /* nalLimit= */ spsArray.length);
byte chromaFormat = (byte) (0xFC | h265SpsData.chromaFormatIdc); // First 6 bits reserved
byte bitDepthLumaMinus8 =
(byte) (0xF8 | h265SpsData.bitDepthLumaMinus8); // First 5 bits reserved
byte bitDepthChromaMinus8 =
(byte) (0xF8 | h265SpsData.bitDepthChromaMinus8); // First 5 bits reserved
contents.put(chromaFormat);
contents.put(bitDepthLumaMinus8);
contents.put(bitDepthChromaMinus8);
// avgFrameRate; value 0 indicates an unspecified average frame rate.
contents.putShort((short) 0);
// constantFrameRate (2 bits) + numTemporalLayers (3 bits) + temporalIdNested (1 bit) +
// lengthSizeMinusOne (2 bits)
contents.put((byte) 0x0F);
// Put all NALUs.
contents.put((byte) nalusWithEmulationPrevention.size()); // numOfArrays
for (int i = 0; i < nalusWithEmulationPrevention.size(); i++) {
ByteBuffer nalu = nalusWithEmulationPrevention.get(i);
// array_completeness (1 bit) + reserved (1 bit) + NAL_unit_type (6 bits)
byte naluType = (byte) ((nalu.get(0) >> 1) & 0x3F);
contents.put(naluType);
contents.putShort((short) 1); // numNalus; number of NALUs in array
contents.putShort((short) nalu.limit()); // nalUnitLength
contents.put(nalu);
}
contents.flip();
return BoxUtils.wrapIntoBox("hvcC", contents);
}
/** Returns the av1C box. */
private static ByteBuffer av1CBox(Format format) {
// For AV1, the entire codec-specific box is packed into csd-0.
checkArgument(!format.initializationData.isEmpty(), "csd-0 is not found in the format");
ByteBuffer csd0 = ByteBuffer.wrap(format.initializationData.get(0));
return BoxUtils.wrapIntoBox("av1C", csd0.duplicate());
}
/** Returns the pasp box. */
private static ByteBuffer paspBox() {
ByteBuffer contents = ByteBuffer.allocate(8);
contents.putInt(1 << 16); // hspacing
contents.putInt(1 << 16); // vspacing
contents.rewind();
return BoxUtils.wrapIntoBox("pasp", contents);
}
/** Returns the colr box. */
@SuppressWarnings("InlinedApi")
private static ByteBuffer colrBox(ColorInfo colorInfo) {
ByteBuffer contents = ByteBuffer.allocate(20);
contents.put((byte) 'n');
contents.put((byte) 'c');
contents.put((byte) 'l');
contents.put((byte) 'x');
// Parameters going into the file.
short primaries = 0;
short transfer = 0;
short matrix = 0;
byte range = 0;
if (colorInfo.colorSpace != Format.NO_VALUE) {
int standard = colorInfo.colorSpace;
if (standard < 0 || standard >= MEDIAFORMAT_STANDARD_TO_PRIMARIES_AND_MATRIX.size()) {
throw new IllegalArgumentException("Color standard not implemented: " + standard);
}
primaries = MEDIAFORMAT_STANDARD_TO_PRIMARIES_AND_MATRIX.get(standard).get(0);
matrix = MEDIAFORMAT_STANDARD_TO_PRIMARIES_AND_MATRIX.get(standard).get(1);
}
if (colorInfo.colorTransfer != Format.NO_VALUE) {
int transferInFormat = colorInfo.colorTransfer;
if (transferInFormat < 0 || transferInFormat >= MEDIAFORMAT_TRANSFER_TO_MP4_TRANSFER.size()) {
throw new IllegalArgumentException("Color transfer not implemented: " + transferInFormat);
}
transfer = MEDIAFORMAT_TRANSFER_TO_MP4_TRANSFER.get(transferInFormat);
}
if (colorInfo.colorRange != Format.NO_VALUE) {
int rangeInFormat = colorInfo.colorRange;
// Handled values are 0 (unknown), 1 (full) and 2 (limited).
if (rangeInFormat < 0 || rangeInFormat > 2) {
throw new IllegalArgumentException("Color range not implemented: " + rangeInFormat);
}
// Set this to 0x80 only for full range, 0 otherwise.
range = rangeInFormat == C.COLOR_RANGE_FULL ? (byte) 0x80 : 0;
}
contents.putShort(primaries);
contents.putShort(transfer);
contents.putShort(matrix);
contents.put(range);
contents.flip();
return BoxUtils.wrapIntoBox("colr", contents);
}
/** Returns video codec specific fourcc. */
private static String codecSpecificFourcc(Format format) {
String mimeType = checkNotNull(format.sampleMimeType);
switch (mimeType) {
case "video/avc":
return "avc1";
case "video/hevc":
return "hvc1";
case "video/av01":
return "av01";
default:
throw new IllegalArgumentException("Unsupported video format: " + mimeType);
}
}
/** Returns the esds box. */
private static ByteBuffer audioEsdsBox(Format format) {
checkArgument(!format.initializationData.isEmpty(), "csd-0 is not found in the format.");
ByteBuffer csd0 = ByteBuffer.wrap(format.initializationData.get(0));
int csd0Size = csd0.limit();
ByteBuffer contents = ByteBuffer.allocate(csd0Size + Mp4Utils.MAX_FIXED_LEAF_BOX_SIZE);
contents.putInt(0x0); // version and flags.
contents.put((byte) 0x03); // ES_DescrTag
// We're normally using a variable-length encoding for the length of various sub-packages (esds
// etc.), in a nested way, so outer lengths need to account for variable-length inner lengths
// too (to save ~10 bytes per video file). Meanwhile, AAC codec-specific
// data is typically just 2 bytes, so every length actually fits into a byte. Here, we're just
// skipping the entire complex story by asserting that we won't ever need variable-length sizes.
checkArgument(csd0Size + 21 < 127, "CSD too long; we might need variable-length encoding?");
contents.put((byte) (23 + csd0Size));
contents.putShort((short) 0x0000); // ES_ID
contents.put((byte) 0x00);
contents.put((byte) 0x04); // DecoderConfigDescrTag
contents.put((byte) (15 + csd0Size));
contents.put((byte) 0x40); // objectTypeIndication
contents.put((byte) 0x15); // streamType AudioStream
contents.putShort((short) 0x03);
contents.put((byte) 0x00); // 24-bit buffer size (0x300)
contents.putInt(format.peakBitrate != Format.NO_VALUE ? format.peakBitrate : 0);
contents.putInt(format.averageBitrate != Format.NO_VALUE ? format.bitrate : 0);
contents.put((byte) 0x05); // DecoderSpecificInfoTag
contents.put((byte) csd0Size);
contents.put(csd0);
csd0.rewind();
contents.put((byte) 0x06); // SLConfigDescriptorTag
contents.put((byte) 0x01);
contents.put((byte) 0x02);
contents.flip();
return BoxUtils.wrapIntoBox("esds", contents);
}
/** Convert UNIX timestamps to the format used by MP4 files. */
private static int toMp4Time(long unixTimeMs) {
// Jan 1, 1904, including leap years.
long delta = (66 * 365 + 17) * (24 * 60 * 60);
return (int) (unixTimeMs / 1000L + delta);
}
/** Packs a three-letter language code into a short, packing 3x5 bits. */
private static short languageCodeFromString(@Nullable String code) {
if (code == null) {
return 0;
}
byte[] bytes = Util.getUtf8Bytes(code);
if (bytes.length != 3) {
throw new IllegalArgumentException("Non-length-3 language code: " + code);
}
// Use an int so that we don't bump into the issue of Java not having unsigned types. We take
// the last 5 bits of each letter to supply 5 bits each of the eventual code.
int value = (bytes[2] & 0x1F);
value += (bytes[1] & 0x1F) << 5;
value += (bytes[0] & 0x1F) << 10;
// This adds up to 15 bits; the 16th one is really supposed to be 0.
checkState((value & 0x8000) == 0);
return (short) (value & 0xFFFF);
}
/**
* Generates an orientation matrix, to be included in the MP4 header.
*
* <p>The supported values are 0, 90, 180 and 270 (degrees).
*/
private static byte[] rotationMatrixFromOrientation(int orientation) {
// The transformation matrix is defined as below:
// | a b u |
// | c d v |
// | x y w |
// To specify the orientation (u, v, w) are restricted to (0, 0, 0x40000000).
// Reference: ISO/IEC 14496-12: 8.2.2.3.
int fixedOne = 65536;
switch (orientation) {
case 0:
return Util.toByteArray(fixedOne, 0, 0, 0, fixedOne, 0, 0, 0, 0x40000000);
case 90:
return Util.toByteArray(0, fixedOne, 0, -fixedOne, 0, 0, 0, 0, 0x40000000);
case 180:
return Util.toByteArray(-fixedOne, 0, 0, 0, -fixedOne, 0, 0, 0, 0x40000000);
case 270:
return Util.toByteArray(0, -fixedOne, 0, fixedOne, 0, 0, 0, 0, 0x40000000);
default:
throw new IllegalArgumentException("invalid orientation " + orientation);
}
}
}