// VideoTranscodingSamplePipeline.java

/*
 * Copyright 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package androidx.media3.transformer;

import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.common.util.Util.SDK_INT;

import android.content.Context;
import android.graphics.Matrix;
import android.media.MediaCodec;
import android.media.MediaFormat;
import androidx.annotation.Nullable;
import androidx.annotation.RequiresApi;
import androidx.media3.common.C;
import androidx.media3.common.Format;
import androidx.media3.common.util.Util;
import androidx.media3.decoder.DecoderInputBuffer;
import java.util.List;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
import org.checkerframework.dataflow.qual.Pure;

/**
 * Pipeline to decode video samples, apply transformations on the raw samples, and re-encode them.
 *
 * <p>Decoded frames are rendered either directly to the encoder's input surface or, when frame
 * editing is needed, to a {@link FrameEditor} whose output surface is the encoder's input surface.
 */
/* package */ final class VideoTranscodingSamplePipeline implements SamplePipeline {

  /** Rotation, in degrees, that is set on the format returned by {@link #getOutputFormat()}. */
  private final int outputRotationDegrees;
  /** Buffer handed out by {@link #dequeueInputBuffer()} and queued to the decoder. */
  private final DecoderInputBuffer decoderInputBuffer;
  private final Codec decoder;

  private final Codec encoder;
  /** Reusable buffer that {@link #getOutputBuffer()} populates from the encoder's output. */
  private final DecoderInputBuffer encoderOutputBuffer;

  /**
   * Created in the constructor only if HDR editing is enabled, the encoder's configured dimensions
   * differ from the input dimensions, or the transformation matrix is not the identity. When unset,
   * the decoder renders directly to the encoder's input surface.
   */
  private @MonotonicNonNull FrameEditor frameEditor;

  /**
   * Whether {@link #processDataDefault()} has registered a frame with the {@link FrameEditor} that
   * the frame editor has not processed yet.
   */
  private boolean waitingForFrameEditorInput;

  /**
   * Creates the pipeline: configures the encoder, optionally a {@link FrameEditor}, and the
   * decoder.
   *
   * @param context A {@link Context}, passed to the {@link FrameEditor}.
   * @param inputFormat The {@link Format} of the input video.
   * @param transformationRequest The requested output height, MIME type, transformation matrix and
   *     HDR editing flag.
   * @param decoderFactory Factory used to create the video decoder.
   * @param encoderFactory Factory used to create the video encoder.
   * @param allowedOutputMimeTypes The MIME types passed to the encoder factory.
   * @param fallbackListener Listener notified with the {@link TransformationRequest} that results
   *     from comparing the requested and the actual encoder format.
   * @param debugViewProvider Passed to the {@link FrameEditor}.
   * @throws TransformationException If setting up the pipeline fails.
   */
  public VideoTranscodingSamplePipeline(
      Context context,
      Format inputFormat,
      TransformationRequest transformationRequest,
      Codec.DecoderFactory decoderFactory,
      Codec.EncoderFactory encoderFactory,
      List<String> allowedOutputMimeTypes,
      FallbackListener fallbackListener,
      Transformer.DebugViewProvider debugViewProvider)
      throws TransformationException {
    decoderInputBuffer =
        new DecoderInputBuffer(DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED);
    encoderOutputBuffer =
        new DecoderInputBuffer(DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED);

    // Scale width and height to desired transformationRequest.outputHeight, preserving aspect
    // ratio.
    // TODO(b/209781577): Think about which edge length should be set for portrait videos.
    float inputFormatAspectRatio = (float) inputFormat.width / inputFormat.height;
    int outputWidth = inputFormat.width;
    int outputHeight = inputFormat.height;
    if (transformationRequest.outputHeight != C.LENGTH_UNSET
        && transformationRequest.outputHeight != inputFormat.height) {
      outputWidth = Math.round(inputFormatAspectRatio * transformationRequest.outputHeight);
      outputHeight = transformationRequest.outputHeight;
    }

    // The encoder may not support encoding in portrait orientation, so the decoded video is
    // rotated to landscape orientation and a rotation is added back later to the output format.
    boolean swapEncodingDimensions = inputFormat.height > inputFormat.width;
    if (swapEncodingDimensions) {
      outputRotationDegrees = (inputFormat.rotationDegrees + 90) % 360;
      int temp = outputWidth;
      outputWidth = outputHeight;
      outputHeight = temp;
    } else {
      outputRotationDegrees = inputFormat.rotationDegrees;
    }
    float displayAspectRatio =
        (inputFormat.rotationDegrees % 180) == 0
            ? inputFormatAspectRatio
            : 1.0f / inputFormatAspectRatio;

    Matrix transformationMatrix = new Matrix(transformationRequest.transformationMatrix);
    // Scale frames by input aspect ratio, to account for FrameEditor's square normalized device
    // coordinates (-1 to 1) and preserve frame relative dimensions during transformations
    // (ex. rotations). After this scaling, transformationMatrix operations operate on a rectangle
    // for x from -displayAspectRatio to displayAspectRatio, and y from -1 to 1
    transformationMatrix.preScale(displayAspectRatio, 1);
    transformationMatrix.postScale(1.0f / displayAspectRatio, 1);

    // The decoder rotates videos to their intended display orientation. The frameEditor rotates
    // them back for improved encoder compatibility.
    // TODO(b/201293185): After fragment shader transformations are implemented, put
    // postRotate in a later vertex shader.
    transformationMatrix.postRotate(outputRotationDegrees);

    // The rotation is not requested from the encoder; it is added back in getOutputFormat().
    Format requestedOutputFormat =
        new Format.Builder()
            .setWidth(outputWidth)
            .setHeight(outputHeight)
            .setRotationDegrees(0)
            .setSampleMimeType(
                transformationRequest.videoMimeType != null
                    ? transformationRequest.videoMimeType
                    : inputFormat.sampleMimeType)
            .build();
    encoder = encoderFactory.createForVideoEncoding(requestedOutputFormat, allowedOutputMimeTypes);
    Format actualOutputFormat = encoder.getConfigurationFormat();
    // When the encoding dimensions were swapped, the requested resolution is carried by the format
    // width rather than its height.
    fallbackListener.onTransformationRequestFinalized(
        createFallbackTransformationRequest(
            transformationRequest,
            /* resolutionIsHeight= */ !swapEncodingDimensions,
            requestedOutputFormat,
            actualOutputFormat));

    // A frame editor is only needed if the frames have to be modified before being encoded.
    if (transformationRequest.enableHdrEditing
        || inputFormat.height != actualOutputFormat.height
        || inputFormat.width != actualOutputFormat.width
        || !transformationMatrix.isIdentity()) {
      frameEditor =
          FrameEditor.create(
              context,
              actualOutputFormat.width,
              actualOutputFormat.height,
              inputFormat.pixelWidthHeightRatio,
              transformationMatrix,
              /* outputSurface= */ checkNotNull(encoder.getInputSurface()),
              transformationRequest.enableHdrEditing,
              debugViewProvider);
    }

    // Without a frame editor the decoder renders straight to the encoder's input surface.
    decoder =
        decoderFactory.createForVideoDecoding(
            inputFormat,
            frameEditor == null
                ? checkNotNull(encoder.getInputSurface())
                : frameEditor.getInputSurface());
  }

  /**
   * Returns the decoder input buffer if the decoder could dequeue one, or {@code null} otherwise.
   */
  @Override
  @Nullable
  public DecoderInputBuffer dequeueInputBuffer() throws TransformationException {
    return decoder.maybeDequeueInputBuffer(decoderInputBuffer) ? decoderInputBuffer : null;
  }

  /** Queues the buffer returned by {@link #dequeueInputBuffer()} to the decoder. */
  @Override
  public void queueInputBuffer() throws TransformationException {
    decoder.queueInputBuffer(decoderInputBuffer);
  }

  /**
   * Moves decoded frames towards the encoder, using the strategy matching the device API level.
   *
   * @return {@code false} if the decoder and, if present, the frame editor have ended; otherwise
   *     the result of {@link #processDataV29()} or {@link #processDataDefault()}.
   */
  @Override
  public boolean processData() throws TransformationException {
    if (hasProcessedAllInputData()) {
      return false;
    }

    if (SDK_INT >= 29) {
      return processDataV29();
    } else {
      return processDataDefault();
    }
  }

  /**
   * Processes input data from API 29.
   *
   * <p>In this method the decoder could decode multiple frames in one invocation; as compared to
   * {@link #processDataDefault()}, in which one frame is decoded in each invocation. Consequently,
   * if {@link FrameEditor} processes frames slower than the decoder, decoded frames are queued up
   * in the decoder's output surface.
   *
   * <p>Prior to API 29, decoders may drop frames to keep their output surface from growing out of
   * bound; while after API 29, the {@link MediaFormat#KEY_ALLOW_FRAME_DROP} key prevents frame
   * dropping even when the surface is full. As dropping random frames is not acceptable in {@code
   * Transformer}, using this method requires API level 29 or higher.
   *
   * @return Whether the frame editor exists and can process more data.
   */
  @RequiresApi(29)
  private boolean processDataV29() throws TransformationException {
    if (frameEditor != null) {
      // Processes as many frames as possible. FrameEditor's output surface will block when it's
      // full, so there will be no frame drop and the surface will not grow out of bound.
      while (frameEditor.canProcessData()) {
        frameEditor.processData();
      }
    }

    // Render every decoder output buffer that is currently available.
    while (decoder.getOutputBufferInfo() != null) {
      if (frameEditor != null) {
        frameEditor.registerInputFrame();
      }
      decoder.releaseOutputBuffer(/* render= */ true);
    }
    if (decoder.isEnded()) {
      signalEndOfInputStream();
    }

    return frameEditor != null && frameEditor.canProcessData();
  }

  /**
   * Processes input data, rendering at most one decoder output buffer per invocation and, when a
   * frame editor is used, waiting for it to process that frame before rendering the next one.
   *
   * @return Whether the frame editor processed a frame, or a decoder output buffer was rendered
   *     without a frame having to be awaited from the frame editor. Returns {@code false} when the
   *     decoder is found to have ended after the rendering step.
   */
  private boolean processDataDefault() throws TransformationException {
    if (frameEditor != null) {
      if (frameEditor.canProcessData()) {
        waitingForFrameEditorInput = false;
        frameEditor.processData();
        // If the decoder has already ended, processing this frame may be what lets the frame
        // editor report that it has ended. processData() returns early once both have ended, so
        // the end of stream is forwarded here; otherwise the encoder might never be signaled.
        if (decoder.isEnded()) {
          signalEndOfInputStream();
        }
        return true;
      }
      if (waitingForFrameEditorInput) {
        // The previously registered frame has not been processed by the frame editor yet.
        return false;
      }
    }

    boolean decoderHasOutputBuffer = decoder.getOutputBufferInfo() != null;
    if (decoderHasOutputBuffer) {
      if (frameEditor != null) {
        frameEditor.registerInputFrame();
        waitingForFrameEditorInput = true;
      }
      decoder.releaseOutputBuffer(/* render= */ true);
    }
    if (decoder.isEnded()) {
      signalEndOfInputStream();
      return false;
    }
    return decoderHasOutputBuffer && !waitingForFrameEditorInput;
  }

  /**
   * Returns the encoder's output format with its rotation set to the output rotation computed in
   * the constructor, or {@code null} if the encoder has no output format.
   */
  @Override
  @Nullable
  public Format getOutputFormat() throws TransformationException {
    @Nullable Format format = encoder.getOutputFormat();
    return format == null
        ? null
        : format.buildUpon().setRotationDegrees(outputRotationDegrees).build();
  }

  /**
   * Returns a buffer holding the encoder's current output data, presentation time and flags, or
   * {@code null} if the encoder has no output buffer.
   */
  @Override
  @Nullable
  public DecoderInputBuffer getOutputBuffer() throws TransformationException {
    encoderOutputBuffer.data = encoder.getOutputBuffer();
    if (encoderOutputBuffer.data == null) {
      return null;
    }
    MediaCodec.BufferInfo bufferInfo = checkNotNull(encoder.getOutputBufferInfo());
    encoderOutputBuffer.timeUs = bufferInfo.presentationTimeUs;
    encoderOutputBuffer.setFlags(bufferInfo.flags);
    return encoderOutputBuffer;
  }

  /** Releases the encoder's current output buffer. */
  @Override
  public void releaseOutputBuffer() throws TransformationException {
    encoder.releaseOutputBuffer();
  }

  /** Returns whether the encoder has ended. */
  @Override
  public boolean isEnded() {
    return encoder.isEnded();
  }

  /** Releases the frame editor, if any, then the decoder and the encoder. */
  @Override
  public void release() {
    if (frameEditor != null) {
      frameEditor.release();
    }
    decoder.release();
    encoder.release();
  }

  /**
   * Returns the {@link TransformationRequest} describing what is actually applied, given the
   * format requested from the encoder and the format the encoder was configured with.
   *
   * @param transformationRequest The original request.
   * @param resolutionIsHeight Whether the request's resolution corresponds to the format height
   *     ({@code true}) or to the format width ({@code false}).
   * @param requestedFormat The format requested from the encoder.
   * @param actualFormat The format the encoder was configured with.
   * @return {@code transformationRequest} itself if the MIME type and the relevant dimension of
   *     both formats match; otherwise a copy carrying the MIME type and resolution of {@code
   *     actualFormat}.
   */
  @Pure
  private static TransformationRequest createFallbackTransformationRequest(
      TransformationRequest transformationRequest,
      boolean resolutionIsHeight,
      Format requestedFormat,
      Format actualFormat) {
    // TODO(b/210591626): Also update bitrate etc. once encoder configuration and fallback are
    // implemented.
    if (Util.areEqual(requestedFormat.sampleMimeType, actualFormat.sampleMimeType)
        && ((!resolutionIsHeight && requestedFormat.width == actualFormat.width)
            || (resolutionIsHeight && requestedFormat.height == actualFormat.height))) {
      return transformationRequest;
    }
    // Report what the encoder actually uses; echoing the requested value would hide the fallback.
    return transformationRequest
        .buildUpon()
        .setVideoMimeType(actualFormat.sampleMimeType)
        .setResolution(resolutionIsHeight ? actualFormat.height : actualFormat.width)
        .build();
  }

  /** Returns whether the decoder and, if present, the frame editor have ended. */
  private boolean hasProcessedAllInputData() {
    return decoder.isEnded() && (frameEditor == null || frameEditor.isEnded());
  }

  /**
   * Signals end of input stream to the frame editor, if any, and signals it to the encoder once
   * there is no frame editor or the frame editor has ended.
   */
  private void signalEndOfInputStream() throws TransformationException {
    if (frameEditor != null) {
      frameEditor.signalEndOfInputStream();
    }
    if (frameEditor == null || frameEditor.isEnded()) {
      encoder.signalEndOfInputStream();
    }
  }
}