/*
 * Copyright 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package androidx.media3.effect;

import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.common.util.Assertions.checkState;
import static java.lang.Math.abs;
import static java.lang.Math.max;

import android.content.Context;
import android.opengl.EGLContext;
import android.opengl.EGLDisplay;
import android.opengl.EGLSurface;
import android.opengl.GLES20;
import androidx.annotation.GuardedBy;
import androidx.annotation.IntRange;
import androidx.annotation.Nullable;
import androidx.media3.common.C;
import androidx.media3.common.ColorInfo;
import androidx.media3.common.GlObjectsProvider;
import androidx.media3.common.GlTextureInfo;
import androidx.media3.common.VideoFrameProcessingException;
import androidx.media3.common.util.GlProgram;
import androidx.media3.common.util.GlUtil;
import androidx.media3.common.util.Log;
import androidx.media3.common.util.LongArrayQueue;
import androidx.media3.common.util.Size;
import androidx.media3.common.util.UnstableApi;
import androidx.media3.common.util.Util;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.ExecutorService;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;

/**
 * A basic {@link VideoCompositor} implementation that takes in frames from input sources' streams
 * and combines them into one output stream.
 *
 * <p>The first {@linkplain #registerInputSource registered source} will be the primary stream,
 * which is used to determine the output textures' timestamps and dimensions.
 *
 * <p>Each input source must support having at least two {@linkplain
 * VideoCompositor#queueInputTexture queued textures} in its output buffer.
 *
 * <p>When composited, textures are overlaid over one another in the reverse of their registration
 * order, so that the first registered source appears on top. How the textures are overlaid can be
 * customized using the {@link OverlaySettings} output by {@link VideoCompositorSettings}.
 *
 * <p>Only SDR inputs with the same {@link ColorInfo} are supported.
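 *
 * <p>A minimal usage sketch, where {@code context}, {@code glObjectsProvider}, {@code settings},
 * and {@code compositorListener} are assumed to be supplied by the caller:
 *
 * <pre>{@code
 * DefaultVideoCompositor compositor =
 *     new DefaultVideoCompositor(
 *         context,
 *         glObjectsProvider,
 *         settings,
 *         null, // A null executorService means the compositor owns its own GL thread.
 *         compositorListener,
 *         (textureProducer, texture, presentationTimeUs, syncObject) -> {
 *           // Consume the output texture, then return it to the compositor's pool.
 *           textureProducer.releaseOutputTexture(presentationTimeUs);
 *         },
 *         1); // textureOutputCapacity
 * int primaryId = compositor.registerInputSource(); // The first source is the primary stream.
 * int secondaryId = compositor.registerInputSource();
 * // Queue textures with queueInputTexture(...), then signal the end of each input:
 * compositor.signalEndOfInputSource(primaryId);
 * compositor.signalEndOfInputSource(secondaryId);
 * }</pre>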
 */
@UnstableApi
public final class DefaultVideoCompositor implements VideoCompositor {
  // TODO: b/262694346 - Flesh out this implementation by doing the following:
  //  * Use a lock to synchronize inputFrameInfos more narrowly, to reduce blocking.
  //  * Add support for mixing SDR streams with different ColorInfo.
  //  * Add support for HDR input.

  private static final String THREAD_NAME = "Effect:DefaultVideoCompositor:GlThread";
  private static final String TAG = "DefaultVideoCompositor";
  private static final int PRIMARY_INPUT_ID = 0;

  private final VideoCompositor.Listener listener;
  private final GlTextureProducer.Listener textureOutputListener;
  private final GlObjectsProvider glObjectsProvider;
  private final VideoCompositorSettings settings;
  private final CompositorGlProgram compositorGlProgram;
  private final VideoFrameProcessingTaskExecutor videoFrameProcessingTaskExecutor;

  @GuardedBy("this")
  private final List<InputSource> inputSources;

  @GuardedBy("this")
  private boolean allInputsEnded; // Whether all inputSources have signaled end of input.

  private final TexturePool outputTexturePool;
  private final LongArrayQueue outputTextureTimestamps; // Synchronized with outputTexturePool.
  private final LongArrayQueue syncObjects; // Synchronized with outputTexturePool.

  private @MonotonicNonNull ColorInfo configuredColorInfo;

  // Only used on the GL thread.
  private @MonotonicNonNull EGLContext eglContext;
  private @MonotonicNonNull EGLDisplay eglDisplay;
  private @MonotonicNonNull EGLSurface placeholderEglSurface;

  /**
   * Creates an instance.
   *
   * <p>If a non-null {@code executorService} is set, the {@link ExecutorService} must be
   * {@linkplain ExecutorService#shutdown shut down} by the caller.
   */
  public DefaultVideoCompositor(
      Context context,
      GlObjectsProvider glObjectsProvider,
      VideoCompositorSettings settings,
      @Nullable ExecutorService executorService,
      VideoCompositor.Listener listener,
      GlTextureProducer.Listener textureOutputListener,
      @IntRange(from = 1) int textureOutputCapacity) {
    this.listener = listener;
    this.textureOutputListener = textureOutputListener;
    this.glObjectsProvider = glObjectsProvider;
    this.settings = settings;
    this.compositorGlProgram = new CompositorGlProgram(context);

    inputSources = new ArrayList<>();
    outputTexturePool =
        new TexturePool(/* useHighPrecisionColorComponents= */ false, textureOutputCapacity);
    outputTextureTimestamps = new LongArrayQueue(textureOutputCapacity);
    syncObjects = new LongArrayQueue(textureOutputCapacity);

    boolean ownsExecutor = executorService == null;
    ExecutorService instanceExecutorService =
        ownsExecutor ? Util.newSingleThreadExecutor(THREAD_NAME) : checkNotNull(executorService);
    videoFrameProcessingTaskExecutor =
        new VideoFrameProcessingTaskExecutor(
            instanceExecutorService,
            /* shouldShutdownExecutorService= */ ownsExecutor,
            listener::onError);
    videoFrameProcessingTaskExecutor.submit(this::setupGlObjects);
  }

  @Override
  public synchronized int registerInputSource() {
    inputSources.add(new InputSource());
    return inputSources.size() - 1;
  }

  @Override
  public synchronized void signalEndOfInputSource(int inputId) {
    inputSources.get(inputId).isInputEnded = true;
    boolean allInputsEnded = true;
    for (int i = 0; i < inputSources.size(); i++) {
      if (!inputSources.get(i).isInputEnded) {
        allInputsEnded = false;
        break;
      }
    }

    this.allInputsEnded = allInputsEnded;
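    // Once the primary stream ends with no frames pending, no further compositing can happen, so
    // pending secondary frames can be released and the end of output can be signaled.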
    if (inputSources.get(PRIMARY_INPUT_ID).frameInfos.isEmpty()) {
      if (inputId == PRIMARY_INPUT_ID) {
        releaseExcessFramesInAllSecondaryStreams();
      }
      if (allInputsEnded) {
        listener.onEnded();
        return;
      }
    }
    if (inputId != PRIMARY_INPUT_ID && inputSources.get(inputId).frameInfos.size() == 1) {
      // When a secondary stream ends input, composite if there was only one pending frame in the
      // stream.
      videoFrameProcessingTaskExecutor.submit(this::maybeComposite);
    }
  }

  @Override
  public synchronized void queueInputTexture(
      int inputId,
      GlTextureProducer textureProducer,
      GlTextureInfo inputTexture,
      ColorInfo colorInfo,
      long presentationTimeUs) {
    InputSource inputSource = inputSources.get(inputId);
    checkState(!inputSource.isInputEnded);
    checkState(!ColorInfo.isTransferHdr(colorInfo), "HDR input is not supported.");
    if (configuredColorInfo == null) {
      configuredColorInfo = colorInfo;
    }
    checkState(
        configuredColorInfo.equals(colorInfo), "Mixing different ColorInfos is not supported.");

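    // Overlay settings are queried per input and per timestamp, so they may vary frame to frame.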
    InputFrameInfo inputFrameInfo =
        new InputFrameInfo(
            textureProducer,
            inputTexture,
            presentationTimeUs,
            settings.getOverlaySettings(inputId, presentationTimeUs));
    inputSource.frameInfos.add(inputFrameInfo);

    if (inputId == PRIMARY_INPUT_ID) {
      releaseExcessFramesInAllSecondaryStreams();
    } else {
      releaseExcessFramesInSecondaryStream(inputSource);
    }

    videoFrameProcessingTaskExecutor.submit(this::maybeComposite);
  }

  @Override
  public synchronized void release() {
    checkState(allInputsEnded);
    try {
      videoFrameProcessingTaskExecutor.release(/* releaseTask= */ this::releaseGlObjects);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new IllegalStateException(e);
    }
  }

  @Override
  public void releaseOutputTexture(long presentationTimeUs) {
    videoFrameProcessingTaskExecutor.submit(() -> releaseOutputTextureInternal(presentationTimeUs));
  }

  private synchronized void releaseExcessFramesInAllSecondaryStreams() {
    for (int i = 0; i < inputSources.size(); i++) {
      if (i == PRIMARY_INPUT_ID) {
        continue;
      }
      releaseExcessFramesInSecondaryStream(inputSources.get(i));
    }
  }

  /**
   * Releases unneeded frames from the given secondary stream's {@link InputSource}.
   *
   * <p>After this method returns, at most one of the frames that were present when the method
   * began executing will remain with a timestamp at or before the primary stream's next
   * timestamp.
   */
  private synchronized void releaseExcessFramesInSecondaryStream(InputSource secondaryInputSource) {
    InputSource primaryInputSource = inputSources.get(PRIMARY_INPUT_ID);
    // If the primary stream output is ended, all secondary frames can be released.
    if (primaryInputSource.frameInfos.isEmpty() && primaryInputSource.isInputEnded) {
      releaseFrames(
          secondaryInputSource,
          /* numberOfFramesToRelease= */ secondaryInputSource.frameInfos.size());
      return;
    }

    // Release frames until at most one frame remains in the secondary stream with a
    // presentationTimeUs before or at nextTimestampToComposite.
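    // For example, if the next primary timestamp is 30 and the secondary stream holds frames at
    // 10, 20, and 40, only the frame at 10 is released: 20 remains as the closest candidate at or
    // before 30, and 40 may still be needed for a later primary frame.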
    @Nullable InputFrameInfo nextPrimaryFrame = primaryInputSource.frameInfos.peek();
    long nextTimestampToComposite =
        nextPrimaryFrame != null ? nextPrimaryFrame.presentationTimeUs : C.TIME_UNSET;

    int numberOfSecondaryFramesBeforeOrAtNextTargetTimestamp =
        Iterables.size(
            Iterables.filter(
                secondaryInputSource.frameInfos,
                frame -> frame.presentationTimeUs <= nextTimestampToComposite));
    releaseFrames(
        secondaryInputSource,
        /* numberOfFramesToRelease= */ max(
            numberOfSecondaryFramesBeforeOrAtNextTargetTimestamp - 1, 0));
  }

  private synchronized void releaseFrames(InputSource inputSource, int numberOfFramesToRelease) {
    for (int i = 0; i < numberOfFramesToRelease; i++) {
      InputFrameInfo frameInfoToRelease = inputSource.frameInfos.remove();
      frameInfoToRelease.textureProducer.releaseOutputTexture(
          frameInfoToRelease.presentationTimeUs);
    }
  }

  // The methods below must be called on the GL thread.
  private void setupGlObjects() throws GlUtil.GlException {
    eglDisplay = GlUtil.getDefaultEglDisplay();
    eglContext =
        glObjectsProvider.createEglContext(
            eglDisplay, /* openGlVersion= */ 2, GlUtil.EGL_CONFIG_ATTRIBUTES_RGBA_8888);
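    // The compositor renders to textures rather than to a display surface, so a placeholder
    // surface is used to make the EGL context current.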
    placeholderEglSurface =
        glObjectsProvider.createFocusedPlaceholderEglSurface(eglContext, eglDisplay);
  }

  private synchronized void maybeComposite()
      throws VideoFrameProcessingException, GlUtil.GlException {
    ImmutableList<InputFrameInfo> framesToComposite = getFramesToComposite();
    if (framesToComposite.isEmpty()) {
      return;
    }

    InputFrameInfo primaryInputFrame = framesToComposite.get(PRIMARY_INPUT_ID);

    ImmutableList.Builder<Size> inputSizes = new ImmutableList.Builder<>();
    for (int i = 0; i < framesToComposite.size(); i++) {
      GlTextureInfo texture = framesToComposite.get(i).texture;
      inputSizes.add(new Size(texture.width, texture.height));
    }
    Size outputSize = settings.getOutputSize(inputSizes.build());
    outputTexturePool.ensureConfigured(
        glObjectsProvider, outputSize.getWidth(), outputSize.getHeight());

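    // Borrow a texture from the pool; it stays in use until the consumer calls
    // releaseOutputTexture with a timestamp at or after this frame's timestamp.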
    GlTextureInfo outputTexture = outputTexturePool.useTexture();
    long outputPresentationTimestampUs = primaryInputFrame.presentationTimeUs;
    outputTextureTimestamps.add(outputPresentationTimestampUs);

    compositorGlProgram.drawFrame(framesToComposite, outputTexture);
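    // Insert a GPU fence so that consumers can wait for rendering of outputTexture to complete
    // before reading from it.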
    long syncObject = GlUtil.createGlSyncFence();
    syncObjects.add(syncObject);
    textureOutputListener.onTextureRendered(
        /* textureProducer= */ this, outputTexture, outputPresentationTimestampUs, syncObject);

    InputSource primaryInputSource = inputSources.get(PRIMARY_INPUT_ID);
    releaseFrames(primaryInputSource, /* numberOfFramesToRelease= */ 1);
    releaseExcessFramesInAllSecondaryStreams();

    if (allInputsEnded && primaryInputSource.frameInfos.isEmpty()) {
      listener.onEnded();
    }
  }

  /**
   * Checks whether {@code inputSources} is able to composite, and if so, returns a list of {@link
   * InputFrameInfo}s that should be composited next.
   *
   * <p>The first input frame info in the list is from the primary source. An empty list is
   * returned if {@code inputSources} cannot composite now.
   */
  private synchronized ImmutableList<InputFrameInfo> getFramesToComposite() {
    if (outputTexturePool.freeTextureCount() == 0) {
      return ImmutableList.of();
    }
    for (int inputId = 0; inputId < inputSources.size(); inputId++) {
      if (inputSources.get(inputId).frameInfos.isEmpty()) {
        return ImmutableList.of();
      }
    }
    ImmutableList.Builder<InputFrameInfo> framesToComposite = new ImmutableList.Builder<>();
    InputFrameInfo primaryFrameToComposite =
        inputSources.get(PRIMARY_INPUT_ID).frameInfos.element();
    framesToComposite.add(primaryFrameToComposite);

    for (int inputId = 0; inputId < inputSources.size(); inputId++) {
      if (inputId == PRIMARY_INPUT_ID) {
        continue;
      }
      // Select the frame from this secondary stream to composite next: the frame whose timestamp
      // is closest to the primary frame's timestamp. A frame is only selected once it is certain
      // to be the closest, which requires either:
      //   1. The secondary stream has ended, and this is its last frame, or
      //   2. A frame with a timestamp greater than the target timestamp has been seen, so no
      //      closer frame can still arrive.
      // If two frames are equidistant from the primary timestamp, the earlier one is taken.
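      // For example, with a primary frame at 33 and secondary frames at 30 and 63, the frame at
      // 30 is selected once 63 is seen, because 63 > 33 confirms nothing closer to 33 will arrive.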
      InputSource secondaryInputSource = inputSources.get(inputId);
      if (secondaryInputSource.frameInfos.size() == 1 && !secondaryInputSource.isInputEnded) {
        return ImmutableList.of();
      }

      long minTimeDiffFromPrimaryUs = Long.MAX_VALUE;
      @Nullable InputFrameInfo secondaryFrameToComposite = null;
      Iterator<InputFrameInfo> frameInfosIterator = secondaryInputSource.frameInfos.iterator();
      while (frameInfosIterator.hasNext()) {
        InputFrameInfo candidateFrame = frameInfosIterator.next();
        long candidateTimestampUs = candidateFrame.presentationTimeUs;
        long candidateAbsDistance =
            abs(candidateTimestampUs - primaryFrameToComposite.presentationTimeUs);

        if (candidateAbsDistance < minTimeDiffFromPrimaryUs) {
          minTimeDiffFromPrimaryUs = candidateAbsDistance;
          secondaryFrameToComposite = candidateFrame;
        }

        if (candidateTimestampUs > primaryFrameToComposite.presentationTimeUs
            || (!frameInfosIterator.hasNext() && secondaryInputSource.isInputEnded)) {
          framesToComposite.add(checkNotNull(secondaryFrameToComposite));
          break;
        }
      }
    }
    ImmutableList<InputFrameInfo> framesToCompositeList = framesToComposite.build();
    if (framesToCompositeList.size() != inputSources.size()) {
      return ImmutableList.of();
    }
    return framesToCompositeList;
  }

  private synchronized void releaseOutputTextureInternal(long presentationTimeUs)
      throws VideoFrameProcessingException, GlUtil.GlException {
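    // Output textures are produced in timestamp order, so every in-use texture with a timestamp
    // at or before presentationTimeUs can be freed, along with its associated sync object.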
    while (outputTexturePool.freeTextureCount() < outputTexturePool.capacity()
        && outputTextureTimestamps.element() <= presentationTimeUs) {
      outputTexturePool.freeTexture();
      outputTextureTimestamps.remove();
      GlUtil.deleteSyncObject(syncObjects.remove());
    }
    maybeComposite();
  }

  private void releaseGlObjects() {
    try {
      compositorGlProgram.release();
      outputTexturePool.deleteAllTextures();
      GlUtil.destroyEglSurface(eglDisplay, placeholderEglSurface);
    } catch (GlUtil.GlException e) {
      Log.e(TAG, "Error releasing GL resources", e);
    } finally {
      try {
        GlUtil.destroyEglContext(eglDisplay, eglContext);
      } catch (GlUtil.GlException e) {
        Log.e(TAG, "Error releasing GL context", e);
      }
    }
  }

  /**
   * A wrapper around a {@link GlProgram} that draws multiple input {@link InputFrameInfo}s onto
   * one output {@link GlTextureInfo}.
   *
   * <p>All methods must be called on a GL thread, unless otherwise stated.
   */
  private static final class CompositorGlProgram {
    private static final String TAG = "CompositorGlProgram";
    private static final String VERTEX_SHADER_PATH =
        "shaders/vertex_shader_transformation_es2.glsl";
    private static final String FRAGMENT_SHADER_PATH =
        "shaders/fragment_shader_alpha_scale_es2.glsl";

    private final Context context;
    private final OverlayMatrixProvider overlayMatrixProvider;
    private @MonotonicNonNull GlProgram glProgram;

    /**
     * Creates an instance.
     *
     * <p>May be called on any thread.
     */
    public CompositorGlProgram(Context context) {
      this.context = context;
      this.overlayMatrixProvider = new OverlayMatrixProvider();
    }

    /** Draws {@link InputFrameInfo}s onto an output {@link GlTextureInfo}. */
    // Enhanced for-loops are discouraged in media3.effect due to short-lived allocations.
    @SuppressWarnings("ListReverse")
    public void drawFrame(List<InputFrameInfo> framesToComposite, GlTextureInfo outputTexture)
        throws GlUtil.GlException, VideoFrameProcessingException {
      ensureConfigured();
      GlUtil.focusFramebufferUsingCurrentContext(
          outputTexture.fboId, outputTexture.width, outputTexture.height);
      overlayMatrixProvider.configure(new Size(outputTexture.width, outputTexture.height));
      GlUtil.clearFocusedBuffers();

      GlProgram glProgram = checkNotNull(this.glProgram);
      glProgram.use();

      // Setup for blending.
      GLES20.glEnable(GLES20.GL_BLEND);
      // Similar to:
      // dst.rgb = src.rgb * src.a + dst.rgb * (1 - src.a)
      // dst.a   = src.a           + dst.a   * (1 - src.a)
      GLES20.glBlendFuncSeparate(
          /* srcRGB= */ GLES20.GL_SRC_ALPHA,
          /* dstRGB= */ GLES20.GL_ONE_MINUS_SRC_ALPHA,
          /* srcAlpha= */ GLES20.GL_ONE,
          /* dstAlpha= */ GLES20.GL_ONE_MINUS_SRC_ALPHA);
      GlUtil.checkGlError();

      // Draw textures from back to front.
      for (int i = framesToComposite.size() - 1; i >= 0; i--) {
        blendOntoFocusedTexture(framesToComposite.get(i));
      }

      GLES20.glDisable(GLES20.GL_BLEND);
      GlUtil.checkGlError();
    }

    public void release() {
      try {
        if (glProgram != null) {
          glProgram.delete();
        }
      } catch (GlUtil.GlException e) {
        Log.e(TAG, "Error releasing GL Program", e);
      }
    }

    private void ensureConfigured() throws VideoFrameProcessingException, GlUtil.GlException {
      if (glProgram != null) {
        return;
      }
      try {
        glProgram = new GlProgram(context, VERTEX_SHADER_PATH, FRAGMENT_SHADER_PATH);
        glProgram.setBufferAttribute(
            "aFramePosition",
            GlUtil.getNormalizedCoordinateBounds(),
            GlUtil.HOMOGENEOUS_COORDINATE_VECTOR_SIZE);
        glProgram.setFloatsUniform("uTexTransformationMatrix", GlUtil.create4x4IdentityMatrix());
      } catch (IOException e) {
        throw new VideoFrameProcessingException(e);
      }
    }

    private void blendOntoFocusedTexture(InputFrameInfo inputFrameInfo) throws GlUtil.GlException {
      GlProgram glProgram = checkNotNull(this.glProgram);
      GlTextureInfo inputTexture = inputFrameInfo.texture;
      glProgram.setSamplerTexIdUniform("uTexSampler", inputTexture.texId, /* texUnitIndex= */ 0);
      float[] transformationMatrix =
          overlayMatrixProvider.getTransformationMatrix(
              /* overlaySize= */ new Size(inputTexture.width, inputTexture.height),
              inputFrameInfo.overlaySettings);
      glProgram.setFloatsUniform("uTransformationMatrix", transformationMatrix);
      glProgram.setFloatUniform("uAlphaScale", inputFrameInfo.overlaySettings.alphaScale);
      glProgram.bindAttributesAndUniforms();

      // The four-vertex triangle strip forms a quad.
      GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, /* first= */ 0, /* count= */ 4);
      GlUtil.checkGlError();
    }
  }

  /** Holds information on an input source. */
  private static final class InputSource {
    /**
     * A queue of {@link InputFrameInfo}s, ordered by monotonically increasing {@code
     * presentationTimeUs} values.
     */
    public final Queue<InputFrameInfo> frameInfos;

    public boolean isInputEnded;

    public InputSource() {
      frameInfos = new ArrayDeque<>();
    }
  }

  /** Holds information on a frame and how to release it. */
  private static final class InputFrameInfo {
    public final GlTextureProducer textureProducer;
    public final GlTextureInfo texture;
    public final long presentationTimeUs;
    public final OverlaySettings overlaySettings;

    public InputFrameInfo(
        GlTextureProducer textureProducer,
        GlTextureInfo texture,
        long presentationTimeUs,
        OverlaySettings overlaySettings) {
      this.textureProducer = textureProducer;
      this.texture = texture;
      this.presentationTimeUs = presentationTimeUs;
      this.overlaySettings = overlaySettings;
    }
  }
}