/*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package androidx.media3.exoplayer.audio;
import static androidx.media3.common.util.Assertions.checkArgument;
import static androidx.media3.common.util.Assertions.checkState;
import static java.lang.Math.min;
import static java.lang.annotation.ElementType.TYPE_USE;
import androidx.annotation.IntDef;
import androidx.media3.common.C;
import androidx.media3.common.Format;
import androidx.media3.common.audio.AudioProcessor;
import androidx.media3.common.audio.BaseAudioProcessor;
import androidx.media3.common.util.UnstableApi;
import androidx.media3.common.util.Util;
import java.lang.annotation.Documented;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.nio.ByteBuffer;
/**
* An {@link AudioProcessor} that skips silence in the input stream. Input and output are 16-bit
* PCM.
*/
@UnstableApi
public final class SilenceSkippingAudioProcessor extends BaseAudioProcessor {
/**
* Default fraction of the original silence to keep. Between [0, 1]. 1 means keep all silence. 0
* means remove all silence.
*/
public static final float DEFAULT_SILENCE_RETENTION_RATIO = 0.2f;
/**
* Default volume percentage to keep.
*
* <p>Even when modifying the volume to a mute state, it is ideal to decrease the volume instead
* of making the volume zero. Completely silent audio sounds like playback has stopped, whereas
* decreased volume sounds like very light background noise at a recording studio.
*/
public static final int DEFAULT_MIN_VOLUME_TO_KEEP_PERCENTAGE = 10;
/** Default absolute level below which an individual PCM sample is classified as silent. */
public static final short DEFAULT_SILENCE_THRESHOLD_LEVEL = 1024;
/**
* Default minimum duration of audio that must be below {@code silenceThresholdLevel} before
* silence starts being trimmed. Specified in microseconds.
*/
public static final long DEFAULT_MINIMUM_SILENCE_DURATION_US = 100_000;
/**
* Default maximum silence to keep in microseconds. This maximum is applied after {@code
* silenceRetentionRatio}.
*/
public static final long DEFAULT_MAX_SILENCE_TO_KEEP_DURATION_US = 2_000_000;
/**
* @deprecated Specify silence behaviour via {@code silenceRetentionRatio} instead.
*/
@Deprecated public static final long DEFAULT_PADDING_SILENCE_US = 20_000;
/** Trimming states. */
@Documented
@Retention(RetentionPolicy.SOURCE)
@Target(TYPE_USE)
@IntDef({
STATE_NOISY,
STATE_SHORTENING_SILENCE,
})
private @interface State {}
/** State when the input is not silent. */
private static final int STATE_NOISY = 0;
/**
* State when the input has been silent less than or equal to {@link #maxSilenceToKeepDurationUs}
* and the silence is being shortened according to {@link #calculateShortenedSilenceLength(int)}.
*/
private static final int STATE_SHORTENING_SILENCE = 1;
/** Ways to change the volume of silence. */
@Documented
@Retention(RetentionPolicy.SOURCE)
@Target(TYPE_USE)
@IntDef({
FADE_OUT,
MUTE,
FADE_IN,
DO_NOT_CHANGE_VOLUME,
})
private @interface VolumeChangeType {}
/**
* Ramps the volume down from 100% towards {@link #minVolumeToKeepPercentageWhenMuting} over the
* output chunk.
*/
private static final int FADE_OUT = 0;
/** Scales every sample to {@link #minVolumeToKeepPercentageWhenMuting} percent. */
private static final int MUTE = 1;
/**
* Ramps the volume up from {@link #minVolumeToKeepPercentageWhenMuting} towards 100% over the
* output chunk.
*/
private static final int FADE_IN = 2;
/** Leaves the samples unmodified. */
private static final int DO_NOT_CHANGE_VOLUME = 3;
/**
* Used with {@code minVolumeToKeepPercentageWhenMuting} to avoid round off errors. An alternative
* to this would be to use floats, but integer math is probably faster than floats.
*/
private static final int AVOID_TRUNCATION_FACTOR = 1000;
/**
* Fraction of the original silence to keep. Between [0, 1]. 1 means keep all silence. 0 means
* remove all silence.
*/
private final float silenceRetentionRatio;
/** Absolute level below which an individual PCM sample is classified as silent. */
private final short silenceThresholdLevel;
/**
* Volume percentage to keep. Even when modifying the volume to a mute state, it is ideal to
* decrease the volume instead of making the volume zero. Completely silent audio sounds like
* playback has stopped, whereas decreased volume sounds like very light background noise from a
* recording studio.
*/
private final int minVolumeToKeepPercentageWhenMuting;
/**
* Duration of audio that must be below {@link #silenceThresholdLevel} before silence starts being
* trimmed. Specified in microseconds.
*/
private final long minimumSilenceDurationUs;
/**
* Maximum silence to keep in microseconds. This maximum is applied after {@link
* #silenceRetentionRatio}.
*/
private final long maxSilenceToKeepDurationUs;
/**
* The input format accepted by the most recent {@code onConfigure} call, or {@link
* AudioFormat#NOT_SET} after construction and after {@code onReset}.
*/
private AudioFormat inputFormat;
/** Size of one input frame in bytes (two bytes per channel), computed in {@code onConfigure}. */
private int bytesPerFrame;
/** Whether silence shortening was requested via {@link #setEnabled(boolean)}. */
private boolean enabled;
/** The current trimming state. Reset to {@link #STATE_NOISY} when the processor is flushed. */
private @State int state;
/** Number of input frames dropped as silence since the processor was last flushed. */
private long skippedFrames;
/**
* The number of frames of silence that have been output since the last noise. Used to enforce
* {@link #maxSilenceToKeepDurationUs}.
*/
private int outputSilenceFramesSinceNoise = 0;
/**
* Buffers audio data that may be classified as silence while in {@link
* #STATE_SHORTENING_SILENCE}. If the input becomes noisy before the buffer has filled, it will be
* output without shortening. Otherwise, the buffer will be output when filled as shortened
* silence and emptied.
*/
private byte[] maybeSilenceBuffer;
/**
* An index into {@link #maybeSilenceBuffer} pointing to the location where silence that has not
* been output starts.
*/
private int maybeSilenceBufferStartIndex = 0;
/**
* A count of the number of bytes of content in {@link #maybeSilenceBuffer}. The count starts at
* {@link #maybeSilenceBufferStartIndex}, and the bytes counted may wrap around to the start of
* the buffer. The count will never be greater than {@link #maybeSilenceBuffer}'s length.
*/
private int maybeSilenceBufferContentsSize = 0;
/** Used to hold a subset of the contents of {@link #maybeSilenceBuffer} for convenience. */
// TODO: This processor can probably be more efficient if this array is not used. Operations like
// modifyVolume() can be applied to a non-contiguous contents, the code is just more complex.
private byte[] contiguousOutputBuffer;
/**
 * Creates a new silence skipping audio processor configured with the {@code DEFAULT_*} values
 * declared on this class.
 */
public SilenceSkippingAudioProcessor() {
  this(
      /* minimumSilenceDurationUs= */ DEFAULT_MINIMUM_SILENCE_DURATION_US,
      /* silenceRetentionRatio= */ DEFAULT_SILENCE_RETENTION_RATIO,
      /* maxSilenceToKeepDurationUs= */ DEFAULT_MAX_SILENCE_TO_KEEP_DURATION_US,
      /* minVolumeToKeepPercentageWhenMuting= */ DEFAULT_MIN_VOLUME_TO_KEEP_PERCENTAGE,
      /* silenceThresholdLevel= */ DEFAULT_SILENCE_THRESHOLD_LEVEL);
}
/**
 * Creates a processor from the legacy, padding-based configuration.
 *
 * <p>The padding is translated into a retention ratio of {@code paddingSilenceUs /
 * minimumSilenceDurationUs}, the maximum silence to keep is set to {@code
 * minimumSilenceDurationUs}, and no volume is kept when muting.
 *
 * @param minimumSilenceDurationUs Duration of audio that must be below {@code
 *     silenceThresholdLevel} before silence starts being trimmed, in microseconds.
 * @param paddingSilenceUs The legacy padding duration, in microseconds.
 * @param silenceThresholdLevel Absolute level below which an individual PCM sample is classified
 *     as silent.
 * @deprecated Use {@link #SilenceSkippingAudioProcessor(long, float, long, int, short)} instead.
 */
@Deprecated
public SilenceSkippingAudioProcessor(
    long minimumSilenceDurationUs, long paddingSilenceUs, short silenceThresholdLevel) {
  this(
      /* minimumSilenceDurationUs= */ minimumSilenceDurationUs,
      /* silenceRetentionRatio= */ paddingSilenceUs / (float) minimumSilenceDurationUs,
      /* maxSilenceToKeepDurationUs= */ minimumSilenceDurationUs,
      /* minVolumeToKeepPercentageWhenMuting= */ 0,
      /* silenceThresholdLevel= */ silenceThresholdLevel);
}
/**
 * Creates a new silence trimming audio processor.
 *
 * @param minimumSilenceDurationUs How long, in microseconds, the audio must stay below {@code
 *     silenceThresholdLevel} before silence starts being trimmed.
 * @param silenceRetentionRatio Fraction of the original silence to keep, in the range [0, 1]. 1
 *     keeps all silence and 0 removes all silence.
 * @param maxSilenceToKeepDurationUs Upper bound, in microseconds, on the silence that is kept.
 *     Applied after {@code silenceRetentionRatio}.
 * @param minVolumeToKeepPercentageWhenMuting Volume percentage that is kept when silence is
 *     muted. Reducing the volume rather than zeroing it avoids output that sounds like playback
 *     has stopped.
 * @param silenceThresholdLevel Absolute level below which an individual PCM sample is classified
 *     as silent.
 * @throws IllegalArgumentException If {@code silenceRetentionRatio} is outside [0, 1].
 */
public SilenceSkippingAudioProcessor(
    long minimumSilenceDurationUs,
    float silenceRetentionRatio,
    long maxSilenceToKeepDurationUs,
    int minVolumeToKeepPercentageWhenMuting,
    short silenceThresholdLevel) {
  boolean isRatioInRange = silenceRetentionRatio >= 0f && silenceRetentionRatio <= 1f;
  checkArgument(isRatioInRange);

  // Immutable configuration.
  this.silenceThresholdLevel = silenceThresholdLevel;
  this.minVolumeToKeepPercentageWhenMuting = minVolumeToKeepPercentageWhenMuting;
  this.maxSilenceToKeepDurationUs = maxSilenceToKeepDurationUs;
  this.silenceRetentionRatio = silenceRetentionRatio;
  this.minimumSilenceDurationUs = minimumSilenceDurationUs;

  // Mutable state starts out unconfigured, with empty working buffers.
  contiguousOutputBuffer = Util.EMPTY_BYTE_ARRAY;
  maybeSilenceBuffer = Util.EMPTY_BYTE_ARRAY;
  inputFormat = AudioFormat.NOT_SET;
}
/**
* Sets whether to shorten silence in the input. This method may only be called after draining
* data through the processor. The value returned by {@link #isActive()} may change, and the
* processor must be {@link #flush() flushed} before queueing more data.
*
* <p>The flag is cleared again when the processor is reset.
*
* @param enabled Whether to shorten silence in the input.
*/
public void setEnabled(boolean enabled) {
// Only records the request; the working buffers are sized by the next flush while active.
this.enabled = enabled;
}
/**
* Returns the total number of frames of input audio that were skipped due to being classified as
* silence since the last call to {@link #flush()}.
*
* @return The number of skipped frames. The counter is set back to zero on every flush.
*/
public long getSkippedFrames() {
return skippedFrames;
}
/**
 * Accepts 16-bit PCM input only and remembers the format and frame size for later processing.
 *
 * @param inputAudioFormat The format of the audio that will be queued.
 * @return {@code inputAudioFormat}, unchanged.
 * @throws UnhandledAudioFormatException If the encoding is not {@link C#ENCODING_PCM_16BIT}.
 */
@Override
protected AudioFormat onConfigure(AudioFormat inputAudioFormat)
    throws UnhandledAudioFormatException {
  boolean isPcm16Bit = inputAudioFormat.encoding == C.ENCODING_PCM_16BIT;
  if (!isPcm16Bit) {
    throw new UnhandledAudioFormatException(inputAudioFormat);
  }
  // Each 16-bit sample occupies two bytes, so a frame is two bytes per channel.
  bytesPerFrame = 2 * inputAudioFormat.channelCount;
  inputFormat = inputAudioFormat;
  return inputAudioFormat;
}
/**
 * Returns whether the processor is both enabled and configured with an input format that has a
 * sample rate set.
 */
@Override
public boolean isActive() {
  return enabled && inputFormat.sampleRate != Format.NO_VALUE;
}
/**
 * Consumes {@code inputBuffer} step by step, dispatching on the current trimming state, until
 * the input is exhausted or a step has produced pending output.
 *
 * @param inputBuffer The input to process.
 * @throws IllegalStateException If the processor is in an unknown state.
 */
@Override
public void queueInput(ByteBuffer inputBuffer) {
  while (inputBuffer.hasRemaining() && !hasPendingOutput()) {
    @State int currentState = state;
    if (currentState == STATE_NOISY) {
      processNoisy(inputBuffer);
    } else if (currentState == STATE_SHORTENING_SILENCE) {
      shortenSilenceSilenceUntilNoise(inputBuffer);
    } else {
      throw new IllegalStateException();
    }
  }
}
/**
 * Outputs whatever silence is still buffered when the end of the stream is reached, treating the
 * end of the stream like a transition back to the noisy state.
 */
@Override
public void onQueueEndOfStream() {
  // The maybeSilenceBuffer is only written to in STATE_SHORTENING_SILENCE and is always
  // completely flushed before that state is left, so an empty buffer means there is nothing to do.
  if (maybeSilenceBufferContentsSize <= 0) {
    return;
  }
  // Bytes remain in the buffer: emit the final chunk of shortened silence as if noise followed.
  outputShortenedSilenceBuffer(/* shouldTransitionToNoisyState= */ true);
  outputSilenceFramesSinceNoise = 0;
}
/**
 * Resizes the working buffers if required while the processor is active, then resets all
 * trimming state and counters.
 */
@Override
public void onFlush() {
  if (isActive()) {
    // Align half of the size and then double it, so that the buffer can be split into two
    // bytesPerFrame aligned parts.
    int alignedHalfSize =
        alignToBytePerFrameBoundary(durationUsToFrames(minimumSilenceDurationUs) / 2);
    int requiredBufferSize = alignedHalfSize * 2;
    boolean needsReallocation = maybeSilenceBuffer.length != requiredBufferSize;
    if (needsReallocation) {
      maybeSilenceBuffer = new byte[requiredBufferSize];
      contiguousOutputBuffer = new byte[requiredBufferSize];
    }
  }
  // Forget everything about previously queued input.
  maybeSilenceBufferContentsSize = 0;
  maybeSilenceBufferStartIndex = 0;
  outputSilenceFramesSinceNoise = 0;
  skippedFrames = 0;
  state = STATE_NOISY;
}
/**
 * Returns the processor to its unconfigured, disabled state and releases the working buffers.
 */
@Override
public void onReset() {
  contiguousOutputBuffer = Util.EMPTY_BYTE_ARRAY;
  maybeSilenceBuffer = Util.EMPTY_BYTE_ARRAY;
  inputFormat = AudioFormat.NOT_SET;
  enabled = false;
}
/**
 * Incrementally processes new input from {@code inputBuffer} while in {@link #STATE_NOISY},
 * updating the state if needed.
 *
 * <p>At most one {@link #maybeSilenceBuffer} length of input is inspected per call. Input up to
 * the end of the last noisy frame in that window is output unchanged. If the whole window is
 * silent, nothing is consumed and the state switches to {@link #STATE_SHORTENING_SILENCE}.
 */
private void processNoisy(ByteBuffer inputBuffer) {
  int originalLimit = inputBuffer.limit();

  // Restrict the search for noise to one maybe silence buffer worth of input.
  int windowEnd = min(originalLimit, inputBuffer.position() + maybeSilenceBuffer.length);
  inputBuffer.limit(windowEnd);

  int noiseLimit = findNoiseLimit(inputBuffer);
  if (noiseLimit != inputBuffer.position()) {
    // Pass everything up to the end of the noise straight through.
    inputBuffer.limit(min(noiseLimit, inputBuffer.capacity()));
    output(inputBuffer);
  } else {
    // The window holds the start of possible silence.
    state = STATE_SHORTENING_SILENCE;
  }

  // Hand the buffer back with its original limit.
  inputBuffer.limit(originalLimit);
}
/**
 * Incrementally processes new input from {@code inputBuffer} while in {@link
 * #STATE_SHORTENING_SILENCE}, updating the state if needed.
 *
 * <p>If the amount of silence is less than {@link #minimumSilenceDurationUs}, the silence is
 * output using {@link #DO_NOT_CHANGE_VOLUME}.
 *
 * <p>If the amount of silence is more than {@link #minimumSilenceDurationUs}, the following is
 * output:
 *
 * <ul>
 *   <li>Half a buffer of silence using {@link #FADE_OUT}. This padding has no discontinuities.
 *   <li>Between 0 and ({@link #maxSilenceToKeepDurationUs} - padding) bytes of shortened
 *       silence. This has discontinuities, which are imperceptible because the content is
 *       {@linkplain #MUTE muted}.
 *   <li>Once the silence exceeds {@link #maxSilenceToKeepDurationUs} minus half a buffer (kept
 *       back for the padding applied later), further silence is thrown away entirely.
 *   <li>A final half buffer of silence using {@link #FADE_IN}. This padding has no
 *       discontinuities and leads back into {@link #STATE_NOISY}.
 * </ul>
 *
 * <p>Transitions to {@link #STATE_NOISY} if noise is encountered. Input is written to {@link
 * #maybeSilenceBuffer} in contiguous blocks: if the available silence would wrap around the end
 * of the buffer, only the part up to the end of the buffer is written and the beginning of the
 * buffer is filled by the next call to this method.
 */
private void shortenSilenceSilenceUntilNoise(ByteBuffer inputBuffer) {
  checkState(maybeSilenceBufferStartIndex < maybeSilenceBuffer.length);

  int originalLimit = inputBuffer.limit();
  int noisePosition = findNoisePosition(inputBuffer);
  int silentInputBytes = noisePosition - inputBuffer.position();

  // Work out where the next contiguous free region of the ring buffer starts and how big it is.
  int bufferLength = maybeSilenceBuffer.length;
  int contentsEnd = maybeSilenceBufferStartIndex + maybeSilenceBufferContentsSize;
  int writeIndex;
  int contiguousFreeBytes;
  if (contentsEnd >= bufferLength) {
    // The contents have wrapped around: ^0---^end---^start---^length
    int bytesInUpperPart = bufferLength - maybeSilenceBufferStartIndex;
    writeIndex = maybeSilenceBufferContentsSize - bytesInUpperPart;
    contiguousFreeBytes = maybeSilenceBufferStartIndex - writeIndex;
  } else {
    // No wrap around: ^0---^start---^end---^length
    writeIndex = contentsEnd;
    contiguousFreeBytes = bufferLength - contentsEnd;
  }

  // Buffer as much of the silent input as fits into the contiguous free region.
  int bytesToBuffer = min(silentInputBytes, contiguousFreeBytes);
  inputBuffer.limit(inputBuffer.position() + bytesToBuffer);
  inputBuffer.get(maybeSilenceBuffer, writeIndex, bytesToBuffer);
  maybeSilenceBufferContentsSize += bytesToBuffer;
  checkState(maybeSilenceBufferContentsSize <= maybeSilenceBuffer.length);

  // Only go back to the noisy state when noise was found and all the silence in front of it fit
  // into the free region without filling it.
  boolean noiseFound = noisePosition < originalLimit;
  boolean shouldTransitionToNoisyState = noiseFound && silentInputBytes < contiguousFreeBytes;

  outputShortenedSilenceBuffer(shouldTransitionToNoisyState);
  if (shouldTransitionToNoisyState) {
    outputSilenceFramesSinceNoise = 0;
    state = STATE_NOISY;
  }

  // Hand the buffer back with its original limit.
  inputBuffer.limit(originalLimit);
}
/**
 * Outputs buffered silence from {@link #maybeSilenceBuffer}, shortened where applicable, and
 * updates the buffer bookkeeping. See {@link #shortenSilenceSilenceUntilNoise}.
 *
 * <p>Nothing is output unless the buffer is full or {@code shouldTransitionToNoisyState} is set.
 * Otherwise one of four things happens:
 *
 * <ul>
 *   <li>No silence has been output since the last noise, and noise follows: the whole buffer
 *       contents are output with {@link #DO_NOT_CHANGE_VOLUME}.
 *   <li>No silence has been output since the last noise, and the buffer is full: half a buffer is
 *       output with {@link #FADE_OUT}.
 *   <li>Silence has already been output, and noise follows: the whole contents are consumed and
 *       half a buffer plus the shortened remainder is output with {@link #FADE_IN}.
 *   <li>Silence has already been output, and the buffer is full: everything except the last half
 *       buffer is consumed and its shortened length is output with {@link #MUTE}.
 * </ul>
 *
 * @param shouldTransitionToNoisyState Whether the buffered silence should be output as if noise
 *     followed it immediately.
 */
private void outputShortenedSilenceBuffer(boolean shouldTransitionToNoisyState) {
  // Only output when the buffer is full or when transitioning to the noisy state.
  boolean isBufferFull = maybeSilenceBufferContentsSize == maybeSilenceBuffer.length;
  if (!isBufferFull && !shouldTransitionToNoisyState) {
    return;
  }

  int sizeBeforeOutput = maybeSilenceBufferContentsSize;
  int bytesToOutput;
  int bytesConsumed;
  if (outputSilenceFramesSinceNoise == 0) {
    // This is the beginning of a silence chunk.
    if (shouldTransitionToNoisyState) {
      // The silence was too short to be trimmed, so pass all of it through untouched.
      bytesToOutput = maybeSilenceBufferContentsSize;
      outputSilence(bytesToOutput, DO_NOT_CHANGE_VOLUME);
      bytesConsumed = bytesToOutput;
    } else {
      checkState(maybeSilenceBufferContentsSize >= maybeSilenceBuffer.length / 2);
      // Always output exactly half a buffer here, which keeps the shortening logic out of this
      // branch.
      bytesToOutput = maybeSilenceBuffer.length / 2;
      outputSilence(bytesToOutput, FADE_OUT);
      bytesConsumed = bytesToOutput;
    }
  } else if (shouldTransitionToNoisyState) {
    int bytesRemainingAfterOutputtingHalfMin =
        maybeSilenceBufferContentsSize - maybeSilenceBuffer.length / 2;
    bytesConsumed = bytesRemainingAfterOutputtingHalfMin + maybeSilenceBuffer.length / 2;
    int shortenedSilenceLength =
        calculateShortenedSilenceLength(bytesRemainingAfterOutputtingHalfMin);
    // For simplicity the fade in covers both the shortened silence and the half buffer of
    // padding. This lengthens the padding slightly, which only helps the sound quality.
    bytesToOutput = maybeSilenceBuffer.length / 2 + shortenedSilenceLength;
    outputSilence(bytesToOutput, FADE_IN);
  } else {
    // Output as much as possible while keeping half the buffer full, so that half the minimum
    // silence can be output later as padding.
    bytesConsumed = maybeSilenceBufferContentsSize - maybeSilenceBuffer.length / 2;
    bytesToOutput = calculateShortenedSilenceLength(bytesConsumed);
    outputSilence(bytesToOutput, MUTE);
  }

  // The message is built by concatenation; checkState does not expand format specifiers.
  checkState(
      bytesConsumed % bytesPerFrame == 0,
      "bytesConsumed is not aligned to frame size: " + bytesConsumed);
  checkState(sizeBeforeOutput >= bytesToOutput);

  maybeSilenceBufferContentsSize -= bytesConsumed;
  maybeSilenceBufferStartIndex += bytesConsumed;
  // The start index might wrap back around to the start of the buffer.
  maybeSilenceBufferStartIndex %= maybeSilenceBuffer.length;

  outputSilenceFramesSinceNoise += bytesToOutput / bytesPerFrame;
  skippedFrames += (bytesConsumed - bytesToOutput) / bytesPerFrame;
}
/**
 * Returns the size, in bytes, that the given number of bytes of silence should be shortened to.
 *
 * <p>The result is {@code silenceToShortenBytes} multiplied by {@link #silenceRetentionRatio},
 * capped at the number of bytes that can still be output before {@link
 * #maxSilenceToKeepDurationUs} is reached (taking {@link #outputSilenceFramesSinceNoise} and half
 * of {@link #maybeSilenceBuffer} into account), and rounded down to a whole number of frames.
 *
 * @param silenceToShortenBytes The amount of silence to shorten, in bytes.
 */
private int calculateShortenedSilenceLength(int silenceToShortenBytes) {
  // Start skipping silence once the output silence would exceed the maximum to keep.
  int framesLeftBeforeMax =
      durationUsToFrames(maxSilenceToKeepDurationUs) - outputSilenceFramesSinceNoise;
  int bytesNeededToReachMax = framesLeftBeforeMax * bytesPerFrame - maybeSilenceBuffer.length / 2;
  checkState(bytesNeededToReachMax >= 0);

  float retainedBytes = silenceToShortenBytes * silenceRetentionRatio + .5f;
  float cappedBytes = min(retainedBytes, bytesNeededToReachMax);
  return alignToBytePerFrameBoundary(cappedBytes);
}
/**
 * Rounds {@code value} towards zero to the nearest multiple of {@link #bytesPerFrame}. Used to
 * avoid rounding errors while calculating output and skipped frames.
 */
private int alignToBytePerFrameBoundary(int value) {
  int bytesPastFrameBoundary = value % bytesPerFrame;
  return value - bytesPastFrameBoundary;
}
/**
 * Truncates {@code value} to an integer and then rounds it to a multiple of {@link
 * #bytesPerFrame} using the integer overload. Used to avoid rounding errors while calculating
 * output and skipped frames.
 */
private int alignToBytePerFrameBoundary(float value) {
  int truncatedValue = (int) value;
  return alignToBytePerFrameBoundary(truncatedValue);
}
/**
 * Applies the volume change given by {@code rampType} to the first {@code size} bytes of {@code
 * data} in place, then copies those bytes into a new output buffer of the processor.
 *
 * @param data The source bytes. The first {@code size} bytes may be modified.
 * @param size The number of bytes to output. Must be a multiple of {@link #bytesPerFrame}.
 * @param rampType The volume change to apply before outputting.
 */
private void outputRange(byte[] data, int size, @VolumeChangeType int rampType) {
  boolean isFrameAligned = size % bytesPerFrame == 0;
  checkArgument(isFrameAligned, "byteOutput size is not aligned to frame size " + size);
  modifyVolume(data, size, rampType);
  ByteBuffer outputBuffer = replaceOutputBuffer(size);
  outputBuffer.put(data, 0, size);
  outputBuffer.flip();
}
/**
 * Copies {@code sizeToOutput} bytes from {@link #maybeSilenceBuffer} into {@link
 * #contiguousOutputBuffer}, starting at index 0 of the latter, and outputs them. The contents of
 * {@link #maybeSilenceBuffer} may wrap around from the end of the buffer to its beginning.
 *
 * @param sizeToOutput The number of bytes to output. Nothing happens if this is 0.
 * @param rampType Determines which part of the {@link #maybeSilenceBuffer} contents is kept. For
 *     {@link #FADE_IN} the end of the contents is kept. Otherwise the beginning of the contents
 *     is kept.
 */
private void outputSilence(int sizeToOutput, @VolumeChangeType int rampType) {
  if (sizeToOutput == 0) {
    return;
  }
  checkArgument(maybeSilenceBufferContentsSize >= sizeToOutput);

  int bufferLength = maybeSilenceBuffer.length;
  int startIndex = maybeSilenceBufferStartIndex;
  int contentsSize = maybeSilenceBufferContentsSize;

  if (rampType != FADE_IN) {
    // Keep the beginning of the contents.
    if (startIndex + sizeToOutput <= bufferLength) {
      // ^0---^start---^end---^length
      System.arraycopy(maybeSilenceBuffer, startIndex, contiguousOutputBuffer, 0, sizeToOutput);
    } else {
      // ^0---^end (of content to output now)---^start---^length
      // Copy the upper part of the array first, then the rest from the lower part.
      int upperPartSize = bufferLength - startIndex;
      System.arraycopy(maybeSilenceBuffer, startIndex, contiguousOutputBuffer, 0, upperPartSize);
      int lowerPartSize = sizeToOutput - upperPartSize;
      System.arraycopy(maybeSilenceBuffer, 0, contiguousOutputBuffer, upperPartSize, lowerPartSize);
    }
  } else if (startIndex + contentsSize <= bufferLength) {
    // Keep the end of the contents because they pad the start of the next chunk of noise.
    // ^0---^start---^end---^length
    System.arraycopy(
        maybeSilenceBuffer,
        startIndex + contentsSize - sizeToOutput,
        contiguousOutputBuffer,
        0,
        sizeToOutput);
  } else {
    // Keep the end of the contents, which have wrapped around: ^0---^end--^start---^length
    int contentsInUpperPart = bufferLength - startIndex;
    int contentsInLowerPart = contentsSize - contentsInUpperPart;
    if (contentsInLowerPart >= sizeToOutput) {
      // The lower part alone holds everything that is needed.
      System.arraycopy(
          maybeSilenceBuffer,
          contentsInLowerPart - sizeToOutput,
          contiguousOutputBuffer,
          0,
          sizeToOutput);
    } else {
      // Take the tail of the upper part followed by the whole lower part.
      int bytesFromUpperPart = sizeToOutput - contentsInLowerPart;
      System.arraycopy(
          maybeSilenceBuffer,
          bufferLength - bytesFromUpperPart,
          contiguousOutputBuffer,
          0,
          bytesFromUpperPart);
      System.arraycopy(
          maybeSilenceBuffer, 0, contiguousOutputBuffer, bytesFromUpperPart, contentsInLowerPart);
    }
  }

  checkArgument(
      sizeToOutput % bytesPerFrame == 0,
      "sizeToOutput is not aligned to frame size: " + sizeToOutput);
  checkState(maybeSilenceBufferStartIndex < maybeSilenceBuffer.length);
  outputRange(contiguousOutputBuffer, sizeToOutput, rampType);
}
/**
 * Scales the 16-bit samples in the first {@code size} bytes of {@code sampleBuffer} in place,
 * according to the given {@link VolumeChangeType}.
 *
 * @param sampleBuffer The samples to modify, two bytes per sample with the low byte first.
 * @param size The number of bytes to process.
 * @param volumeChangeType How the volume changes across the processed bytes. Nothing is modified
 *     for {@link #DO_NOT_CHANGE_VOLUME}.
 */
private void modifyVolume(byte[] sampleBuffer, int size, @VolumeChangeType int volumeChangeType) {
  if (volumeChangeType == DO_NOT_CHANGE_VOLUME) {
    return;
  }

  int rampEnd = size - 1;
  for (int offset = 0; offset < size; offset += 2) {
    int sample = twoByteSampleToInt(sampleBuffer[offset + 1], sampleBuffer[offset]);

    int volumePercentage;
    switch (volumeChangeType) {
      case FADE_OUT:
        volumePercentage = calculateFadeOutPercentage(/* value= */ offset, /* max= */ rampEnd);
        break;
      case FADE_IN:
        volumePercentage = calculateFadeInPercentage(/* value= */ offset, /* max= */ rampEnd);
        break;
      default:
        // Muting keeps a constant fraction of the original volume.
        volumePercentage = minVolumeToKeepPercentageWhenMuting;
        break;
    }

    int scaledSample = (sample * volumePercentage) / 100;
    sampleIntToTwoBigEndianBytes(sampleBuffer, offset, scaledSample);
  }
}
/**
 * Returns the volume percentage for a fade out at position {@code value} of {@code max}: 100 at
 * {@code value == 0}, moving linearly towards {@link #minVolumeToKeepPercentageWhenMuting} as
 * {@code value} approaches {@code max}.
 */
private int calculateFadeOutPercentage(int value, int max) {
  // Progress through the fade, scaled up to limit the loss from integer division.
  int scaledProgress = (AVOID_TRUNCATION_FACTOR * value) / max;
  int totalVolumeChange = minVolumeToKeepPercentageWhenMuting - 100;
  return 100 + (totalVolumeChange * scaledProgress) / AVOID_TRUNCATION_FACTOR;
}
/**
 * Returns the volume percentage for a fade in at position {@code value} of {@code max}: {@link
 * #minVolumeToKeepPercentageWhenMuting} at {@code value == 0}, moving linearly towards 100 as
 * {@code value} approaches {@code max}.
 *
 * <p>The intermediate product is computed with {@code long} arithmetic. In {@code int}
 * arithmetic the product of the volume range, {@link #AVOID_TRUNCATION_FACTOR} and {@code value}
 * overflows for byte offsets in the low tens of thousands, which would yield a wrong (possibly
 * negative) percentage for large silence buffers.
 *
 * @param value The current position within the fade.
 * @param max The position at which the fade would reach 100%.
 */
private int calculateFadeInPercentage(int value, int max) {
  long volumeRange = 100 - minVolumeToKeepPercentageWhenMuting;
  // Same order of operations as the int formula (multiply, then divide by max), just widened.
  long scaledProgress = volumeRange * AVOID_TRUNCATION_FACTOR * value / max;
  return (int) (minVolumeToKeepPercentageWhenMuting + scaledProgress / AVOID_TRUNCATION_FACTOR);
}
/**
 * Combines the two bytes of a 16-bit sample into a sign-extended {@code int}.
 *
 * @param mostSignificantByte The high byte, which carries the sign.
 * @param leastSignificantByte The low byte, which is treated as unsigned.
 */
private static int twoByteSampleToInt(byte mostSignificantByte, byte leastSignificantByte) {
  int highBits = mostSignificantByte << 8;
  int lowBits = leastSignificantByte & 0xFF;
  return highBits | lowBits;
}
/**
 * Writes {@code sample} into {@code byteArray} as a 16-bit value, clamped to the range of a
 * {@code short}.
 *
 * <p>Despite the method name, the low byte is stored at {@code startIndex} and the high byte at
 * {@code startIndex + 1}.
 *
 * @param byteArray The array to write into.
 * @param startIndex The index of the first of the two bytes to write.
 * @param sample The sample value to store.
 */
private static void sampleIntToTwoBigEndianBytes(byte[] byteArray, int startIndex, int sample) {
  // Clamp first so that out-of-range values saturate instead of wrapping around.
  int clampedSample = Math.max(Short.MIN_VALUE, Math.min(Short.MAX_VALUE, sample));
  byteArray[startIndex] = (byte) (clampedSample & 0xFF);
  byteArray[startIndex + 1] = (byte) (clampedSample >> 8);
}
/**
 * Copies the remaining bytes of {@code data} into a new output buffer of the processor, consuming
 * them from {@code data}.
 */
private void output(ByteBuffer data) {
  int byteCount = data.remaining();
  ByteBuffer outputBuffer = replaceOutputBuffer(byteCount);
  outputBuffer.put(data);
  outputBuffer.flip();
}
/**
 * Converts a duration in microseconds into a number of frames at the sample rate of {@link
 * #inputFormat}, rounding towards zero.
 */
private int durationUsToFrames(long durationUs) {
  long frameCount = durationUs * inputFormat.sampleRate / C.MICROS_PER_SECOND;
  return (int) frameCount;
}
/**
 * Scans {@code buffer} forwards from its position and returns the byte position of the start of
 * the first frame containing a noisy sample, or the limit of the buffer if every sample up to
 * the limit is silent.
 */
private int findNoisePosition(ByteBuffer buffer) {
  // The input is in ByteOrder.nativeOrder(), which is little endian on Android, so the high byte
  // of each sample is the second of its two bytes.
  int limit = buffer.limit();
  int highByteIndex = buffer.position() + 1;
  while (highByteIndex < limit) {
    if (isNoise(buffer.get(highByteIndex), buffer.get(highByteIndex - 1))) {
      // Round down to the start of the frame.
      return bytesPerFrame * (highByteIndex / bytesPerFrame);
    }
    highByteIndex += 2;
  }
  return limit;
}
/**
 * Scans {@code buffer} backwards from its limit and returns the byte position just after the
 * last frame containing a noisy sample, so that everything from the returned position to the
 * limit is silent. Returns the position of the buffer if no sample is noisy.
 */
private int findNoiseLimit(ByteBuffer buffer) {
  // The input is in ByteOrder.nativeOrder(), which is little endian on Android, so the high byte
  // of each sample is the second of its two bytes.
  int position = buffer.position();
  int highByteIndex = buffer.limit() - 1;
  while (highByteIndex >= position) {
    if (isNoise(buffer.get(highByteIndex), buffer.get(highByteIndex - 1))) {
      // Return the start of the frame after the noisy one.
      return bytesPerFrame * (highByteIndex / bytesPerFrame) + bytesPerFrame;
    }
    highByteIndex -= 2;
  }
  return position;
}
/**
 * Returns whether the signed 16-bit PCM sample formed by the two given bytes has an absolute
 * value greater than {@link #silenceThresholdLevel}.
 */
private boolean isNoise(byte mostSignificantByte, byte leastSignificantByte) {
  int sample = twoByteSampleToInt(mostSignificantByte, leastSignificantByte);
  int amplitude = Math.abs(sample);
  return amplitude > silenceThresholdLevel;
}
}