WebvttParserUtil.java
/*
* Copyright (C) 2016 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package androidx.media3.extractor.text.webvtt;
import androidx.annotation.Nullable;
import androidx.media3.common.ParserException;
import androidx.media3.common.util.ParsableByteArray;
import androidx.media3.common.util.UnstableApi;
import androidx.media3.common.util.Util;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/** Utility methods for parsing WebVTT data. */
@UnstableApi
public final class WebvttParserUtil {
private static final Pattern COMMENT = Pattern.compile("^NOTE([ \t].*)?$");
private static final String WEBVTT_HEADER = "WEBVTT";
private WebvttParserUtil() {}
/**
* Reads and validates the first line of a WebVTT file.
*
* @param input The input from which the line should be read.
* @throws ParserException If the line isn't the start of a valid WebVTT file.
*/
public static void validateWebvttHeaderLine(ParsableByteArray input) throws ParserException {
int startPosition = input.getPosition();
if (!isWebvttHeaderLine(input)) {
input.setPosition(startPosition);
throw ParserException.createForMalformedContainer(
"Expected WEBVTT. Got " + input.readLine(), /* cause= */ null);
}
}
/**
* Returns whether the given input is the first line of a WebVTT file.
*
* @param input The input from which the line should be read.
*/
public static boolean isWebvttHeaderLine(ParsableByteArray input) {
@Nullable String line = input.readLine();
return line != null && line.startsWith(WEBVTT_HEADER);
}
/**
* Parses a WebVTT timestamp.
*
* @param timestamp The timestamp string.
* @return The parsed timestamp in microseconds.
* @throws NumberFormatException If the timestamp could not be parsed.
*/
public static long parseTimestampUs(String timestamp) throws NumberFormatException {
long value = 0;
String[] parts = Util.splitAtFirst(timestamp, "\.");
String[] subparts = Util.split(parts[0], ":");
for (String subpart : subparts) {
value = (value * 60) + Long.parseLong(subpart);
}
value *= 1000;
if (parts.length == 2) {
value += Long.parseLong(parts[1]);
}
return value * 1000;
}
/**
* Parses a percentage string.
*
* @param s The percentage string.
* @return The parsed value, where 1.0 represents 100%.
* @throws NumberFormatException If the percentage could not be parsed.
*/
public static float parsePercentage(String s) throws NumberFormatException {
if (!s.endsWith("%")) {
throw new NumberFormatException("Percentages must end with %");
}
return Float.parseFloat(s.substring(0, s.length() - 1)) / 100;
}
/**
* Reads lines up to and including the next WebVTT cue header.
*
* @param input The input from which lines should be read.
* @return A {@link Matcher} for the WebVTT cue header, or null if the end of the input was
* reached without a cue header being found. In the case that a cue header is found, groups 1,
* 2 and 3 of the returned matcher contain the start time, end time and settings list.
*/
@Nullable
public static Matcher findNextCueHeader(ParsableByteArray input) {
@Nullable String line;
while ((line = input.readLine()) != null) {
if (COMMENT.matcher(line).matches()) {
// Skip until the end of the comment block.
while ((line = input.readLine()) != null && !line.isEmpty()) {}
} else {
Matcher cueHeaderMatcher = WebvttCueParser.CUE_HEADER_PATTERN.matcher(line);
if (cueHeaderMatcher.matches()) {
return cueHeaderMatcher;
}
}
}
return null;
}
}