Home Reference Source

src/utils/imsc1-ttml-parser.ts

import { findBox } from './mp4-tools';
import { parseTimeStamp } from './vttparser';
import VTTCue from './vttcue';
import { utf8ArrayToStr } from '../demux/id3';
import { toTimescaleFromScale } from './timescale-conversion';
import { generateCueId } from './webvtt-parser';

// Codec identifier exported under the IMSC1 name.
// NOTE(review): presumably compared against a subtitle track's declared codec
// by callers outside this file — confirm.
export const IMSC1_CODEC = 'stpp.ttml.im1t';

// Clock time with frames: hh:mm:ss:ff(.subframes)
// Capture groups: 1 = hours (2+ digits), 2 = minutes, 3 = seconds,
// 4 = frames (2+ digits), 5 = sub-frames (optional).
// Sub-frames are only recognised after a literal dot, so a three-digit frame
// count such as "00:00:01:123" is read as frame 123 rather than being split
// into frame 12 and sub-frame 3.
const HMSF_REGEX = /^(\d{2,}):(\d{2}):(\d{2}):(\d{2,})(?:\.(\d+))?$/;

// Offset time: a decimal number followed by a metric.
// Metrics: h (hours), m (minutes), s (seconds), ms (milliseconds),
// f (frames), t (ticks).
// Capture groups: 1 = numeric part, 2 = metric.
const TIME_UNIT_REGEX = /^(\d*(?:\.\d*)?)(h|m|s|ms|f|t)$/;

/**
 * Pulls the TTML document out of an IMSC1 payload and reports its cues.
 *
 * The first `mdat` box returned by `findBox` is decoded as UTF-8 text and
 * handed to `parseTTML`. Exactly one of the two callbacks is invoked by this
 * function, unless `callBack` itself throws, in which case that error is
 * forwarded to `errorCallBack` as well.
 *
 * @param payload - Buffer searched for an `mdat` box.
 * @param initPTS - Initial timestamp, passed to `toTimescaleFromScale`.
 * @param timescale - Scale passed along with `initPTS`.
 *   NOTE(review): presumably `initPTS` is expressed in `timescale` units and
 *   the result is seconds — confirm against `toTimescaleFromScale`.
 * @param callBack - Receives the parsed cues.
 * @param errorCallBack - Receives the error when no `mdat` box is found or
 *   when parsing throws.
 */
export function parseIMSC1(
  payload: ArrayBuffer,
  initPTS: number,
  timescale: number,
  callBack: (cues: Array<VTTCue>) => any,
  errorCallBack: (error: Error) => any
) {
  const mdatBoxes = findBox(new Uint8Array(payload), ['mdat']);
  if (mdatBoxes.length === 0) {
    errorCallBack(new Error('Could not parse IMSC1 mdat'));
    return;
  }

  // Only the first mdat box is considered
  const { start, end } = mdatBoxes[0];
  const ttmlBytes = new Uint8Array(payload, start, end - start);
  const ttml = utf8ArrayToStr(ttmlBytes);
  const syncTime = toTimescaleFromScale(initPTS, 1, timescale);

  try {
    const cues = parseTTML(ttml, syncTime);
    callBack(cues);
  } catch (error) {
    errorCallBack(error);
  }
}

/**
 * Builds VTT cues from a TTML document string.
 *
 * The string is parsed as XML. Every element below `<body>` that has a
 * `begin` attribute and non-empty text becomes one cue whose times are the
 * parsed TTML times minus `syncTime`.
 *
 * @param ttml - TTML document source.
 * @param syncTime - Offset subtracted from each cue's start and end time.
 * @returns The created cues, in the order the cue elements were collected.
 * @throws Error when the document has no `<tt>` element, when a cue's `begin`
 *   cannot be parsed, or when neither its `end` nor its `dur` can be parsed.
 */
function parseTTML(ttml: string, syncTime: number): Array<VTTCue> {
  const xmlDoc = new DOMParser().parseFromString(ttml, 'text/xml');
  const tt = xmlDoc.getElementsByTagName('tt')[0];
  if (!tt) {
    throw new Error('Invalid ttml');
  }

  // Rate parameters: a non-empty `ttp:<name>` attribute on <tt> replaces the
  // default. Attribute values are kept as the strings the DOM returns.
  const defaultRateInfo = {
    frameRate: 30,
    subFrameRate: 1,
    frameRateMultiplier: 0,
    tickRate: 0,
  };
  const rateInfo: { [key: string]: string | number } = {};
  for (const key of Object.keys(defaultRateInfo)) {
    rateInfo[key] = tt.getAttribute(`ttp:${key}`) || defaultRateInfo[key];
  }

  // Whitespace is collapsed unless the document asks to preserve it
  const trim = tt.getAttribute('xml:space') !== 'preserve';

  const styleElements = collectionToDictionary(
    getElementCollection(tt, 'styling', 'style')
  );
  const regionElements = collectionToDictionary(
    getElementCollection(tt, 'layout', 'region')
  );
  const cueElements = getElementCollection(tt, 'body', '[begin]');

  // TTML textAlign value -> cue lineAlign value
  const lineAlignByTextAlign = {
    left: 'start',
    center: 'center',
    right: 'end',
    start: 'start',
    end: 'end',
  };

  const cues: Array<VTTCue> = [];
  for (const cueElement of cueElements) {
    const cueText = getTextContent(cueElement, trim);
    if (!cueText || !cueElement.hasAttribute('begin')) {
      // Nothing to display, or no start time: skip this element
      continue;
    }

    const startTime = parseTtmlTime(
      cueElement.getAttribute('begin'),
      rateInfo
    );
    const duration = parseTtmlTime(cueElement.getAttribute('dur'), rateInfo);
    let endTime = parseTtmlTime(cueElement.getAttribute('end'), rateInfo);
    if (startTime === null) {
      throw timestampParsingError(cueElement);
    }
    if (endTime === null) {
      // Without an explicit end, fall back to begin + dur
      if (duration === null) {
        throw timestampParsingError(cueElement);
      }
      endTime = startTime + duration;
    }

    const cue = new VTTCue(startTime - syncTime, endTime - syncTime, cueText);
    cue.id = generateCueId(cue.startTime, cue.endTime, cue.text);

    const region = regionElements[cueElement.getAttribute('region')];
    const style = styleElements[cueElement.getAttribute('style')];

    // TODO: Add regions to track and cue (origin and extend)
    // These values are hard-coded (for now) to simulate region settings in the demo
    cue.position = 10;
    cue.size = 80;

    // Apply styles to cue
    const styles = getTtmlStyles(region, style);
    const textAlign = styles.textAlign;
    if (textAlign) {
      // cue.positionAlign not settable in FF~2016
      const lineAlign = lineAlignByTextAlign[textAlign];
      if (lineAlign) {
        cue.lineAlign = lineAlign;
      }
      cue.align = textAlign as AlignSetting;
    }
    // Every collected style value is also copied onto the cue as a property
    Object.assign(cue, styles);

    cues.push(cue);
  }
  return cues;
}

/**
 * Collects descendants of the first `parentName` element under `fromElement`.
 *
 * @param fromElement - Element searched with `getElementsByTagName`.
 * @param parentName - Tag name of the container; only its first match is used.
 * @param childName - Selector handed to the container's `querySelectorAll`.
 * @returns A plain array copy of the matches, or an empty array when no
 *   container exists.
 */
function getElementCollection(
  fromElement,
  parentName,
  childName
): Array<HTMLElement> {
  const container = fromElement.getElementsByTagName(parentName)[0];
  return container
    ? Array.prototype.slice.call(container.querySelectorAll(childName))
    : [];
}

/**
 * Indexes elements by their `xml:id` attribute.
 *
 * Elements whose `xml:id` is missing or empty are left out. When two elements
 * share an id, the later one replaces the earlier one.
 *
 * @param elementsWithId - Elements to index.
 * @returns A dictionary from `xml:id` value to element.
 */
function collectionToDictionary(
  elementsWithId: Array<HTMLElement>
): { [id: string]: HTMLElement } {
  const byId: { [id: string]: HTMLElement } = {};
  elementsWithId.forEach((candidate) => {
    const id = candidate.getAttribute('xml:id');
    if (id) {
      byId[id] = candidate;
    }
  });
  return byId;
}

/**
 * Flattens the child nodes of an element into plain cue text.
 *
 * - A `br` child becomes a newline, except when it is the first child.
 * - A child that has children of its own is flattened recursively and its
 *   text is appended to what has been collected so far.
 * - Any other child contributes its `textContent`; with `trim` set, that text
 *   is trimmed and inner whitespace runs are collapsed to one space.
 *
 * @param element - Node whose `childNodes` are walked.
 * @param trim - Whether to trim and collapse whitespace in leaf text.
 * @returns The concatenated text of all children.
 */
function getTextContent(element, trim): string {
  return Array.prototype.slice
    .call(element.childNodes)
    .reduce((str, node, i) => {
      if (node.nodeName === 'br' && i) {
        return str + '\n';
      }
      if (node.childNodes?.length) {
        // Append the nested text; returning it alone would drop everything
        // collected from the preceding siblings.
        return str + getTextContent(node, trim);
      }
      if (trim) {
        return str + node.textContent.trim().replace(/\s+/g, ' ');
      }
      return str + node.textContent;
    }, '');
}

/**
 * Reads the supported TTML styling attributes for a cue.
 *
 * Each attribute is looked up in the TTML styling namespace, first on `style`
 * and then, if that yields nothing, on `region`. Attributes without a
 * non-empty value are omitted from the result.
 *
 * @param region - Region element, forwarded to `getAttributeNS`.
 * @param style - Style element, forwarded to `getAttributeNS`; its values win
 *   over the region's.
 * @returns A dictionary from attribute name to value.
 */
function getTtmlStyles(region, style): { [style: string]: string } {
  const ttsNs = 'http://www.w3.org/ns/ttml#styling';
  // Not read yet: fontWeight, lineHeight, wrapOption, fontStyle, direction,
  // writingMode
  const styleAttributes = [
    'displayAlign',
    'textAlign',
    'color',
    'backgroundColor',
    'fontSize',
    'fontFamily',
  ];

  const styles: { [style: string]: string } = {};
  for (const name of styleAttributes) {
    const fromStyle = getAttributeNS(style, ttsNs, name);
    const value = fromStyle || getAttributeNS(region, ttsNs, name);
    if (value) {
      styles[name] = value;
    }
  }
  return styles;
}

/**
 * Reads a namespaced attribute, tolerating a missing element.
 *
 * @param element - Element to read from; may be `null` or `undefined`, for
 *   example when a dictionary lookup for a style or region found nothing.
 * @param ns - Attribute namespace URI.
 * @param name - Local attribute name.
 * @returns The attribute value, or `null` when there is no element or the
 *   element does not have the attribute.
 */
function getAttributeNS(element, ns, name): string | null {
  if (!element) {
    // No element to read from: report the attribute as absent, do not throw
    return null;
  }
  return element.hasAttributeNS(ns, name)
    ? element.getAttributeNS(ns, name)
    : null;
}

/**
 * Creates the error thrown when a cue's timing cannot be parsed.
 *
 * For an element-like `node` (anything with a `getAttribute` method) the
 * message names the node and lists whichever of its `begin`, `end` and `dur`
 * attributes are present, because string-converting a DOM element only
 * produces a generic `[object ...]` tag. Any other value is string-converted
 * as-is.
 *
 * @param node - The cue element (or other value) whose timing failed to parse.
 * @returns An `Error`; the caller is expected to throw it.
 */
function timestampParsingError(node) {
  let description = String(node);
  if (node && typeof node.getAttribute === 'function') {
    const timing = ['begin', 'end', 'dur']
      .map((name) => {
        const value = node.getAttribute(name);
        return value === null || value === undefined
          ? ''
          : ` ${name}="${value}"`;
      })
      .join('');
    description = `<${node.nodeName}${timing}>`;
  }
  return new Error(`Could not parse ttml timestamp ${description}`);
}

/**
 * Parses a TTML time attribute value.
 *
 * `parseTimeStamp` is tried first. Only when it returns `null` are the
 * frame-based clock format (`HMSF_REGEX`) and the number-plus-metric format
 * (`TIME_UNIT_REGEX`) tried, in that order.
 *
 * @param timeAttributeValue - Raw attribute value; may be empty or missing.
 * @param rateInfo - Rate parameters forwarded to the frame/metric parsers.
 * @returns The parsed time, or `null` when the value is empty or matches no
 *   supported format.
 */
function parseTtmlTime(timeAttributeValue, rateInfo): number | null {
  if (!timeAttributeValue) {
    return null;
  }

  const clockTime: number | null = parseTimeStamp(timeAttributeValue);
  if (clockTime !== null) {
    return clockTime;
  }

  if (HMSF_REGEX.test(timeAttributeValue)) {
    return parseHoursMinutesSecondsFrames(timeAttributeValue, rateInfo);
  }
  if (TIME_UNIT_REGEX.test(timeAttributeValue)) {
    return parseTimeUnits(timeAttributeValue, rateInfo);
  }
  return null;
}

/**
 * Converts a frame-based clock time that matches `HMSF_REGEX` to seconds.
 *
 * Sub-frames are divided by `rateInfo.subFrameRate` and added to the frame
 * count, which is then divided by `rateInfo.frameRate`. A missing sub-frame
 * part counts as 0.
 *
 * @param timeAttributeValue - Value already known to match `HMSF_REGEX`.
 * @param rateInfo - Supplies `frameRate` and `subFrameRate`.
 * @returns The time in seconds.
 */
function parseHoursMinutesSecondsFrames(timeAttributeValue, rateInfo): number {
  const match = HMSF_REGEX.exec(timeAttributeValue) as Array<any>;
  // `| 0` turns each captured digit string (or undefined) into an integer
  const hours = match[1] | 0;
  const minutes = match[2] | 0;
  const wholeSeconds = match[3] | 0;
  const wholeFrames = match[4] | 0;
  const subFrames = match[5] | 0;

  const frames = wholeFrames + subFrames / rateInfo.subFrameRate;
  return (
    hours * 3600 + minutes * 60 + wholeSeconds + frames / rateInfo.frameRate
  );
}

/**
 * Converts a number-plus-metric time that matches `TIME_UNIT_REGEX` to
 * seconds.
 *
 * @param timeAttributeValue - Value already known to match `TIME_UNIT_REGEX`,
 *   e.g. `1.5s`, `500ms`, `90f` or `10000000t`.
 * @param rateInfo - Supplies `frameRate` for the `f` metric and `tickRate`
 *   for the `t` metric. A rate of 0 makes the division yield `Infinity`.
 * @returns The time in seconds.
 */
function parseTimeUnits(timeAttributeValue, rateInfo): number {
  const m = TIME_UNIT_REGEX.exec(timeAttributeValue) as Array<any>;
  const value = Number(m[1]);
  const unit = m[2];
  switch (unit) {
    case 'h':
      return value * 3600;
    case 'm':
      return value * 60;
    case 'ms':
      // Milliseconds are thousandths of a second, so scale down, not up
      return value / 1000;
    case 'f':
      return value / rateInfo.frameRate;
    case 't':
      return value / rateInfo.tickRate;
  }
  // Remaining metric is 's': the value is already in seconds
  return value;
}