<p:declare-step xmlns:p="http://www.w3.org/ns/xproc"
                xmlns:px="http://www.daisy.org/ns/pipeline/xproc"
                xmlns:cx="http://xmlcalabash.com/ns/extensions"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:d="http://www.daisy.org/ns/pipeline/data"
                xmlns:epub="http://www.idpf.org/2007/ops"
                xmlns:tts="http://www.daisy.org/ns/pipeline/tts"
                xmlns:ssml="http://www.w3.org/2001/10/synthesis"
                version="1.0"
                exclude-inline-prefixes="#all"
                type="px:tts-for-epub3"
                name="main">

    <!--
        px:tts-for-epub3

        Overall flow of this step:
          1. "process-css": optionally run the aural CSS cascade on the HTML documents
             (only when both $audio and $process-css are 'true').
          2. "html": load the application/xhtml+xml documents of the fileset.
          3. "synthesize": when $audio is 'false' pass the HTML through untouched and emit
             an empty audio map; otherwise, per HTML document, detect sentences, generate
             SSML and a cleaned-up HTML document, then turn all SSML into audio.
          4. "update-fileset": put the processed HTML documents back into the fileset.
    -->

    <!-- ============================== Inputs ============================== -->

    <p:input port="source.fileset" primary="true"/>
    <p:input port="source.in-memory" sequence="true">
        <p:documentation xmlns="http://www.w3.org/1999/xhtml">
            The source fileset with HTML documents, lexicons and CSS stylesheets.
        </p:documentation>
    </p:input>

    <p:input port="config">
        <p:documentation xmlns="http://www.w3.org/1999/xhtml">
            <h2>TTS configuration file</h2>
            Configuration file with voice mappings, PLS lexicons and annotations.
        </p:documentation>
    </p:input>

    <!-- ============================== Outputs ============================= -->

    <p:output port="audio-map">
        <p:pipe port="audio-map" step="synthesize"/>
        <p:documentation xmlns="http://www.w3.org/1999/xhtml">
            List of audio clips mapped to fragments in the HTML document set.
        </p:documentation>
    </p:output>

    <!-- No explicit binding: the primary output is connected to the primary output of the
         last step of the pipeline (px:fileset-update "update-fileset"). -->
    <p:output port="result.fileset" primary="true"/>
    <p:output port="result.in-memory" sequence="true">
        <p:pipe step="update-fileset" port="result.in-memory"/>
        <p:documentation xmlns="http://www.w3.org/1999/xhtml">
            The result fileset. HTML documents are enriched with IDs, words and sentences.
            Inlined aural CSS is removed.
        </p:documentation>
    </p:output>

    <p:output port="status">
        <p:pipe step="synthesize" port="status"/>
    </p:output>

    <p:option name="include-log" select="'false'">
        <p:documentation xmlns="http://www.w3.org/1999/xhtml">
            Whether or not to make the TTS log available on the "log" port.
        </p:documentation>
    </p:option>
    <p:output port="log" sequence="true">
        <p:pipe step="synthesize" port="log"/>
    </p:output>

    <!-- ============================== Options ============================= -->

    <p:option name="audio" required="false" cx:type="xs:boolean" cx:as="xs:string" select="'true'">
        <p:documentation xmlns="http://www.w3.org/1999/xhtml">
            <h2>Enable Text-To-Speech</h2>
            Whether to use a speech synthesizer to produce audio files.
        </p:documentation>
    </p:option>

    <p:option name="audio-file-type" select="'audio/mpeg'">
        <p:documentation xmlns="http://www.w3.org/1999/xhtml">
            The desired file type of the generated audio files, specified as a MIME type.
            Examples:
            <ul>
                <li>"audio/mpeg"</li>
                <li>"audio/x-wav" (but note that this is not a core media type)</li>
            </ul>
        </p:documentation>
    </p:option>

    <p:option name="process-css" required="false" select="'true'">
        <p:documentation xmlns="http://www.w3.org/1999/xhtml">
            Set to false to bypass aural CSS processing.
        </p:documentation>
    </p:option>

    <p:option name="stylesheet" select="''">
        <p:documentation xmlns="http://www.w3.org/1999/xhtml">
            CSS style sheets as space separated list of absolute URIs.
        </p:documentation>
    </p:option>

    <p:option name="lexicon" cx:as="xs:anyURI*" select="()">
        <p:documentation xmlns="http://www.w3.org/1999/xhtml">
            PLS lexicons as list of absolute URIs.
        </p:documentation>
    </p:option>

    <p:option name="sentence-class" required="false" select="''">
        <p:documentation xmlns="http://www.w3.org/1999/xhtml">
            Class attribute to mark sentences with.
        </p:documentation>
    </p:option>

    <p:option name="temp-dir" select="''">
        <p:documentation xmlns="http://www.w3.org/1999/xhtml">
            Empty directory dedicated to this conversion. May be left empty in which case a
            temporary directory will be automatically created.
        </p:documentation>
    </p:option>

    <!-- ============================== Imports ============================= -->

    <p:import href="epub3-to-ssml.xpl">
        <p:documentation>
            px:epub3-to-ssml
        </p:documentation>
    </p:import>
    <p:import href="http://www.daisy.org/pipeline/modules/tts-common/library.xpl">
        <p:documentation>
            px:ssml-to-audio
            px:isolate-skippable
        </p:documentation>
    </p:import>
    <p:import href="http://www.daisy.org/pipeline/modules/html-break-detection/library.xpl">
        <p:documentation>
            px:html-break-detect
            px:html-unwrap-words
        </p:documentation>
    </p:import>
    <p:import href="http://www.daisy.org/pipeline/modules/css-utils/library.xpl">
        <p:documentation>
            px:css-speech-cascade
            px:css-speech-clean
        </p:documentation>
    </p:import>
    <p:import href="http://www.daisy.org/pipeline/modules/fileset-utils/library.xpl">
        <p:documentation>
            px:fileset-load
            px:fileset-update
        </p:documentation>
    </p:import>
    <p:import href="http://www.daisy.org/pipeline/modules/common-utils/library.xpl">
        <p:documentation>
            px:add-ids
        </p:documentation>
    </p:import>

    <!-- ============================== Pipeline ============================ -->

    <!-- NOTE(review): $fileset-base is not referenced anywhere in this step. Kept as is;
         confirm whether it can be dropped. -->
    <p:variable name="fileset-base" select="base-uri(/*)">
        <p:pipe step="main" port="source.fileset"/>
    </p:variable>

    <!-- Step 1 (1/10 of the progress): aural CSS cascade, or a plain pass-through. Both
         branches expose the same two ports ("fileset" and "in-memory") so that the rest of
         the pipeline does not need to know which branch was taken. -->
    <p:choose name="process-css" px:progress="1/10">
        <p:when test="$audio='true' and $process-css='true'">
            <!-- "fileset" has no explicit binding: it is the primary output of "cascade" -->
            <p:output port="fileset" primary="true"/>
            <p:output port="in-memory" sequence="true">
                <p:pipe step="cascade" port="result.in-memory"/>
            </p:output>
            <px:css-speech-cascade content-type="application/xhtml+xml" name="cascade">
                <p:input port="source.in-memory">
                    <p:pipe step="main" port="source.in-memory"/>
                </p:input>
                <p:with-option name="user-stylesheet" select="$stylesheet"/>
            </px:css-speech-cascade>
        </p:when>
        <p:otherwise>
            <!-- Nothing to cascade: forward the source fileset and documents unchanged -->
            <p:output port="fileset" primary="true"/>
            <p:output port="in-memory" sequence="true">
                <p:pipe step="main" port="source.in-memory"/>
            </p:output>
            <p:identity/>
        </p:otherwise>
    </p:choose>

    <!-- Step 2: load the HTML documents listed in the (possibly CSS-processed) fileset -->
    <px:fileset-load media-types="application/xhtml+xml" name="html">
        <p:input port="in-memory">
            <p:pipe step="process-css" port="in-memory"/>
        </p:input>
    </px:fileset-load>

    <!-- Step 3 (9/10 of the progress): speech synthesis. Both branches expose the ports
         "html" (primary), "audio-map", "status" and "log". -->
    <p:choose name="synthesize" px:progress="9/10">
        <p:when test="$audio = 'false'">
            <!-- The test only looks at an option, so no context document is needed -->
            <p:xpath-context>
                <p:empty/>
            </p:xpath-context>
            <p:output port="html" primary="true" sequence="true"/>
            <p:output port="audio-map">
                <p:inline>
                    <d:audio-clips/>
                </p:inline>
            </p:output>
            <p:output port="status">
                <p:inline>
                    <d:status result="ok"/>
                </p:inline>
            </p:output>
            <p:output port="log" sequence="true">
                <p:empty/>
            </p:output>
            <!-- TTS disabled: HTML is passed through untouched, audio map is empty -->
            <p:identity/>
        </p:when>
        <p:otherwise>
            <p:output port="html" primary="true" sequence="true">
                <p:pipe step="for-each" port="html"/>
            </p:output>
            <p:output port="audio-map">
                <p:pipe port="result" step="to-audio"/>
            </p:output>
            <p:output port="status">
                <p:pipe step="to-audio" port="status"/>
            </p:output>
            <p:output port="log" sequence="true">
                <p:pipe step="to-audio" port="log"/>
            </p:output>

            <!-- Per HTML document: produce one SSML document (primary output, consumed by
                 "to-audio" below) and one cleaned-up HTML document.
                 Progress: this loop (2/9) and "to-audio" (7/9) together make up the whole
                 of this branch, consistent with the fraction notation used for every
                 other px:progress in this file. -->
            <p:for-each name="for-each" px:progress="2/9">
                <p:output port="ssml" primary="true" sequence="true">
                    <p:pipe step="ssml" port="result"/>
                </p:output>
                <p:output port="html">
                    <p:pipe step="clean-html" port="result"/>
                </p:output>

                <p:group px:progress="1/5">
                    <p:documentation>
                        Insert "speech-only" spans from @tts:before and @tts:after attributes
                    </p:documentation>
                    <!-- A placeholder element with dummy text is inserted first; the dummy
                         text node is then replaced by the value of the attribute found on
                         the grandparent of that text node, i.e. the element that carried
                         @tts:before / @tts:after. The inline content must not contain any
                         additional whitespace, hence it is kept on a single line. -->
                    <p:insert match="*[@tts:before]" position="first-child">
                        <p:input port="insertion">
                            <p:inline><tts:before>[CONTENT]</tts:before></p:inline>
                        </p:input>
                    </p:insert>
                    <p:string-replace match="tts:before/text()" replace="parent::*/parent::*/@tts:before"/>
                    <p:insert match="*[@tts:after]" position="last-child">
                        <p:input port="insertion">
                            <p:inline><tts:after>[CONTENT]</tts:after></p:inline>
                        </p:input>
                    </p:insert>
                    <p:string-replace match="tts:after/text()" replace="parent::*/parent::*/@tts:after"/>
                    <!-- Mark the placeholders so they can be stripped again after the SSML
                         has been generated, then turn them into XHTML span elements -->
                    <p:add-attribute match="tts:before|tts:after" attribute-name="tts:speech-only" attribute-value=""/>
                    <p:rename match="tts:before|tts:after" new-name="span" new-namespace="http://www.w3.org/1999/xhtml"/>
                </p:group>

                <px:html-break-detect name="break" px:progress="1/5" px:message="Performing sentence detection">
                    <!-- Only request a sentence attribute when a class name was supplied -->
                    <p:with-option name="sentence-attr" select="if ($sentence-class!='') then 'class' else ''"/>
                    <p:with-option name="sentence-attr-val" select="$sentence-class"/>
                </px:html-break-detect>

                <!-- Page breaks and note references are the elements treated as skippable -->
                <px:isolate-skippable name="isolate-skippable" px:progress="1/5"
                                      match="*[@epub:type/tokenize(.,'\s+')=('pagebreak','noteref')]| *[@role='doc-pagebreak']| *[@role='doc-noteref']">
                    <p:input port="sentence-ids">
                        <p:pipe step="break" port="sentence-ids"/>
                    </p:input>
                </px:isolate-skippable>

                <px:epub3-to-ssml px:progress="1/5" px:message="Generating SSML from HTML">
                    <p:input port="sentence-ids">
                        <p:pipe step="break" port="sentence-ids"/>
                    </p:input>
                    <p:input port="skippable-ids">
                        <p:pipe step="isolate-skippable" port="skippable-ids"/>
                    </p:input>
                    <p:input port="fileset.in">
                        <p:pipe step="process-css" port="fileset"/>
                    </p:input>
                    <p:input port="config">
                        <p:pipe port="config" step="main"/>
                    </p:input>
                    <p:with-option name="user-lexicons" select="$lexicon"/>
                </px:epub3-to-ssml>

                <px:add-ids match="ssml:s" name="ssml">
                    <p:documentation>px:ssml-to-audio requires that all sentences have an id attribute</p:documentation>
                </px:add-ids>
                <!-- The SSML is picked up through the "ssml" output port; end this branch -->
                <p:sink/>

                <!-- Second branch of the loop body: clean up the HTML. It restarts from
                     the output of "isolate-skippable", not from the SSML. -->
                <p:group px:progress="1/10">
                    <p:documentation>
                        Unwrap elements with @tts:speech-only attribute and remove text content.
                    </p:documentation>
                    <p:delete match="*[@tts:speech-only]//text()">
                        <p:input port="source">
                            <p:pipe step="isolate-skippable" port="result"/>
                        </p:input>
                    </p:delete>
                    <!-- Speech-only elements that have an id are left in place (emptied) -->
                    <p:unwrap match="*[@tts:speech-only][not(@id)]"/>
                    <p:documentation>Remove @tts:* attributes and tts namespace nodes</p:documentation>
                    <px:css-speech-clean/>
                </p:group>

                <px:html-unwrap-words px:progress="1/10">
                    <p:documentation>
                        Remove the word tags because it results in invalid EPUB. (The info is used in
                        the synthesize step, but not for synchronization on word level.)
                    </p:documentation>
                </px:html-unwrap-words>
                <!-- Named anchor for the "html" output port of the loop -->
                <p:identity name="clean-html"/>
                <p:sink/>
            </p:for-each>

            <!-- Reads the SSML documents from the primary output of the loop above. The
                 explicit p:empty context on each option avoids evaluating the option
                 expressions against that SSML sequence. -->
            <px:ssml-to-audio name="to-audio" px:progress="7/9" px:message="Processing SSML">
                <p:input port="config">
                    <p:pipe port="config" step="main"/>
                </p:input>
                <p:with-option name="audio-file-type" select="$audio-file-type">
                    <p:empty/>
                </p:with-option>
                <p:with-option name="include-log" select="$include-log">
                    <p:empty/>
                </p:with-option>
                <!-- An empty $temp-dir is forwarded as is; otherwise an "audio/"
                     sub-directory of it is used -->
                <p:with-option name="temp-dir" select="if ($temp-dir!='') then concat($temp-dir,'audio/') else ''">
                    <p:empty/>
                </p:with-option>
            </px:ssml-to-audio>
        </p:otherwise>
    </p:choose>
    <!-- The HTML is consumed below through an explicit pipe -->
    <p:sink/>

    <!-- Step 4: replace the HTML documents of the fileset by their processed versions.
         The primary output of this step is the "result.fileset" output of the pipeline. -->
    <px:fileset-update name="update-fileset">
        <p:input port="source.fileset">
            <p:pipe step="process-css" port="fileset"/>
        </p:input>
        <p:input port="source.in-memory">
            <p:pipe step="process-css" port="in-memory"/>
        </p:input>
        <p:input port="update.fileset">
            <p:pipe step="html" port="result.fileset"/>
        </p:input>
        <p:input port="update.in-memory">
            <p:pipe step="synthesize" port="html"/>
        </p:input>
    </px:fileset-update>

</p:declare-step>