<p:declare-step xmlns:p="http://www.w3.org/ns/xproc"
                xmlns:px="http://www.daisy.org/ns/pipeline/xproc"
                xmlns:pxi="http://www.daisy.org/ns/pipeline/xproc/internal"
                xmlns:d="http://www.daisy.org/ns/pipeline/data"
                version="1.0"
                type="px:epub3-ensure-core-media"
                name="main">

    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <p>Ensure that an EPUB publication contains only resources that are <a href="https://www.w3.org/publishing/epub3/epub-spec.html#sec-core-media-types">EPUB 3 core media types</a>.</p>
    </p:documentation>

    <p:input port="source.fileset" primary="true"/>
    <p:input port="source.in-memory" sequence="true">
        <p:documentation xmlns="http://www.w3.org/1999/xhtml">
            <p>The input EPUB publication</p>
        </p:documentation>
    </p:input>

    <p:output port="result.fileset" primary="true">
        <p:documentation xmlns="http://www.w3.org/1999/xhtml">
            <p>The resulting EPUB publication</p>
            <p>Resources that are not core media types are filtered out. The package document is updated accordingly. References from HTML documents to the unsupported resources are fixed.</p>
            <p>Also fixes dead links in HTML documents.</p>
        </p:documentation>
    </p:output>
    <p:output port="result.in-memory" sequence="true">
        <p:pipe step="clean-html" port="in-memory"/>
    </p:output>

    <p:import href="http://www.daisy.org/pipeline/modules/fileset-utils/library.xpl">
        <p:documentation>
            px:fileset-load
            px:fileset-join
            px:fileset-diff
            px:fileset-intersect
            px:fileset-filter
            px:fileset-update
        </p:documentation>
    </p:import>
    <p:import href="../ocf/opf-manifest-to-fileset.xpl">
        <p:documentation>
            pxi:opf-manifest-to-fileset
        </p:documentation>
    </p:import>

    <!--
        Space-separated list of the media types that are allowed to stay in the publication.
        The value is kept on a single line on purpose: it is passed on verbatim as the
        "media-types" option of px:fileset-filter.
    -->
    <p:variable name="core-media-types"
                select="'application/font-sfnt application/font-woff application/javascript application/pls+xml application/smil+xml application/vnd.ms-opentype application/x-dtbncx+xml application/xhtml+xml audio/mp4 audio/mpeg font/otf font/ttf font/woff font/woff2 image/gif image/jpeg image/png image/svg+xml text/css text/javascript'"/>

    <p:documentation>
        Filter the fileset
    </p:documentation>

    <!--
        Load the files with media type application/oebps-package+xml (the package documents).
        The fileset input is not bound explicitly, so it is connected to the primary input
        of this step (source.fileset).
    -->
    <px:fileset-load media-types="application/oebps-package+xml" name="opf">
        <p:input port="in-memory">
            <p:pipe step="main" port="source.in-memory"/>
        </p:input>
    </px:fileset-load>

    <!--
        The branch is selected on the fileset of loaded package documents:
        "when" if it contains at least one d:file, "otherwise" if it contains none.
    -->
    <p:choose name="filter-resources">
        <p:xpath-context>
            <p:pipe step="opf" port="result.fileset"/>
        </p:xpath-context>
        <p:when test="//d:file">
            <p:output port="fileset" primary="true"/>
            <!-- The in-memory documents are passed through unchanged in this branch. -->
            <p:output port="in-memory" sequence="true">
                <p:pipe step="main" port="source.in-memory"/>
            </p:output>
            <!--
                Apply pxi:opf-manifest-to-fileset to every loaded package document and join the
                results into one fileset (presumably the files listed in the manifests; the
                step is defined in ../ocf/opf-manifest-to-fileset.xpl, confirm there).
            -->
            <p:for-each>
                <pxi:opf-manifest-to-fileset/>
            </p:for-each>
            <px:fileset-join name="in-manifest"/>
            <p:sink/>
            <!--
                Files of the source fileset that are also in "in-manifest", then restricted to
                the core media types plus the package document media type itself.
            -->
            <px:fileset-intersect>
                <p:input port="source">
                    <p:pipe step="main" port="source.fileset"/>
                    <p:pipe step="in-manifest" port="result"/>
                </p:input>
            </px:fileset-intersect>
            <px:fileset-filter name="in-manifest-filtered">
                <p:with-option name="media-types" select="string-join(($core-media-types,'application/oebps-package+xml'),' ')"/>
                <p:input port="source.in-memory">
                    <p:pipe step="main" port="source.in-memory"/>
                </p:input>
            </px:fileset-filter>
            <p:sink/>
            <!--
                Files of the source fileset that are not in "in-manifest". They are joined back
                in below without any media type filtering.
            -->
            <px:fileset-diff name="not-in-manifest">
                <p:input port="source">
                    <p:pipe step="main" port="source.fileset"/>
                </p:input>
                <p:input port="secondary">
                    <p:pipe step="in-manifest" port="result"/>
                </p:input>
            </px:fileset-diff>
            <p:sink/>
            <px:fileset-join name="filtered-unsorted">
                <p:input port="source">
                    <p:pipe step="in-manifest-filtered" port="result"/>
                    <p:pipe step="not-in-manifest" port="result"/>
                </p:input>
            </px:fileset-join>
            <p:sink/>
            <!--
                NOTE(review): intersecting the source fileset with "filtered-unsorted" presumably
                restores the file order of the source fileset, as the step name suggests.
                Confirm against the px:fileset-intersect documentation.
            -->
            <px:fileset-intersect>
                <p:input port="source">
                    <p:pipe step="main" port="source.fileset"/>
                    <p:pipe step="filtered-unsorted" port="result"/>
                </p:input>
            </px:fileset-intersect>
        </p:when>
        <p:otherwise>
            <!-- No package document was loaded: filter the whole source fileset by media type. -->
            <p:output port="fileset" primary="true"/>
            <p:output port="in-memory" sequence="true">
                <p:pipe step="filter" port="result.in-memory"/>
            </p:output>
            <p:sink/>
            <px:fileset-filter name="filter">
                <p:with-option name="media-types" select="$core-media-types"/>
                <p:input port="source">
                    <p:pipe step="main" port="source.fileset"/>
                </p:input>
                <p:input port="source.in-memory">
                    <p:pipe step="main" port="source.in-memory"/>
                </p:input>
            </px:fileset-filter>
        </p:otherwise>
    </p:choose>

    <p:documentation>
        Remove OPF items that are not in the fileset
    </p:documentation>
    <p:group name="clean-package-doc">
        <p:output port="fileset" primary="true"/>
        <p:output port="in-memory" sequence="true">
            <p:pipe step="update" port="result.in-memory"/>
        </p:output>
        <p:sink/>
        <!--
            Transform every loaded package document with clean-package-doc.xsl. The filtered
            fileset is supplied as a second document on the source port; how the stylesheet
            uses it is not visible in this file.
        -->
        <p:for-each name="docs">
            <p:iteration-source>
                <p:pipe step="opf" port="result"/>
            </p:iteration-source>
            <p:output port="result"/>
            <p:xslt>
                <p:input port="source">
                    <p:pipe step="docs" port="current"/>
                    <p:pipe step="filter-resources" port="fileset"/>
                </p:input>
                <p:input port="stylesheet">
                    <p:document href="clean-package-doc.xsl"/>
                </p:input>
                <p:input port="parameters">
                    <p:empty/>
                </p:input>
            </p:xslt>
        </p:for-each>
        <p:sink/>
        <!-- Put the transformed package documents back into the filtered publication. -->
        <px:fileset-update name="update">
            <p:input port="source.fileset">
                <p:pipe step="filter-resources" port="fileset"/>
            </p:input>
            <p:input port="source.in-memory">
                <p:pipe step="filter-resources" port="in-memory"/>
            </p:input>
            <p:input port="update.fileset">
                <p:pipe step="opf" port="result.fileset"/>
            </p:input>
            <p:input port="update.in-memory">
                <p:pipe step="docs" port="result"/>
            </p:input>
        </px:fileset-update>
    </p:group>

    <p:documentation>
        Clean references in content documents
    </p:documentation>
    <p:group name="clean-html">
        <p:output port="fileset" primary="true"/>
        <p:output port="in-memory" sequence="true">
            <p:pipe step="update" port="result.in-memory"/>
        </p:output>
        <!--
            Load the files with media type application/xhtml+xml. The fileset input is not bound
            explicitly, so it is connected to the primary output of the preceding step
            (the "fileset" port of clean-package-doc).
        -->
        <px:fileset-load media-types="application/xhtml+xml" name="load">
            <p:input port="in-memory">
                <p:pipe step="clean-package-doc" port="in-memory"/>
            </p:input>
        </px:fileset-load>
        <!--
            Transform every loaded content document with html-clean-resources.xsl. As above, the
            fileset is supplied as a second document on the source port.
        -->
        <p:for-each name="docs">
            <p:output port="result"/>
            <!-- Discards the implicit input; p:xslt below binds its source explicitly. -->
            <p:sink/>
            <p:xslt>
                <p:input port="source">
                    <p:pipe step="docs" port="current"/>
                    <p:pipe step="clean-package-doc" port="fileset"/>
                </p:input>
                <p:input port="stylesheet">
                    <p:document href="html-clean-resources.xsl"/>
                </p:input>
                <p:input port="parameters">
                    <p:empty/>
                </p:input>
            </p:xslt>
        </p:for-each>
        <p:sink/>
        <!-- Put the transformed content documents back into the publication. -->
        <px:fileset-update name="update">
            <p:input port="source.fileset">
                <p:pipe step="clean-package-doc" port="fileset"/>
            </p:input>
            <p:input port="source.in-memory">
                <p:pipe step="clean-package-doc" port="in-memory"/>
            </p:input>
            <p:input port="update.fileset">
                <p:pipe step="load" port="result.fileset"/>
            </p:input>
            <p:input port="update.in-memory">
                <p:pipe step="docs" port="result"/>
            </p:input>
        </px:fileset-update>
    </p:group>

</p:declare-step>