<?xml version="1.0" encoding="UTF-8"?>
<!--
    DAISY Pipeline script "px:word-to-dtbook.script" (XProc 1.0).
    Declares "docx" as its input fileset and "dtbook" as its output fileset.
    All namespace prefixes used in the file are declared once, here on the root element.
-->
<p:declare-step xmlns:p="http://www.w3.org/ns/xproc"
                xmlns:px="http://www.daisy.org/ns/pipeline/xproc"
                xmlns:pxi="http://www.daisy.org/ns/pipeline/xproc/internal"
                xmlns:c="http://www.w3.org/ns/xproc-step"
                xmlns:cx="http://xmlcalabash.com/ns/extensions"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                version="1.0"
                type="px:word-to-dtbook.script"
                name="main"
                px:input-filesets="docx"
                px:output-filesets="dtbook">
<!-- Script-level metadata: name, description, homepage link and author details, each tagged with px:role. -->
<p:documentation xmlns="http://www.w3.org/1999/xhtml">
    <h1 px:role="name">Word to DTBook</h1>
    <p px:role="desc" xml:space="preserve">Transforms a Microsoft Office Word (.docx) document into a DTBook XML file.</p>
    <a px:role="homepage" href="http://daisy.github.io/pipeline/Get-Help/User-Guide/Scripts/word-to-dtbook/">
        Online documentation
    </a>
    <dl px:role="author">
        <dt>Name:</dt>
        <dd px:role="name">Nicolas Pavie</dd>
        <dt>E-mail:</dt>
        <dd><a px:role="contact" href="mailto:pavie.nicolas@gmail.com">pavie.nicolas@gmail.com</a></dd>
        <dt>Organisation:</dt>
        <dd px:role="organization">DAISY Consortium</dd>
    </dl>
</p:documentation>
<!--
    Script options.
    Every option carries an XHTML p:documentation block (px:role="name" / px:role="desc");
    options restricted to a fixed set of values list them in a p:pipeinfo/px:type/choice block.
    Multi-line descriptions keep their continuation lines flush left so that no indentation
    is added to the description text.
-->

<!-- Input and output locations (both required). -->
<p:option name="source"
          required="true"
          px:type="anyFileURI"
          px:media-type="application/vnd.openxmlformats-officedocument.wordprocessingml.document">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Input Docx file</h2>
        <p px:role="desc" xml:space="preserve">The document you want to convert.</p>
    </p:documentation>
</p:option>
<p:option name="result" required="true" px:output="result" px:type="anyDirURI">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">DTBook output</h2>
        <p px:role="desc" xml:space="preserve">Output folder of the conversion to DTBook XML</p>
    </p:documentation>
</p:option>

<!-- Document metadata; all default to the empty string. -->
<p:option name="title" select="''" required="false">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Document title</h2>
    </p:documentation>
</p:option>
<p:option name="creator" select="''" required="false">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Document author</h2>
    </p:documentation>
</p:option>
<p:option name="publisher" select="''" required="false">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Document publisher</h2>
        <p px:role="desc">Publisher metadata (dc:Publisher) to be added</p>
    </p:documentation>
</p:option>
<p:option name="uid" select="''">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Document identifier</h2>
        <p px:role="desc">Identifier to be added as dtb:uid metadata</p>
    </p:documentation>
</p:option>
<p:option name="subject" select="''">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Subject(s)</h2>
        <p px:role="desc">Subject(s) to be added as dc:Subject metadata</p>
    </p:documentation>
</p:option>

<!-- Conversion behaviour. -->
<p:option name="accept-revisions" select="false()" cx:as="xs:boolean">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Accept revisions</h2>
        <p px:role="desc">If the document has revisions that are not accepted, consider them as accepted for the conversion.</p>
    </p:documentation>
</p:option>
<p:option name="pagination" select="'custom'">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Pagination mode</h2>
        <p px:role="desc">Define how page numbers are computed and inserted in the result</p>
    </p:documentation>
    <p:pipeinfo>
        <px:type>
            <choice xmlns:a="http://relaxng.org/ns/compatibility/annotations/1.0">
                <value>custom</value>
                <a:documentation xml:lang="en">Use numbers tagged with the style 'PageNumberDAISY' in the document</a:documentation>
                <value>automatic</value>
                <a:documentation xml:lang="en">Use Word page breaks to compute and insert page numbers in content</a:documentation>
            </choice>
        </px:type>
    </p:pipeinfo>
</p:option>
<p:option name="image-size" select="'original'">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Image resizing</h2>
    </p:documentation>
    <p:pipeinfo>
        <px:type>
            <choice xmlns:a="http://relaxng.org/ns/compatibility/annotations/1.0">
                <value>original</value>
                <a:documentation xml:lang="en">Keep image size</a:documentation>
                <value>resize</value>
                <a:documentation xml:lang="en">Resize images</a:documentation>
                <value>resample</value>
                <a:documentation xml:lang="en">Resample images</a:documentation>
            </choice>
        </px:type>
    </p:pipeinfo>
</p:option>
<p:option name="dpi" select="96" cx:as="xs:integer">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Image resampling value</h2>
        <p px:role="desc">Image resampling targeted resolution in dpi (dot-per-inch)</p>
    </p:documentation>
</p:option>
<p:option name="character-styles" select="false()" cx:as="xs:boolean">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Translate character styles</h2>
    </p:documentation>
</p:option>

<!-- Footnote placement and numbering. -->
<p:option name="footnotes-position" select="'end'">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Footnotes position</h2>
        <p px:role="desc">Footnotes position in content</p>
    </p:documentation>
    <p:pipeinfo>
        <px:type>
            <choice xmlns:a="http://relaxng.org/ns/compatibility/annotations/1.0">
                <value>inline</value>
                <a:documentation xml:lang="en">Inline note in content (after the paragraph containing its first reference)</a:documentation>
                <value>end</value>
                <a:documentation xml:lang="en">Put notes at the end of a level defined in footnotes insertion level</a:documentation>
                <value>page</value>
                <a:documentation xml:lang="en">Put the notes near the page break</a:documentation>
            </choice>
        </px:type>
    </p:pipeinfo>
</p:option>
<p:option name="footnotes-level" select="0" cx:as="xs:integer">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Footnotes insertion level</h2>
        <p px:role="desc">Lowest level into which notes are inserted in content.
0 means the footnotes will be inserted as close as possible of its first call.</p>
    </p:documentation>
</p:option>
<p:option name="footnotes-numbering" cx:as="xs:string" select="'none'">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Footnotes numbering</h2>
        <p px:role="desc">Customize footnotes numbering</p>
    </p:documentation>
    <p:pipeinfo>
        <px:type>
            <choice xmlns:a="http://relaxng.org/ns/compatibility/annotations/1.0">
                <value>none</value>
                <a:documentation xml:lang="en">Disable note numbering</a:documentation>
                <value>word</value>
                <a:documentation xml:lang="en">Use original word numbering</a:documentation>
                <value>number</value>
                <a:documentation xml:lang="en">Use custom numbering, starting from the footnotes start value</a:documentation>
            </choice>
        </px:type>
    </p:pipeinfo>
</p:option>
<p:option name="footnotes-start-value" cx:as="xs:integer" select="1">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Footnotes starting value</h2>
        <p px:role="desc">If footnotes numbering is required, start the notes numbering process from this value</p>
    </p:documentation>
</p:option>
<p:option name="footnotes-numbering-prefix" select="''">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Footnotes number prefix</h2>
        <p px:role="desc">Add a prefix before the note's number if numbering is requested.</p>
    </p:documentation>
</p:option>
<p:option name="footnotes-numbering-suffix" select="''">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Footnotes number suffix</h2>
        <p px:role="desc">Add a text between the note's number and the note's content.</p>
    </p:documentation>
</p:option>

<!-- Shape extraction (experimental). -->
<p:option name="extract-shapes" cx:as="xs:boolean" select="false()">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Extract vector shapes (Experimental)</h2>
        <p px:role="desc">Try to export inline shapes like diagrams or charts during conversion using Microsoft Word.
If deactivated, those shapes will be replaced by their name and description in the result.
Proceed with caution : Word must not be blocked by any dialog bound to it, or the process might crash or get stuck indefinitely</p>
    </p:documentation>
</p:option>

<!-- Post-processing of the generated DTBook; all switches default to false. -->
<p:option name="repair" select="false()" cx:as="xs:boolean">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Repair the DTBook</h2>
        <p px:role="desc" xml:space="preserve">Apply repair routines on the DTBook.</p>
    </p:documentation>
</p:option>
<p:option name="tidy" select="false()" cx:as="xs:boolean">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Tidy up the DTBook</h2>
        <p px:role="desc" xml:space="preserve">Apply tidying routines on the DTBook.</p>
    </p:documentation>
</p:option>
<p:option name="narrator" select="false()" cx:as="xs:boolean">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Cleanup the document for audio synthesis</h2>
        <p px:role="desc" xml:space="preserve">Apply cleaning routines on the document to prepare it for text-to-speech processes.</p>
    </p:documentation>
</p:option>
<!-- The option name is kept as declared (not hyphen-case) because it is part of the script's public interface. -->
<p:option name="ApplySentenceDetection" select="false()" cx:as="xs:boolean">
    <p:documentation xmlns="http://www.w3.org/1999/xhtml">
        <h2 px:role="name">Apply sentences detection</h2>
        <p px:role="desc">Encapsulate sentences within the document</p>
    </p:documentation>
</p:option>
<!-- Step libraries used below; each p:documentation lists the steps taken from that library. -->
<p:import href="http://www.daisy.org/pipeline/modules/common-utils/library.xpl">
    <p:documentation>
        px:error
        px:log-error
    </p:documentation>
</p:import>
<p:import href="http://www.daisy.org/pipeline/modules/fileset-utils/library.xpl">
    <p:documentation>
        px:fileset-add-entry
        px:fileset-copy
        px:fileset-filter
        px:fileset-store
    </p:documentation>
</p:import>
<p:import href="http://www.daisy.org/pipeline/modules/dtbook-utils/library.xpl">
    <p:documentation>
        px:dtbook-break-detect
        px:dtbook-unwrap-words
        px:dtbook-upgrade
        px:dtbook-load
    </p:documentation>
</p:import>
<!-- Script-local steps, resolved relative to this file. -->
<p:import href="fix-dtbook/fix-dtbook.xpl">
    <p:documentation>
        pxi:dtbook-fix
    </p:documentation>
</p:import>
<p:import href="fix-dtbook/doctyping.xpl">
    <p:documentation>
        pxi:dtbook-doctyping
    </p:documentation>
</p:import>
<!--
    First half of the job: run oox2Daisy.xsl starting at its named template "main".
    The source port is empty, so the stylesheet receives no input document; everything it needs
    is passed through the stylesheet parameters below.
    The progress attribute is spelled px:progress (it was previously misspelled "px:progess").
    NOTE(review): $temp-dir, $version, $MasterSub and $disableDateGeneration are referenced here but
    no p:option or p:variable with those names is declared in this file. Confirm where they are
    supposed to be declared.
-->
<p:xslt template-name="main" cx:serialize="true" px:message="Converting DOCX to DTBook" px:progress="1/2">
    <p:input port="source">
        <p:empty/>
    </p:input>
    <p:input port="stylesheet">
        <p:document href="oox2Daisy.xsl"/>
    </p:input>
    <!-- Input file and output directory -->
    <p:with-param name="InputFile" select="$source"/>
    <p:with-param name="OutputDir" select="$temp-dir"/>
    <!-- Document metadata -->
    <p:with-param name="title" select="$title"/>
    <p:with-param name="creator" select="$creator"/>
    <p:with-param name="publisher" select="$publisher"/>
    <p:with-param name="uid" select="$uid"/>
    <p:with-param name="subject" select="$subject"/>
    <!-- Conversion behaviour -->
    <p:with-param name="acceptRevisions" select="$accept-revisions"/>
    <p:with-param name="version" select="$version"/>
    <p:with-param name="pagination" select="$pagination"/>
    <p:with-param name="MasterSub" select="$MasterSub"/>
    <p:with-param name="ImageSizeOption" select="$image-size"/>
    <p:with-param name="DPI" select="$dpi"/>
    <p:with-param name="CharacterStyles" select="$character-styles"/>
    <!-- Footnotes -->
    <p:with-param name="FootnotesPosition" select="$footnotes-position"/>
    <p:with-param name="FootnotesLevel" select="$footnotes-level"/>
    <p:with-param name="FootnotesNumbering" select="$footnotes-numbering"/>
    <p:with-param name="FootnotesStartValue" select="$footnotes-start-value"/>
    <p:with-param name="FootnotesNumberingPrefix" select="$footnotes-numbering-prefix"/>
    <p:with-param name="FootnotesNumberingSuffix" select="$footnotes-numbering-suffix"/>
    <!-- Miscellaneous -->
    <p:with-param name="disableDateGeneration" select="$disableDateGeneration"/>
    <p:with-param name="extractShapes" select="$extract-shapes"/>
</p:xslt>
<!--
    Write the output of the preceding step to a file, then load that file back as an XML document.
    A load failure reported as err:XD0011 is logged at DEBUG level and re-raised as a generic "BUG"
    error; any other failure is re-raised unchanged through px:error.
-->
<p:group>
    <p:documentation>Store plain text file and load as XML</p:documentation>
    <!--
        Target path: $temp-dir followed by the last path segment of $source with its extension
        replaced by ".xml" and every comma replaced by "_".
        NOTE(review): the concatenation adds no separator, so $temp-dir presumably ends with "/".
        TODO confirm.
    -->
    <p:variable name="path" select="concat( $temp-dir, replace(replace($source,'^.*/([^/]*?)(\.[^/\.]*)?$','$1.xml'),',','_'))"/>
    <p:store name="store">
        <p:with-option name="href" select="$path"/>
    </p:store>
    <p:try>
        <p:group>
            <!-- cx:depends-on makes the load wait for the store step above. -->
            <p:load cx:depends-on="store">
                <p:with-option name="href" select="$path"/>
            </p:load>
        </p:group>
        <p:catch name="catch">
            <p:choose>
                <!-- Branch on the error document produced by the failed p:load. -->
                <p:xpath-context>
                    <p:pipe step="catch" port="error"/>
                </p:xpath-context>
                <p:when test="/c:errors/c:error/@code='err:XD0011'">
                    <!-- Keep the technical details in the log, show the user a generic message. -->
                    <px:log-error severity="DEBUG">
                        <p:input port="source">
                            <p:empty/>
                        </p:input>
                        <p:input port="error">
                            <p:pipe step="catch" port="error"/>
                        </p:input>
                    </px:log-error>
                    <px:error code="BUG" message="An unexpected error happened. Please contact maintainer."/>
                </p:when>
                <p:otherwise>
                    <px:error>
                        <p:input port="error">
                            <p:pipe step="catch" port="error"/>
                        </p:input>
                    </px:error>
                </p:otherwise>
            </p:choose>
        </p:catch>
    </p:try>
</p:group>
<!--
    Second half of the job: for each loaded document, run the "cleaned" group (upgrade, fix,
    optional sentence detection, optional doctyping), store the result in the output folder,
    then copy the remaining non-DTBook fileset entries to the output folder.
    The progress attribute is spelled px:progress (it was previously misspelled "px:progess").
    NOTE(review): $simplifyHeadingLayout, $externalizeWhitespace, $documentLanguage and $WithDoctype
    are referenced here but no p:option or p:variable with those names is declared in this file.
    Confirm where they are supposed to be declared.
-->
<p:for-each px:message="Cleaning DTBook(s)" px:progress="1/2">
    <!-- Output file name: last path segment of the document's base URI, extension replaced by ".xml". -->
    <p:variable name="output-name" select="concat(replace(replace(base-uri(.),'^.*/([^/]+)$','$1'),'\.[^\.]*$',''),'.xml')"/>
    <p:group name="cleaned" px:message="Cleaning '{$output-name}' ...">
        <p:output port="result"/>
        <px:dtbook-upgrade/>
        <pxi:dtbook-fix>
            <p:with-option name="repair" select="$repair"/>
            <p:with-option name="tidy" select="$tidy"/>
            <p:with-option name="simplifyHeadingLayout" select="$simplifyHeadingLayout"/>
            <p:with-option name="externalizeWhitespace" select="$externalizeWhitespace"/>
            <p:with-option name="documentLanguage" select="$documentLanguage"/>
            <p:with-option name="narrator" select="$narrator"/>
            <p:with-option name="publisher" select="$publisher"/>
        </pxi:dtbook-fix>
        <!-- Sentence detection runs only on request; otherwise the document passes through unchanged. -->
        <p:choose>
            <p:when test="$ApplySentenceDetection">
                <px:dtbook-break-detect/>
                <px:dtbook-unwrap-words/>
            </p:when>
            <p:otherwise>
                <p:identity/>
            </p:otherwise>
        </p:choose>
        <!-- Doctyping runs only when $WithDoctype is true; otherwise the document passes through unchanged. -->
        <p:choose>
            <p:when test="$WithDoctype">
                <pxi:dtbook-doctyping/>
            </p:when>
            <p:otherwise>
                <p:identity/>
            </p:otherwise>
        </p:choose>
    </p:group>
    <!-- Write the cleaned document into the output folder under the computed name. -->
    <p:store px:message="Storing the cleaned DTBook and its resources ...">
        <p:with-option name="href" select="concat(resolve-uri($result),$output-name)"/>
    </p:store>
    <!-- Register the cleaned document as a DTBook fileset entry and load the DTBook fileset. -->
    <px:fileset-add-entry media-type="application/x-dtbook+xml" name="dtbook">
        <p:input port="entry">
            <p:pipe step="cleaned" port="result"/>
        </p:input>
    </px:fileset-add-entry>
    <px:dtbook-load name="load"/>
    <!-- The DTBook itself is already stored above, so only the other entries are copied and stored. -->
    <px:fileset-filter not-media-types="application/x-dtbook+xml"/>
    <px:fileset-copy name="copy">
        <p:with-option name="target" select="resolve-uri($result)"/>
    </px:fileset-copy>
    <px:fileset-store>
        <p:input port="in-memory.in">
            <p:pipe step="copy" port="result.in-memory"/>
        </p:input>
    </px:fileset-store>
</p:for-each>
</p:declare-step>