1 package org.daisy.pipeline.braille.pef.calabash.impl;
2
3 import java.io.ByteArrayInputStream;
4 import java.io.ByteArrayOutputStream;
5 import java.io.File;
6 import java.io.FileInputStream;
7 import java.io.FileOutputStream;
8 import java.io.InputStream;
9 import java.io.IOException;
10 import java.io.OutputStream;
11 import java.net.URI;
12 import java.text.DecimalFormat;
13 import java.text.NumberFormat;
14 import java.util.Map;
15 import java.util.NoSuchElementException;
16
17 import javax.xml.parsers.ParserConfigurationException;
18 import javax.xml.parsers.SAXParser;
19 import javax.xml.parsers.SAXParserFactory;
20
21 import com.xmlcalabash.core.XProcException;
22 import com.xmlcalabash.core.XProcRuntime;
23 import com.xmlcalabash.io.ReadablePipe;
24 import com.xmlcalabash.library.DefaultStep;
25 import com.xmlcalabash.model.RuntimeValue;
26 import com.xmlcalabash.runtime.XAtomicStep;
27
28 import net.sf.saxon.s9api.QName;
29 import net.sf.saxon.s9api.SaxonApiException;
30 import net.sf.saxon.s9api.Serializer;
31
32 import org.daisy.braille.utils.pef.PEFFileSplitter;
33 import org.daisy.braille.utils.pef.PEFHandler;
34 import org.daisy.braille.utils.pef.PEFHandler.Alignment;
35 import org.daisy.braille.utils.pef.UnsupportedWidthException;
36 import org.daisy.common.xproc.calabash.XProcStep;
37 import org.daisy.common.xproc.calabash.XProcStepProvider;
38 import org.daisy.common.xproc.XProcMonitor;
39 import org.daisy.dotify.api.embosser.EmbosserWriter;
40 import org.daisy.dotify.api.embosser.FileFormat;
41 import org.daisy.pipeline.braille.common.Query;
42 import org.daisy.pipeline.braille.common.Query.MutableQuery;
43 import static org.daisy.pipeline.braille.common.Query.util.mutableQuery;
44 import static org.daisy.pipeline.braille.common.Query.util.query;
45 import org.daisy.pipeline.braille.pef.FileFormatRegistry;
46
47 import org.xml.sax.SAXException;
48
49 import org.osgi.service.component.annotations.Component;
50 import org.osgi.service.component.annotations.Reference;
51 import org.osgi.service.component.annotations.ReferenceCardinality;
52 import org.osgi.service.component.annotations.ReferencePolicy;
53
54 import org.slf4j.Logger;
55 import org.slf4j.LoggerFactory;
56
57 public class PEF2TextStep extends DefaultStep implements XProcStep {
58
59 private static final QName _output_dir = new QName("output-dir");
60 private static final QName _file_format = new QName("file-format");
61 private static final QName _line_breaks = new QName("line-breaks");
62 private static final QName _page_breaks = new QName("page-breaks");
63 private static final QName _pad = new QName("pad");
64 private static final QName _charset = new QName("charset");
65 private static final QName _name_pattern = new QName("name-pattern");
66 private static final QName _number_width = new QName("number-width");
67 private static final QName _single_volume_name = new QName("single-volume-name");
68
69 private final FileFormatRegistry fileFormatRegistry;
70
71 private ReadablePipe source = null;
72
/**
 * Creates the step. Private: instances are created through {@link Provider#newStep}.
 *
 * @param runtime the XProc runtime, passed on to the super class
 * @param step the atomic step, passed on to the super class
 * @param fileFormatRegistry the registry used in run() to look up file formats
 */
private PEF2TextStep(XProcRuntime runtime, XAtomicStep step, FileFormatRegistry fileFormatRegistry) {
    super(runtime, step);
    this.fileFormatRegistry = fileFormatRegistry;
}
79
80 @Override
81 public void setInput(String port, ReadablePipe pipe) {
82 source = pipe;
83 }
84
85 @Override
86 public void reset() {
87 source.resetReader();
88 }
89
90 @Override
91 public void run() throws SaxonApiException {
92 super.run();
93 MutableQuery q = mutableQuery(query(getOption(_file_format, "")));
94 q.removeAll("blank-last-page");
95 q.removeAll("sheets-multiple-of-two");
96 addOption(_line_breaks, q);
97 addOption(_page_breaks, q);
98 addOption(_pad, q);
99 addOption(_charset, q);
100 logger.debug("Finding file format for query: " + q);
101 Iterable<FileFormat> fileFormats = fileFormatRegistry.get(q);
102 if (!fileFormats.iterator().hasNext()) {
103 throw new XProcException(step, "No file format found for query: " + q); }
104 for (FileFormat fileFormat : fileFormats) {
105 try {
106 logger.debug("Storing PEF to file format: " + fileFormat);
107
108
109 File textDir = new File(new URI(getOption(_output_dir).getString()));
110 textDir.mkdirs();
111
112
113 ByteArrayOutputStream s = new ByteArrayOutputStream();
114 Serializer serializer = runtime.getProcessor().newSerializer();
115 serializer.setOutputStream(s);
116 serializer.setCloseOnCompletion(true);
117 serializer.setOutputProperty(Serializer.Property.INDENT, "yes");
118 serializer.serializeNode(source.read());
119 serializer.close();
120 InputStream pefStream = new ByteArrayInputStream(s.toByteArray());
121 s.close();
122
123
124 String singleVolumeName = getOption(_single_volume_name, "");
125 String pattern = getOption(_name_pattern, "");
126 if (pattern.isEmpty())
127 pattern = "volume-{}";
128 int match = pattern.indexOf("{}");
129 if (match < 0 || match != pattern.lastIndexOf("{}")) {
130 logger.error("name-pattern is invalid: '" + pattern + "'");
131 if (singleVolumeName.isEmpty())
132 throw new RuntimeException("name-pattern and single-volume-name may not both be empty");
133 }
134 if ((fileFormat.supportsVolumes() && !singleVolumeName.isEmpty())
135 || match < 0 || match != pattern.lastIndexOf("{}")) {
136
137 convertPEF2Text(pefStream,
138 new File(textDir, singleVolumeName + fileFormat.getFileExtension()), fileFormat);
139 } else {
140
141 pattern = pattern.replaceAll("'", "''")
142 .replaceAll("([0#\\.,;%\u2030\u00A4-]+)", "'$1'");
143
144 match = pattern.indexOf("{}");
145 File splitDir = new File(textDir, "split");
146 splitDir.mkdir();
147
148
149 PEFFileSplitter splitter = new PEFFileSplitter(x -> true);
150 String prefix = PEFFileSplitter.PREFIX;
151 String postfix = PEFFileSplitter.POSTFIX;
152 splitter.split(pefStream, splitDir, prefix, postfix);
153 File[] pefFiles = splitDir.listFiles();
154 String formatPattern = pattern.substring(0, match);
155 int nWidth; {
156 try {
157 nWidth = Integer.parseInt(getOption(_number_width, "")); }
158 catch (NumberFormatException e) {
159 nWidth = 0; }}
160 if (nWidth == 0)
161 formatPattern += "###";
162 else
163 while (nWidth > 0) { formatPattern += "0"; nWidth--; }
164 formatPattern += pattern.substring(match + 2);
165 NumberFormat format = new DecimalFormat(formatPattern);
166 for (File pefFile : pefFiles) {
167 InputStream is = new FileInputStream(pefFile);
168 if (pefFiles.length == 1 && !singleVolumeName.isEmpty()) {
169
170 convertPEF2Text(is, new File(textDir, singleVolumeName + fileFormat.getFileExtension()), fileFormat);
171 } else {
172 String pefName = pefFile.getName();
173 if (pefName.length() <= prefix.length() + postfix.length()
174 || !pefName.substring(0, prefix.length()).equals(prefix)
175 || !pefName.substring(pefName.length() - postfix.length()).equals(postfix)) {
176 is.close();
177 throw new RuntimeException("Coding error");
178 }
179 String textName = format.format(
180 Integer.parseInt(pefName.substring(prefix.length(), pefName.length() - postfix.length())));
181 convertPEF2Text(is,
182 new File(textDir, textName + fileFormat.getFileExtension()),
183 fileFormat);
184 }
185 is.close();
186 if (!pefFile.delete()) pefFile.deleteOnExit();
187 }
188 pefStream.close();
189 if (!splitDir.delete()) splitDir.deleteOnExit();
190 }
191 return; }
192 catch (Exception e) {
193 logger.error("Storing PEF to file format '" + fileFormat + "' failed", e); }}
194 throw new XProcException(step, "pef:pef2text failed");
195 }
196
197 private void convertPEF2Text(InputStream pefStream, File textFile, FileFormat fileFormat)
198 throws ParserConfigurationException, SAXException, IOException, UnsupportedWidthException {
199 OutputStream textStream = new FileOutputStream(textFile);
200 if ("pef".equals(fileFormat.getIdentifier())) {
201
202
203 byte[] buf = new byte[153600];
204 int length;
205 while ((length = pefStream.read(buf)) > 0)
206 textStream.write(buf, 0, length);
207 } else {
208 EmbosserWriter writer = fileFormat.newEmbosserWriter(textStream);
209 PEFHandler.Builder builder = new PEFHandler.Builder(writer);
210 builder.range(null).align(Alignment.LEFT).offset(0);
211 parsePefFile(pefStream, builder.build());
212 }
213 textStream.close();
214 }
215
216 private void addOption(QName option, MutableQuery query) {
217 RuntimeValue v = getOption(option);
218 if (v != null && !"".equals(v.getString()))
219 query.add(option.getLocalName(), v.getString());
220 }
221
222 @Component(
223 name = "pxi:pef2text",
224 service = { XProcStepProvider.class },
225 property = { "type:String={http://www.daisy.org/ns/pipeline/xproc/internal}pef2text" }
226 )
227 public static class Provider implements XProcStepProvider {
228
229 @Override
230 public XProcStep newStep(XProcRuntime runtime, XAtomicStep step, XProcMonitor monitor, Map<String,String> properties) {
231 return new PEF2TextStep(runtime, step, fileFormatRegistry);
232 }
233
234 @Reference(
235 name = "FileFormatRegistry",
236 unbind = "-",
237 service = FileFormatRegistry.class,
238 cardinality = ReferenceCardinality.MANDATORY,
239 policy = ReferencePolicy.STATIC
240 )
241 protected void bindFileFormatRegistry(FileFormatRegistry registry) {
242 fileFormatRegistry = registry;
243 }
244
245 private FileFormatRegistry fileFormatRegistry;
246
247 }
248
249
250
251
252
253
254
255
256
257
258
259 private static void parsePefFile(InputStream is, PEFHandler ph)
260 throws ParserConfigurationException, SAXException, IOException, UnsupportedWidthException {
261
262 SAXParserFactory spf = SAXParserFactory.newInstance();
263 spf.setNamespaceAware(true);
264 SAXParser sp = spf.newSAXParser();
265 try {
266 sp.parse(is, ph); }
267 catch (SAXException e) {
268 if (ph.hasWidthError())
269 throw new UnsupportedWidthException(e);
270 else
271 throw e; }
272 }
273
274 private static final Logger logger = LoggerFactory.getLogger(PEF2TextStep.class);
275
276 }