1   package org.daisy.pipeline.braille.pef.calabash.impl;
2   
3   import java.io.ByteArrayInputStream;
4   import java.io.ByteArrayOutputStream;
5   import java.io.File;
6   import java.io.FileInputStream;
7   import java.io.FileOutputStream;
8   import java.io.InputStream;
9   import java.io.IOException;
10  import java.io.OutputStream;
11  import java.net.URI;
12  import java.text.DecimalFormat;
13  import java.text.NumberFormat;
14  import java.util.Map;
15  import java.util.NoSuchElementException;
16  
17  import javax.xml.parsers.ParserConfigurationException;
18  import javax.xml.parsers.SAXParser;
19  import javax.xml.parsers.SAXParserFactory;
20  
21  import com.xmlcalabash.core.XProcException;
22  import com.xmlcalabash.core.XProcRuntime;
23  import com.xmlcalabash.io.ReadablePipe;
24  import com.xmlcalabash.library.DefaultStep;
25  import com.xmlcalabash.model.RuntimeValue;
26  import com.xmlcalabash.runtime.XAtomicStep;
27  
28  import net.sf.saxon.s9api.QName;
29  import net.sf.saxon.s9api.SaxonApiException;
30  import net.sf.saxon.s9api.Serializer;
31  
32  import org.daisy.braille.utils.pef.PEFFileSplitter;
33  import org.daisy.braille.utils.pef.PEFHandler;
34  import org.daisy.braille.utils.pef.PEFHandler.Alignment;
35  import org.daisy.braille.utils.pef.UnsupportedWidthException;
36  import org.daisy.common.xproc.calabash.XProcStep;
37  import org.daisy.common.xproc.calabash.XProcStepProvider;
38  import org.daisy.common.xproc.XProcMonitor;
39  import org.daisy.dotify.api.embosser.EmbosserWriter;
40  import org.daisy.dotify.api.embosser.FileFormat;
41  import org.daisy.pipeline.braille.common.Query;
42  import org.daisy.pipeline.braille.common.Query.MutableQuery;
43  import static org.daisy.pipeline.braille.common.Query.util.mutableQuery;
44  import static org.daisy.pipeline.braille.common.Query.util.query;
45  import org.daisy.pipeline.braille.pef.FileFormatRegistry;
46  
47  import org.xml.sax.SAXException;
48  
49  import org.osgi.service.component.annotations.Component;
50  import org.osgi.service.component.annotations.Reference;
51  import org.osgi.service.component.annotations.ReferenceCardinality;
52  import org.osgi.service.component.annotations.ReferencePolicy;
53  
54  import org.slf4j.Logger;
55  import org.slf4j.LoggerFactory;
56  
57  public class PEF2TextStep extends DefaultStep implements XProcStep {
58  	
59  	private static final QName _output_dir = new QName("output-dir");
60  	private static final QName _file_format = new QName("file-format");
61  	private static final QName _line_breaks = new QName("line-breaks");
62  	private static final QName _page_breaks = new QName("page-breaks");
63  	private static final QName _pad = new QName("pad");
64  	private static final QName _charset = new QName("charset");
65  	private static final QName _name_pattern = new QName("name-pattern");
66  	private static final QName _number_width = new QName("number-width");
67  	private static final QName _single_volume_name = new QName("single-volume-name");
68  	
69  	private final FileFormatRegistry fileFormatRegistry;
70  	
71  	private ReadablePipe source = null;
72  	
73  	private PEF2TextStep(XProcRuntime runtime,
74  	                     XAtomicStep step,
75  	                     FileFormatRegistry fileFormatRegistry) {
76  		super(runtime, step);
77  		this.fileFormatRegistry = fileFormatRegistry;
78  	}
79  	
80  	@Override
81  	public void setInput(String port, ReadablePipe pipe) {
82  		source = pipe;
83  	}
84  	
85  	@Override
86  	public void reset() {
87  		source.resetReader();
88  	}
89  	
90  	@Override
91  	public void run() throws SaxonApiException {
92  		super.run();
93  		MutableQuery q = mutableQuery(query(getOption(_file_format, "")));
94  		q.removeAll("blank-last-page"); // has been handled in pef2text.xpl
95  		q.removeAll("sheets-multiple-of-two"); // has been handled in pef2text.xpl
96  		addOption(_line_breaks, q);
97  		addOption(_page_breaks, q);
98  		addOption(_pad, q);
99  		addOption(_charset, q);
100 		logger.debug("Finding file format for query: " + q);
101 		Iterable<FileFormat> fileFormats = fileFormatRegistry.get(q);
102 		if (!fileFormats.iterator().hasNext()) {
103 			throw new XProcException(step, "No file format found for query: " + q); }
104 		for (FileFormat fileFormat : fileFormats) {
105 			try {
106 				logger.debug("Storing PEF to file format: " + fileFormat);
107 				
108 				// Initialize output directory
109 				File textDir = new File(new URI(getOption(_output_dir).getString()));
110 				textDir.mkdirs();
111 				
112 				// Read source PEF
113 				ByteArrayOutputStream s = new ByteArrayOutputStream();
114 				Serializer serializer = runtime.getProcessor().newSerializer();
115 				serializer.setOutputStream(s);
116 				serializer.setCloseOnCompletion(true);
117 				serializer.setOutputProperty(Serializer.Property.INDENT, "yes");
118 				serializer.serializeNode(source.read());
119 				serializer.close();
120 				InputStream pefStream = new ByteArrayInputStream(s.toByteArray());
121 				s.close();
122 				
123 				// Parse pattern
124 				String singleVolumeName = getOption(_single_volume_name, "");
125 				String pattern = getOption(_name_pattern, "");
126 				if (pattern.isEmpty())
127 					pattern = "volume-{}";
128 				int match = pattern.indexOf("{}");
129 				if (match < 0 || match != pattern.lastIndexOf("{}")) {
130 					logger.error("name-pattern is invalid: '" + pattern + "'");
131 					if (singleVolumeName.isEmpty())
132 						throw new RuntimeException("name-pattern and single-volume-name may not both be empty");
133 				}
134 				if ((fileFormat.supportsVolumes() && !singleVolumeName.isEmpty())
135 				    || match < 0 || match != pattern.lastIndexOf("{}")) {
136 					// Output to single file
137 					convertPEF2Text(pefStream,
138 							new File(textDir, singleVolumeName + fileFormat.getFileExtension()), fileFormat);
139 				} else {
140 					// Split PEF
141 					pattern = pattern.replaceAll("'", "''")
142 							.replaceAll("([0#\\.,;%\u2030\u00A4-]+)", "'$1'");
143 					// Recalculate after replacement
144 					match = pattern.indexOf("{}");
145 					File splitDir = new File(textDir, "split");
146 					splitDir.mkdir();
147 					// FIXME: to validating the result PEFs, get a PEFValidator instance
148 					// (implemented in dotify.task.impl) through the streamline API.
149 					PEFFileSplitter splitter = new PEFFileSplitter(x -> true);
150 					String prefix = PEFFileSplitter.PREFIX;
151 					String postfix = PEFFileSplitter.POSTFIX;
152 					splitter.split(pefStream, splitDir, prefix, postfix);
153 					File[] pefFiles = splitDir.listFiles();
154 					String formatPattern = pattern.substring(0, match);
155 					int nWidth; {
156 						try {
157 							nWidth = Integer.parseInt(getOption(_number_width, "")); }
158 						catch (NumberFormatException e) {
159 							nWidth = 0; }}
160 					if (nWidth == 0)
161 						formatPattern += "###"; // Assume max 999 volumes
162 					else
163 						while (nWidth > 0) { formatPattern += "0"; nWidth--; }
164 					formatPattern += pattern.substring(match + 2);
165 					NumberFormat format = new DecimalFormat(formatPattern);
166 					for (File pefFile : pefFiles) {
167 						InputStream is = new FileInputStream(pefFile);
168 						if (pefFiles.length == 1 && !singleVolumeName.isEmpty()) {
169 							// Output to single file
170 							convertPEF2Text(is, new File(textDir, singleVolumeName + fileFormat.getFileExtension()), fileFormat);
171 						} else {
172 							String pefName = pefFile.getName();
173 							if (pefName.length() <= prefix.length() + postfix.length()
174 							    || !pefName.substring(0, prefix.length()).equals(prefix)
175 							    || !pefName.substring(pefName.length() - postfix.length()).equals(postfix)) {
176 								is.close();
177 								throw new RuntimeException("Coding error");
178 							}
179 							String textName = format.format(
180 									Integer.parseInt(pefName.substring(prefix.length(), pefName.length() - postfix.length())));
181 							convertPEF2Text(is,
182 									new File(textDir, textName + fileFormat.getFileExtension()),
183 									fileFormat);
184 						}
185 						is.close();
186 						if (!pefFile.delete()) pefFile.deleteOnExit();
187 					}
188 					pefStream.close();
189 					if (!splitDir.delete()) splitDir.deleteOnExit();
190 				}
191 				return; }
192 			catch (Exception e) {
193 				logger.error("Storing PEF to file format '" + fileFormat + "' failed", e); }}
194 		throw new XProcException(step, "pef:pef2text failed");
195 	}
196 	
197 	private void convertPEF2Text(InputStream pefStream, File textFile, FileFormat fileFormat)
198 			throws ParserConfigurationException, SAXException, IOException, UnsupportedWidthException {
199 		OutputStream textStream = new FileOutputStream(textFile);
200 		if ("pef".equals(fileFormat.getIdentifier())) {
201 
202 			// just write pefStream to textFile without parsing it
203 			byte[] buf = new byte[153600];
204 			int length;
205 			while ((length = pefStream.read(buf)) > 0)
206 				textStream.write(buf, 0, length);
207 		} else {
208 			EmbosserWriter writer = fileFormat.newEmbosserWriter(textStream);
209 			PEFHandler.Builder builder = new PEFHandler.Builder(writer);
210 			builder.range(null).align(Alignment.LEFT).offset(0);
211 			parsePefFile(pefStream, builder.build());
212 		}
213 		textStream.close();
214 	}
215 	
216 	private void addOption(QName option, MutableQuery query) {
217 		RuntimeValue v = getOption(option);
218 		if (v != null && !"".equals(v.getString()))
219 			query.add(option.getLocalName(), v.getString());
220 	}
221 	
222 	@Component(
223 		name = "pxi:pef2text",
224 		service = { XProcStepProvider.class },
225 		property = { "type:String={http://www.daisy.org/ns/pipeline/xproc/internal}pef2text" }
226 	)
227 	public static class Provider implements XProcStepProvider {
228 		
229 		@Override
230 		public XProcStep newStep(XProcRuntime runtime, XAtomicStep step, XProcMonitor monitor, Map<String,String> properties) {
231 			return new PEF2TextStep(runtime, step, fileFormatRegistry);
232 		}
233 		
234 		@Reference(
235 			name = "FileFormatRegistry",
236 			unbind = "-",
237 			service = FileFormatRegistry.class,
238 			cardinality = ReferenceCardinality.MANDATORY,
239 			policy = ReferencePolicy.STATIC
240 		)
241 		protected void bindFileFormatRegistry(FileFormatRegistry registry) {
242 			fileFormatRegistry = registry;
243 		}
244 		
245 		private FileFormatRegistry fileFormatRegistry;
246 		
247 	}
248 	
249 	// copied from org.daisy.braille.facade.PEFConverterFacade because it is no longer static
250 	/**
251 	 * Parses the given input stream using the supplied PEFHandler.
252 	 * @param is the input stream
253 	 * @param ph the PEFHandler
254 	 * @throws ParserConfigurationException
255 	 * @throws SAXException
256 	 * @throws IOException
257 	 * @throws UnsupportedWidthException
258 	 */
259 	private static void parsePefFile(InputStream is, PEFHandler ph)
260 			throws ParserConfigurationException, SAXException, IOException, UnsupportedWidthException {
261 		
262 		SAXParserFactory spf = SAXParserFactory.newInstance();
263 		spf.setNamespaceAware(true);
264 		SAXParser sp = spf.newSAXParser();
265 		try {
266 			sp.parse(is, ph); }
267 		catch (SAXException e) {
268 			if (ph.hasWidthError())
269 				throw new UnsupportedWidthException(e);
270 			else
271 				throw e; }
272 	}
273 	
274 	private static final Logger logger = LoggerFactory.getLogger(PEF2TextStep.class);
275 	
276 }