1 package org.daisy.pipeline.braille.tex.impl;
2
3 import java.io.InputStream;
4 import java.io.InputStreamReader;
5 import java.io.IOException;
6 import java.net.URI;
7 import java.net.URL;
8 import java.nio.charset.Charset;
9 import java.util.Locale;
10 import java.util.Properties;
11
12 import com.google.common.base.MoreObjects;
13 import com.google.common.base.MoreObjects.ToStringHelper;
14
15 import net.davidashen.text.Utf8TexParser.TexParserException;
16
17 import org.daisy.common.file.URLs;
18 import org.daisy.pipeline.braille.common.AbstractHyphenator;
19 import org.daisy.pipeline.braille.common.AbstractHyphenator.util.DefaultFullHyphenator;
20 import org.daisy.pipeline.braille.common.AbstractTransformProvider;
21 import org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Function;
22 import org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables;
23 import static org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables.fromNullable;
24 import static org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables.transform;
25 import org.daisy.pipeline.braille.common.HyphenatorProvider;
26 import org.daisy.pipeline.braille.common.Query;
27 import org.daisy.pipeline.braille.common.Query.MutableQuery;
28 import static org.daisy.pipeline.braille.common.Query.util.mutableQuery;
29 import static org.daisy.pipeline.braille.common.util.Files.isAbsoluteFile;
30 import static org.daisy.pipeline.braille.common.util.Locales.parseLocale;
31 import static org.daisy.pipeline.braille.common.util.Strings.extractHyphens;
32 import static org.daisy.pipeline.braille.common.util.Tuple2;
33 import org.daisy.pipeline.braille.tex.TexHyphenator;
34
35 import org.osgi.service.component.annotations.Activate;
36 import org.osgi.service.component.annotations.Component;
37 import org.osgi.service.component.annotations.Deactivate;
38 import org.osgi.service.component.annotations.Reference;
39 import org.osgi.service.component.annotations.ReferenceCardinality;
40 import org.osgi.service.component.annotations.ReferencePolicy;
41
42 import org.slf4j.Logger;
43 import org.slf4j.LoggerFactory;
44
45 @Component(
46 name = "org.daisy.pipeline.braille.tex.impl.TexHyphenatorDotifyImpl",
47 service = {
48 TexHyphenator.Provider.class,
49 HyphenatorProvider.class
50 }
51 )
52 public class TexHyphenatorDotifyImpl extends AbstractTransformProvider<TexHyphenator>
53 implements TexHyphenator.Provider {
54
55 private TexHyphenatorTableRegistry tableRegistry;
56
57 @Activate
58 protected void activate() {
59 logger.debug("Loading TeX hyphenation service (Dotify impl)");
60 }
61
62 @Deactivate
63 protected void deactivate() {
64 logger.debug("Unloading TeX hyphenation service (Dotify impl)");
65 }
66
67 @Reference(
68 name = "TexHyphenatorTableRegistry",
69 unbind = "-",
70 service = TexHyphenatorTableRegistry.class,
71 cardinality = ReferenceCardinality.MANDATORY,
72 policy = ReferencePolicy.STATIC
73 )
74 protected void bindTableRegistry(TexHyphenatorTableRegistry registry) {
75 tableRegistry = registry;
76 logger.debug("Registering Tex hyphenation table registry: " + registry);
77 }
78
79 protected void unbindTableRegistry(TexHyphenatorTableRegistry registry) {
80 tableRegistry = null;
81 }
82
83 private final static Iterable<TexHyphenator> empty = Iterables.<TexHyphenator>empty();
84
85 public Iterable<TexHyphenator> _get(Query query) {
86 MutableQuery q = mutableQuery(query);
87 if (q.containsKey("hyphenator")) {
88 String v = q.removeOnly("hyphenator").getValue().get();
89 if (!"texhyph".equals(v) && !"tex".equals(v))
90 return fromNullable(fromId(v)); }
91 if (q.containsKey("table")) {
92 String v = q.removeOnly("table").getValue().get();
93 if (!q.isEmpty()) {
94 logger.warn("A query with both 'table' and '" + q.iterator().next().getKey() + "' never matches anything");
95 return empty; }
96 return fromNullable(get(URLs.asURI(v))); }
97 Locale locale; {
98 String loc;
99 if (q.containsKey("document-locale"))
100 loc = q.removeOnly("document-locale").getValue().get();
101 else
102 loc = "und";
103 try {
104 locale = parseLocale(loc); }
105 catch (IllegalArgumentException e) {
106 logger.error("Invalid locale", e);
107 return empty; }
108 }
109 if (!q.isEmpty()) {
110 logger.warn("A query with '" + q.iterator().next().getKey() + "' never matches anything");
111 return empty; }
112 if (tableRegistry != null) {
113 return transform(
114 tableRegistry.get(locale),
115 new Function<URI,TexHyphenator>() {
116 public TexHyphenator _apply(URI table) {
117 return get(table); }}); }
118 return empty;
119 }
120
121 private TexHyphenator get(URI table) {
122 try {
123 URL resolved = resolveTable(table);
124 Properties properties = new Properties();
125 URI base = null;
126 if (table.toString().endsWith(".tex")) {
127 properties.setProperty(TexHyphenatorImpl.PATTERN_PATH_KEY, URLs.asURI(resolved).toASCIIString());
128 properties.setProperty(TexHyphenatorImpl.MODE_KEY, TexHyphenatorImpl.BYTE_MODE); }
129 else if (table.toString().endsWith(".properties")) {
130 base = URLs.asURI(resolved);
131 InputStream stream = resolved.openStream();
132 properties.load(stream);
133 stream.close(); }
134 else if (table.toString().endsWith(".xml")) {
135 base = URLs.asURI(resolved);
136 InputStream stream = resolved.openStream();
137 properties.loadFromXML(stream);
138 stream.close(); }
139 else
140 return null;
141 try {
142 return new TexHyphenatorImpl(properties, base); }
143 catch (Exception e) {
144 logger.warn("Could not create a hyphenator for properties " + properties, e); }}
145 catch (Exception e) {
146 logger.warn("Could not create a hyphenator for table", e); }
147 return null;
148 }
149
150 private URL resolveTable(URI table) {
151 URL resolvedTable = isAbsoluteFile(table) ? URLs.asURL(table) : tableRegistry.resolve(table);
152 if (resolvedTable == null)
153 throw new RuntimeException("Hyphenation table " + table + " could not be resolved");
154 return resolvedTable;
155 }
156
157
158
159
160 private class TexHyphenatorImpl extends AbstractHyphenator implements TexHyphenator {
161
162 private final static String LEFT_HYPHEN_MIN_KEY = "beginLimit";
163 private final static String RIGHT_HYPHEN_MIN_KEY = "endLimit";
164 private final static String ENCODING_KEY = "encoding";
165 private final static String PATTERN_PATH_KEY = "patternPath";
166 private final static String MODE_KEY = "mode";
167 private final static String BYTE_MODE = "byte";
168 private final static String CHARACTER_MODE = "character";
169
170 private final URI table;
171 private final net.davidashen.text.Hyphenator hyphenator;
172 private final int beginLimit;
173 private final int endLimit;
174
175 private TexHyphenatorImpl(Properties props, URI base) throws IOException, TexParserException {
176 String patternPath = props.getProperty(PATTERN_PATH_KEY);
177 if (patternPath == null)
178 throw new RuntimeException("Required property named '" + PATTERN_PATH_KEY + "' missing.");
179 table = base == null ? URLs.asURI(patternPath) : URLs.resolve(base, URLs.asURI(patternPath));
180 hyphenator = new net.davidashen.text.Hyphenator();
181 String leftHyphenMinStr = props.getProperty(LEFT_HYPHEN_MIN_KEY);
182 if (leftHyphenMinStr != null)
183 beginLimit = Integer.parseInt(leftHyphenMinStr);
184 else
185 beginLimit = 1;
186 String rightHyphenMinStr = props.getProperty(RIGHT_HYPHEN_MIN_KEY);
187 if (rightHyphenMinStr != null)
188 endLimit = Integer.parseInt(rightHyphenMinStr);
189 else
190 endLimit = 1;
191 String encoding = props.getProperty(ENCODING_KEY);
192 String modeStr = props.getProperty(MODE_KEY);
193 if (modeStr == null)
194 throw new RuntimeException("Required property named '" + MODE_KEY + "' missing.");
195 else if (modeStr.equals(BYTE_MODE)) {
196 if (encoding != null)
197 logger.warn("Configuration problem: Encoding has no effect in byte mode.");
198 hyphenator.loadTable(URLs.asURL(table).openStream()); }
199 else if (modeStr.equals(CHARACTER_MODE)) {
200 if (encoding == null)
201 logger.warn("Configuration problem: Encoding should be set in character mode.");
202 hyphenator.loadTable(new InputStreamReader(URLs.asURL(table).openStream(), Charset.forName(encoding))); }
203 else
204 throw new RuntimeException("Unrecognized mode. Allowed values are " + BYTE_MODE + " and " + CHARACTER_MODE);
205 }
206
207 public URI asTexHyphenatorTable() {
208 return table;
209 }
210
211 @Override
212 public FullHyphenator asFullHyphenator() {
213 return fullHyphenator;
214 }
215
216 private final FullHyphenator fullHyphenator = new DefaultFullHyphenator() {
217
218 private final static char SHY = '\u00AD';
219 private final static char ZWSP = '\u200B';
220
221 protected boolean isCodePointAware() { return true; }
222 protected boolean isLanguageAdaptive() { return false; }
223
224
225
226 protected byte[] getHyphenationOpportunities(String textWithoutHyphens, Locale language) throws RuntimeException {
227 try {
228 Tuple2<String,byte[]> t = extractHyphens(
229 hyphenator.hyphenate(textWithoutHyphens, beginLimit, endLimit), true, SHY, ZWSP);
230 if (!t._1.equals(textWithoutHyphens))
231 throw new RuntimeException("Unexpected output from " + hyphenator);
232 return t._2; }
233 catch (Exception e) {
234 throw new RuntimeException("Error during TeX hyphenation", e); }
235 }
236 };
237 }
238
239 @Override
240 public ToStringHelper toStringHelper() {
241 return MoreObjects.toStringHelper("TexHyphenatorDotifyImpl");
242 }
243
244 private static final Logger logger = LoggerFactory.getLogger(TexHyphenatorDotifyImpl.class);
245
246 }