1   package org.daisy.pipeline.braille.libhyphen.impl;
2   
3   import java.io.File;
4   import java.io.FileNotFoundException;
5   import java.io.IOException;
6   import java.net.URI;
7   import java.net.URL;
8   import java.nio.file.attribute.BasicFileAttributes;
9   import java.nio.file.attribute.FileTime;
10  import java.nio.file.Files;
11  import java.util.concurrent.TimeUnit;
12  import java.util.Locale;
13  import java.util.Map;
14  import java.util.NoSuchElementException;
15  
16  import ch.sbs.jhyphen.CompilationException;
17  import ch.sbs.jhyphen.Hyphen;
18  import ch.sbs.jhyphen.Hyphenator;
19  import ch.sbs.jhyphen.StandardHyphenationException;
20  
21  import com.google.common.base.MoreObjects;
22  import com.google.common.base.MoreObjects.ToStringHelper;
23  import com.google.common.cache.CacheBuilder;
24  
25  import org.daisy.common.file.URLs;
26  import org.daisy.pipeline.braille.common.AbstractHyphenator;
27  import org.daisy.pipeline.braille.common.AbstractHyphenator.util.DefaultFullHyphenator;
28  import org.daisy.pipeline.braille.common.AbstractHyphenator.util.DefaultLineBreaker;
29  import org.daisy.pipeline.braille.common.AbstractTransformProvider;
30  import org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Function;
31  import org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables;
32  import static org.daisy.pipeline.braille.common.AbstractTransformProvider.util.debug;
33  import static org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables.fromNullable;
34  import static org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables.intersection;
35  import static org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables.of;
36  import static org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables.transform;
37  import static org.daisy.pipeline.braille.common.AbstractTransformProvider.util.logCreate;
38  import org.daisy.pipeline.braille.common.HyphenatorProvider;
39  import org.daisy.pipeline.braille.common.NativePath;
40  import static org.daisy.pipeline.braille.common.Provider.util.memoize;
41  import org.daisy.pipeline.braille.common.Query;
42  import org.daisy.pipeline.braille.common.Query.MutableQuery;
43  import static org.daisy.pipeline.braille.common.Query.util.mutableQuery;
44  import static org.daisy.pipeline.braille.common.util.Files.asFile;
45  import static org.daisy.pipeline.braille.common.util.Files.isAbsoluteFile;
46  import static org.daisy.pipeline.braille.common.util.Locales.parseLocale;
47  import org.daisy.pipeline.braille.common.WithSideEffect;
48  import org.daisy.pipeline.braille.libhyphen.LibhyphenHyphenator;
49  
50  import org.osgi.service.component.annotations.Activate;
51  import org.osgi.service.component.annotations.Component;
52  import org.osgi.service.component.annotations.Deactivate;
53  import org.osgi.service.component.annotations.Reference;
54  import org.osgi.service.component.annotations.ReferenceCardinality;
55  import org.osgi.service.component.annotations.ReferencePolicy;
56  
57  import org.slf4j.Logger;
58  import org.slf4j.LoggerFactory;
59  
60  /**
61   * Provides a <a href="http://hunspell.github.io/">Hyphen</a> based {@link
62   * org.daisy.pipeline.braille.common.Hyphenator} implementation.
63   *
64   * @see <a href="../../../../../../../../../doc/">User documentation</a>.
65   */
66  @Component(
67  	name = "org.daisy.pipeline.braille.libhyphen.LibhyphenJnaImpl",
68  	service = {
69  		LibhyphenHyphenator.Provider.class,
70  		HyphenatorProvider.class
71  	}
72  )
73  public class LibhyphenJnaImpl extends AbstractTransformProvider<LibhyphenHyphenator>
74  	                          implements LibhyphenHyphenator.Provider {
75  	
76  	private LibhyphenTableRegistry tableRegistry;
77  	
78  	@Activate
79  	protected void activate() {
80  		logger.debug("Loading libhyphen service");
81  	}
82  	
83  	@Deactivate
84  	protected void deactivate() {
85  		logger.debug("Unloading libhyphen service");
86  	}
87  	
88  	@Reference(
89  		name = "LibhyphenLibrary",
90  		unbind = "-",
91  		service = NativePath.class,
92  		target = "(identifier=http://hunspell.sourceforge.net/Hyphen/native/*)",
93  		cardinality = ReferenceCardinality.MANDATORY,
94  		policy = ReferencePolicy.STATIC
95  	)
96  	protected void bindLibrary(NativePath path) {
97  		URI libraryPath = path.get("libhyphen").iterator().next();
98  		Hyphen.setLibraryPath(asFile(path.resolve(libraryPath)));
99  		logger.debug("Registering libhyphen library: " + libraryPath);
100 	}
101 	
102 	@Reference(
103 		name = "LibhyphenTableRegistry",
104 		unbind = "-",
105 		service = LibhyphenTableRegistry.class,
106 		cardinality = ReferenceCardinality.MANDATORY,
107 		policy = ReferencePolicy.STATIC
108 	)
109 	protected void bindTableRegistry(LibhyphenTableRegistry registry) {
110 		tableRegistry = registry;
111 		logger.debug("Registering libhyphen table registry: " + registry);
112 	}
113 	
114 	private final static Iterable<LibhyphenHyphenator> empty
115 	= Iterables.<LibhyphenHyphenator>empty();
116 	
117 	protected final Iterable<LibhyphenHyphenator> _get(Query query) {
118 		MutableQuery q = mutableQuery(query);
119 		if (q.containsKey("hyphenator")) {
120 			String v = q.removeOnly("hyphenator").getValue().get();
121 			if (!("hyphen".equals(v) || "libhyphen".equals(v))) {
122 				Iterable<LibhyphenHyphenator> ret;
123 				LibhyphenHyphenator h = fromId(v);
124 				if (h != null)
125 					ret = fromNullable(h);
126 				else
127 					ret = of(
128 						new WithSideEffect<LibhyphenHyphenator,Logger>() {
129 							public LibhyphenHyphenator _apply() {
130 								LibhyphenTable t = __apply(libhyphenTableProvider.get(URLs.asURI(v)));
131 								return __apply(logCreate(new LibhyphenHyphenatorImpl(t, null))); }});
132 				if (q.isEmpty())
133 					return ret;
134 				else
135 					return intersection(_get(q), ret); }}
136 		String tableKey;
137 		String table; {
138 			if (q.containsKey("libhyphen-table")) {
139 				table = q.removeOnly("libhyphen-table").getValue().get();
140 				tableKey = "libhyphen-table";
141 				if (q.containsKey("hyphen-table")) {
142 					logger.warn("A query with both 'libhyphen-table' and 'hyphen-table' never matches anything");
143 					return empty; }
144 				else if (q.containsKey("table")) {
145 					logger.warn("A query with both 'libhyphen-table' and 'table' never matches anything");
146 					return empty; }
147 			} else if (q.containsKey("hyphen-table")) {
148 				table = q.removeOnly("hyphen-table").getValue().get();
149 				tableKey = "hyphen-table";
150 				if (q.containsKey("table")) {
151 					logger.warn("A query with both 'hyphen-table' and 'table' never matches anything");
152 					return empty; }
153 			} else if (q.containsKey("table")) {
154 				table = q.removeOnly("table").getValue().get();
155 				tableKey = "table";
156 			} else {
157 				table = null;
158 				tableKey = null;
159 			}
160 		}
161 		Locale locale; {
162 			String loc = "und";
163 			if (q.containsKey("document-locale"))
164 				loc = q.removeOnly("document-locale").getValue().get();
165 			try {
166 				locale = parseLocale(loc); }
167 			catch (IllegalArgumentException e) {
168 				logger.error("Invalid locale", e);
169 				return empty; }
170 		}
171 		if (table != null) {
172 			if (!q.isEmpty()) {
173 				logger.warn("A query with both '" + tableKey + "' and '"
174 				            + q.iterator().next().getKey() + "' never matches anything");
175 				return empty; }
176 			return of(
177 				new WithSideEffect<LibhyphenHyphenator,Logger>() {
178 					public LibhyphenHyphenator _apply() {
179 						LibhyphenTable t = __apply(libhyphenTableProvider.get(URLs.asURI(table)));
180 						return __apply(logCreate(new LibhyphenHyphenatorImpl(t, null))); }}); }
181 		if (UND.equals(locale))
182 			return of(
183 				new WithSideEffect<LibhyphenHyphenator,Logger>() {
184 					public LibhyphenHyphenator _apply() {
185 						return __apply(logCreate(new LibhyphenHyphenatorImpl(null, UND))); }});
186 		return transform(
187 			libhyphenTableProvider.get(locale),
188 			new Function<LibhyphenTable,LibhyphenHyphenator>() {
189 				public LibhyphenHyphenator _apply(LibhyphenTable table) {
190 					return __apply(logCreate(new LibhyphenHyphenatorImpl(table, locale))); }});
191 	}
192 	
193 	private class LibhyphenTable {
194 		private URI table;
195 		private Hyphenator hyphenator;
196 		private LibhyphenTable(URI table) throws CompilationException, FileNotFoundException {
197 			this.table = table;
198 			this.hyphenator = compileTable(table);
199 		}
200 	}
201 	
202 	private final LibhyphenTableProvider libhyphenTableProvider = new LibhyphenTableProvider();
203 	
204 	private class LibhyphenTableProvider {
205 		public WithSideEffect<LibhyphenTable,Logger> get(final URI table) {
206 			try {
207 				return WithSideEffect.of(new LibhyphenTable(table)); }
208 			catch (CompilationException|FileNotFoundException e) {
209 				return new WithSideEffect<LibhyphenTable,Logger>() {
210 					public LibhyphenTable _apply() throws NoSuchElementException {
211 						__apply(debug("Could not create hyphenator for table " + table));
212 						throw new NoSuchElementException();
213 					}
214 				};
215 			}
216 		}
217 		public Iterable<LibhyphenTable> get(Locale locale) {
218 			return transform(
219 				tableRegistry.get(locale),
220 				new Function<URI,LibhyphenTable>() {
221 					public LibhyphenTable _apply(URI table) {
222 						return __apply(get(table)); }});
223 		}
224 	}
225 
226 	private final static Locale UND = parseLocale("und");
227 	
228 	private class LibhyphenHyphenatorImpl extends AbstractHyphenator implements LibhyphenHyphenator {
229 		
230 		private final Locale mainLanguage;
231 		private final LibhyphenTable mainTable;
232 		private final org.daisy.pipeline.braille.common.Provider<Locale,WithSideEffect<LibhyphenTable,Logger>> subHyphenators;
233 		
234 		/**
235 		 * @param table        When {@code null}, there is no main hyphenation table. Each language
236 		 *                     is handled with an appropriate table.
237 		 * @param mainLanguage {@code null} means that the main hyphenation table (which may not be
238 		 *                     {@code null}) should be used to handle all languages.
239 		 */
240 		private LibhyphenHyphenatorImpl(LibhyphenTable table, Locale mainLanguage) {
241 			if (table == null && mainLanguage == null)
242 				throw new IllegalArgumentException();
243 			this.mainLanguage = mainLanguage;
244 			this.mainTable = table;
245 			this.subHyphenators = memoize(libhyphenTableProvider::get);
246 		}
247 		
248 		public URI asLibhyphenTable() {
249 			return mainTable != null ? mainTable.table : null;
250 		}
251 		
252 		@Override
253 		public FullHyphenator asFullHyphenator() {
254 			return fullHyphenator;
255 		}
256 		
257 		private final FullHyphenator fullHyphenator = new DefaultFullHyphenator() {
258 			protected boolean isCodePointAware() { return false; }
259 			protected boolean isLanguageAdaptive() { return true; }
260 			protected byte[] getHyphenationOpportunities(String textWithoutHyphens, Locale language)
261 					throws NonStandardHyphenationException, RuntimeException {
262 				LibhyphenTable table = getTable(language);
263 				if (table == null)
264 					// note that breaking after hard hyphens is handled by DefaultFullHyphenator
265 					return null;
266 				try {
267 					return table.hyphenator.hyphenate(textWithoutHyphens);
268 				} catch (StandardHyphenationException e) {
269 					throw new NonStandardHyphenationException(e);
270 				} catch (Exception e) {
271 					throw new RuntimeException("Error during libhyphen hyphenation", e);
272 				}
273 			}
274 			@Override
275 			public String toString() {
276 				return LibhyphenHyphenatorImpl.this.toString();
277 			}
278 		};
279 		
280 		// FIXME: this LineBreaker does not consider SHY or ZWSP characters in the input
281 		@Override
282 		public LineBreaker asLineBreaker() {
283 			return lineBreaker;
284 		}
285 		
286 		private final LineBreaker lineBreaker = new DefaultLineBreaker() {
287 			protected Break breakWord(String word, Locale language, int limit, boolean force) {
288 				LibhyphenTable table = getTable(language);
289 				if (table != null) {
290 					Hyphenator.Break br = table.hyphenator.hyphenate(word, limit);
291 					if (force && br.getBreakPosition() == 0)
292 						return new Break(word, limit, false);
293 					else
294 						return new Break(br.getText(), br.getBreakPosition(), br.hasHyphen());
295 				}
296 				return super.breakWord(word, language, limit, force);
297 			}
298 			@Override
299 			public String toString() {
300 				return LibhyphenHyphenatorImpl.this.toString();
301 			}
302 		};
303 		
304 		private LibhyphenTable getTable(Locale language) {
305 			if (mainLanguage == null || mainLanguage.equals(language))
306 				return mainTable;
307 			else if (language == null || UND.equals(language))
308 				return null;
309 			else
310 				try {
311 					return subHyphenators.get(language).iterator().next().apply(logger);
312 				} catch (NoSuchElementException e) {
313 					logger.warn("No hyphenator for language " + language);
314 					return null;
315 				}
316 		}
317 		
318 		@Override
319 		public ToStringHelper toStringHelper() {
320 			return MoreObjects.toStringHelper("LibhyphenJnaImpl$LibhyphenHyphenatorImpl")
321 				.add("language", mainLanguage)
322 				.add("table", asLibhyphenTable());
323 		}
324 		
325 		@Override
326 		public boolean equals(Object o) {
327 			if (this == o)
328 				return true;
329 			if (o == null)
330 				return false;
331 			if (getClass() != o.getClass())
332 				return false;
333 			LibhyphenHyphenatorImpl that = (LibhyphenHyphenatorImpl)o;
334 			if (this.mainTable == null) {
335 				if (that.mainTable != null)
336 					return false;
337 			} else if (that.mainTable == null)
338 				return false;
339 			else if (!this.mainTable.table.equals(that.mainTable.table))
340 				return false;
341 			return true;
342 		}
343 	}
344 	
345 	private Hyphenator compileTable(URI table) throws FileNotFoundException, CompilationException {
346 		if ("volatile-file".equals(table.getScheme()))
347 			try {
348 				table = new URI("file", table.getSchemeSpecificPart(), table.getFragment());
349 			} catch (Exception e) {
350 				// should not happen
351 				throw new IllegalStateException(e);
352 			}
353 		ModifiedFile tableFile = new ModifiedFile(resolveTable(table));
354 		Hyphenator hyphenator = tableCache.get(tableFile);
355 		if (hyphenator == null) {
356 			hyphenator = new Hyphenator(tableFile.file);
357 			tableCache.put(tableFile, hyphenator);
358 		}
359 		return hyphenator;
360 	}
361 	
362 	private File resolveTable(URI table) throws FileNotFoundException {
363 		URL resolvedTable = isAbsoluteFile(table) ? URLs.asURL(table) : tableRegistry.resolve(table);
364 		if (resolvedTable == null)
365 			throw new FileNotFoundException("Hyphenation table " + table + " could not be resolved");
366 		return asFile(resolvedTable);
367 	}
368 
369 	private final Map<ModifiedFile,Hyphenator> tableCache
370 		= CacheBuilder.newBuilder()
371 		              .expireAfterAccess(300, TimeUnit.SECONDS)
372 		              .<ModifiedFile,Hyphenator>build()
373 		              .asMap();
374 
375 	@Override
376 	public ToStringHelper toStringHelper() {
377 		return MoreObjects.toStringHelper("LibhyphenJnaImpl");
378 	}
379 	
380 	private static final Logger logger = LoggerFactory.getLogger(LibhyphenJnaImpl.class);
381 	
382 	private static class ModifiedFile {
383 		
384 		public final File file;
385 		public final FileTime lastModifiedTime;
386 		
387 		public ModifiedFile(File file) {
388 			this.file = file;
389 			try {
390 				BasicFileAttributes attrs = Files.readAttributes(
391 					file.toPath(),
392 					BasicFileAttributes.class);
393 				this.lastModifiedTime = attrs.lastModifiedTime();
394 			} catch (IOException e) {
395 				throw new RuntimeException(e); // should not happen
396 			}
397 		}
398 		
399 		@Override
400 		public int hashCode() {
401 			final int prime = 31;
402 			int result = 1;
403 			result = prime * result + file.hashCode();
404 			result = prime * result + lastModifiedTime.hashCode();
405 			return result;
406 		}
407 		
408 		@Override
409 		public boolean equals(Object obj) {
410 			if (this == obj)
411 				return true;
412 			if (obj == null)
413 				return false;
414 			if (getClass() != obj.getClass())
415 				return false;
416 			ModifiedFile other = (ModifiedFile)obj;
417 			if (!file.equals(other.file))
418 				return false;
419 			if (!lastModifiedTime.equals(other.lastModifiedTime))
420 				return false;
421 			return true;
422 		}
423 	}
424 }