1   package org.daisy.pipeline.braille.tex.impl;
2   
3   import java.io.InputStream;
4   import java.io.IOException;
5   import java.net.URI;
6   import java.net.URL;
7   import java.util.Locale;
8   
9   import com.google.common.base.MoreObjects;
10  import com.google.common.base.MoreObjects.ToStringHelper;
11  
12  import org.daisy.common.file.URLs;
13  import org.daisy.pipeline.braille.common.AbstractHyphenator;
14  import org.daisy.pipeline.braille.common.AbstractHyphenator.util.DefaultFullHyphenator;
15  import org.daisy.pipeline.braille.common.AbstractTransformProvider;
16  import org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Function;
17  import org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables;
18  import static org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables.fromNullable;
19  import static org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables.transform;
20  import org.daisy.pipeline.braille.common.HyphenatorProvider;
21  import org.daisy.pipeline.braille.common.Query;
22  import org.daisy.pipeline.braille.common.Query.MutableQuery;
23  import static org.daisy.pipeline.braille.common.Query.util.mutableQuery;
24  import static org.daisy.pipeline.braille.common.util.Files.isAbsoluteFile;
25  import static org.daisy.pipeline.braille.common.util.Locales.parseLocale;
26  import static org.daisy.pipeline.braille.common.util.Strings.extractHyphens;
27  import static org.daisy.pipeline.braille.common.util.Tuple2;
28  import org.daisy.pipeline.braille.tex.TexHyphenator;
29  
30  import org.osgi.service.component.annotations.Activate;
31  import org.osgi.service.component.annotations.Component;
32  import org.osgi.service.component.annotations.Deactivate;
33  import org.osgi.service.component.annotations.Reference;
34  import org.osgi.service.component.annotations.ReferenceCardinality;
35  import org.osgi.service.component.annotations.ReferencePolicy;
36  
37  import org.slf4j.Logger;
38  import org.slf4j.LoggerFactory;
39  
40  @Component(
41  	name = "org.daisy.pipeline.braille.tex.impl.TexHyphenatorSimpleImpl",
42  	service = {
43  		TexHyphenator.Provider.class,
44  		HyphenatorProvider.class
45  	}
46  )
47  public class TexHyphenatorSimpleImpl extends AbstractTransformProvider<TexHyphenator>
48  	                                 implements TexHyphenator.Provider {
49  	
50  	private TexHyphenatorTableRegistry tableRegistry;
51  	
52  	@Activate
53  	protected void activate() {
54  		logger.debug("Loading TeX hyphenation service");
55  	}
56  	
57  	@Deactivate
58  	protected void deactivate() {
59  		logger.debug("Unloading TeX hyphenation service");
60  	}
61  	
62  	@Reference(
63  		name = "TexHyphenatorTableRegistry",
64  		unbind = "-",
65  		service = TexHyphenatorTableRegistry.class,
66  		cardinality = ReferenceCardinality.MANDATORY,
67  		policy = ReferencePolicy.STATIC
68  	)
69  	protected void bindTableRegistry(TexHyphenatorTableRegistry registry) {
70  		tableRegistry = registry;
71  		logger.debug("Registering Tex hyphenation table registry: " + registry);
72  	}
73  	
74  	protected void unbindTableRegistry(TexHyphenatorTableRegistry registry) {
75  		tableRegistry = null;
76  	}
77  	
78  	private final static Iterable<TexHyphenator> empty = Iterables.<TexHyphenator>empty();
79  	
80  	public Iterable<TexHyphenator> _get(Query query) {
81  		MutableQuery q = mutableQuery(query);
82  		if (q.containsKey("hyphenator")) {
83  			String v = q.removeOnly("hyphenator").getValue().get();
84  			if (!"texhyph".equals(v) && !"tex".equals(v))
85  				return fromNullable(fromId(v)); }
86  		if (q.containsKey("table")) {
87  			String v = q.removeOnly("table").getValue().get();
88  			if (!q.isEmpty()) {
89  				logger.warn("A query with both 'table' and '" + q.iterator().next().getKey() + "' never matches anything");
90  				return empty; }
91  			return fromNullable(get(URLs.asURI(v))); }
92  		Locale locale; {
93  			String loc;
94  			if (q.containsKey("document-locale"))
95  				loc = q.removeOnly("document-locale").getValue().get();
96  			else
97  				loc = "und";
98  			try {
99  				locale = parseLocale(loc); }
100 			catch (IllegalArgumentException e) {
101 				logger.error("Invalid locale", e);
102 				return empty; }
103 		}
104 		if (!q.isEmpty()) {
105 			logger.warn("A query with '" + q.iterator().next().getKey() + "' never matches anything");
106 			return empty; }
107 		if (tableRegistry != null) {
108 			return transform(
109 				tableRegistry.get(locale),
110 				new Function<URI,TexHyphenator>() {
111 					public TexHyphenator _apply(URI table) {
112 						return get(table); }}); }
113 		return empty;
114 	}
115 	
116 	private TexHyphenator get(URI table) {
117 		if (table.toString().endsWith(".tex")) {
118 			try { return new TexHyphenatorImpl(table); }
119 			catch (Exception e) {
120 				logger.warn("Could not create hyphenator for table " + table, e); }}
121 		return null;
122 	}
123 	
124 	private class TexHyphenatorImpl extends AbstractHyphenator implements TexHyphenator {
125 		
126 		private final URI table;
127 		private final net.davidashen.text.Hyphenator hyphenator;
128 		
129 		private TexHyphenatorImpl(URI table) throws IOException {
130 			this.table = table;
131 			hyphenator = new net.davidashen.text.Hyphenator();
132 			InputStream stream = resolveTable(table).openStream();
133 			hyphenator.loadTable(stream);
134 			stream.close();
135 		}
136 		
137 		public URI asTexHyphenatorTable() {
138 			return table;
139 		}
140 		
141 		@Override
142 		public FullHyphenator asFullHyphenator() {
143 			return fullHyphenator;
144 		}
145 		
146 		private final FullHyphenator fullHyphenator = new DefaultFullHyphenator() {
147 
148 				private final static char SHY = '\u00AD';
149 				private final static char ZWSP = '\u200B';
150 
151 				protected boolean isCodePointAware() { return true; }
152 				protected boolean isLanguageAdaptive() { return true; }
153 		
154 				/**
155 				 * @param language ignored
156 				 */
157 				protected byte[] getHyphenationOpportunities(String textWithoutHyphens, Locale language) throws RuntimeException {
158 					try {
159 						Tuple2<String,byte[]> t = extractHyphens(
160 							hyphenator.hyphenate(textWithoutHyphens), true, SHY, ZWSP);
161 						if (!t._1.equals(textWithoutHyphens))
162 							throw new RuntimeException("Unexpected output from " + hyphenator);
163 						return t._2; }
164 					catch (Exception e) {
165 						throw new RuntimeException("Error during TeX hyphenation", e); }
166 				}
167 			};
168 	}
169 	
170 	private URL resolveTable(URI table) {
171 		URL resolvedTable = isAbsoluteFile(table) ? URLs.asURL(table) : tableRegistry.resolve(table);
172 		if (resolvedTable == null)
173 			throw new RuntimeException("Hyphenation table " + table + " could not be resolved");
174 		return resolvedTable;
175 	}
176 	
177 	@Override
178 	public ToStringHelper toStringHelper() {
179 		return MoreObjects.toStringHelper("TexHyphenatorSimpleImpl");
180 	}
181 	
182 	private static final Logger logger = LoggerFactory.getLogger(TexHyphenatorSimpleImpl.class);
183 	
184 }