1   package org.daisy.pipeline.braille.tex.impl;
2   
3   import java.io.InputStream;
4   import java.io.InputStreamReader;
5   import java.io.IOException;
6   import java.net.URI;
7   import java.net.URL;
8   import java.nio.charset.Charset;
9   import java.util.Locale;
10  import java.util.Properties;
11  
12  import com.google.common.base.MoreObjects;
13  import com.google.common.base.MoreObjects.ToStringHelper;
14  
15  import net.davidashen.text.Utf8TexParser.TexParserException;
16  
17  import org.daisy.common.file.URLs;
18  import org.daisy.pipeline.braille.common.AbstractHyphenator;
19  import org.daisy.pipeline.braille.common.AbstractHyphenator.util.DefaultFullHyphenator;
20  import org.daisy.pipeline.braille.common.AbstractTransformProvider;
21  import org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Function;
22  import org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables;
23  import static org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables.fromNullable;
24  import static org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables.transform;
25  import org.daisy.pipeline.braille.common.HyphenatorProvider;
26  import org.daisy.pipeline.braille.common.Query;
27  import org.daisy.pipeline.braille.common.Query.MutableQuery;
28  import static org.daisy.pipeline.braille.common.Query.util.mutableQuery;
29  import static org.daisy.pipeline.braille.common.util.Files.isAbsoluteFile;
30  import static org.daisy.pipeline.braille.common.util.Locales.parseLocale;
31  import static org.daisy.pipeline.braille.common.util.Strings.extractHyphens;
32  import static org.daisy.pipeline.braille.common.util.Tuple2;
33  import org.daisy.pipeline.braille.tex.TexHyphenator;
34  
35  import org.osgi.service.component.annotations.Activate;
36  import org.osgi.service.component.annotations.Component;
37  import org.osgi.service.component.annotations.Deactivate;
38  import org.osgi.service.component.annotations.Reference;
39  import org.osgi.service.component.annotations.ReferenceCardinality;
40  import org.osgi.service.component.annotations.ReferencePolicy;
41  
42  import org.slf4j.Logger;
43  import org.slf4j.LoggerFactory;
44  
45  @Component(
46  	name = "org.daisy.pipeline.braille.tex.impl.TexHyphenatorDotifyImpl",
47  	service = {
48  		TexHyphenator.Provider.class,
49  		HyphenatorProvider.class
50  	}
51  )
52  public class TexHyphenatorDotifyImpl extends AbstractTransformProvider<TexHyphenator>
53  	                                 implements TexHyphenator.Provider {
54  	
55  	private TexHyphenatorTableRegistry tableRegistry;
56  	
57  	@Activate
58  	protected void activate() {
59  		logger.debug("Loading TeX hyphenation service (Dotify impl)");
60  	}
61  	
62  	@Deactivate
63  	protected void deactivate() {
64  		logger.debug("Unloading TeX hyphenation service (Dotify impl)");
65  	}
66  	
67  	@Reference(
68  		name = "TexHyphenatorTableRegistry",
69  		unbind = "-",
70  		service = TexHyphenatorTableRegistry.class,
71  		cardinality = ReferenceCardinality.MANDATORY,
72  		policy = ReferencePolicy.STATIC
73  	)
74  	protected void bindTableRegistry(TexHyphenatorTableRegistry registry) {
75  		tableRegistry = registry;
76  		logger.debug("Registering Tex hyphenation table registry: " + registry);
77  	}
78  	
79  	protected void unbindTableRegistry(TexHyphenatorTableRegistry registry) {
80  		tableRegistry = null;
81  	}
82  	
83  	private final static Iterable<TexHyphenator> empty = Iterables.<TexHyphenator>empty();
84  	
85  	public Iterable<TexHyphenator> _get(Query query) {
86  		MutableQuery q = mutableQuery(query);
87  		if (q.containsKey("hyphenator")) {
88  			String v = q.removeOnly("hyphenator").getValue().get();
89  			if (!"texhyph".equals(v) && !"tex".equals(v))
90  				return fromNullable(fromId(v)); }
91  		if (q.containsKey("table")) {
92  			String v = q.removeOnly("table").getValue().get();
93  			if (!q.isEmpty()) {
94  				logger.warn("A query with both 'table' and '" + q.iterator().next().getKey() + "' never matches anything");
95  				return empty; }
96  			return fromNullable(get(URLs.asURI(v))); }
97  		Locale locale; {
98  			String loc;
99  			if (q.containsKey("document-locale"))
100 				loc = q.removeOnly("document-locale").getValue().get();
101 			else
102 				loc = "und";
103 			try {
104 				locale = parseLocale(loc); }
105 			catch (IllegalArgumentException e) {
106 				logger.error("Invalid locale", e);
107 				return empty; }
108 		}
109 		if (!q.isEmpty()) {
110 			logger.warn("A query with '" + q.iterator().next().getKey() + "' never matches anything");
111 			return empty; }
112 		if (tableRegistry != null) {
113 			return transform(
114 				tableRegistry.get(locale),
115 				new Function<URI,TexHyphenator>() {
116 					public TexHyphenator _apply(URI table) {
117 						return get(table); }}); }
118 		return empty;
119 	}
120 	
121 	private TexHyphenator get(URI table) {
122 		try {
123 			URL resolved = resolveTable(table);
124 			Properties properties = new Properties();
125 			URI base = null;
126 			if (table.toString().endsWith(".tex")) {
127 				properties.setProperty(TexHyphenatorImpl.PATTERN_PATH_KEY, URLs.asURI(resolved).toASCIIString());
128 				properties.setProperty(TexHyphenatorImpl.MODE_KEY, TexHyphenatorImpl.BYTE_MODE); }
129 			else if (table.toString().endsWith(".properties")) {
130 				base = URLs.asURI(resolved);
131 				InputStream stream = resolved.openStream();
132 				properties.load(stream);
133 				stream.close(); }
134 			else if (table.toString().endsWith(".xml")) {
135 				base = URLs.asURI(resolved);
136 				InputStream stream = resolved.openStream();
137 				properties.loadFromXML(stream);
138 				stream.close(); }
139 			else
140 				return null;
141 			try {
142 				return new TexHyphenatorImpl(properties, base); }
143 			catch (Exception e) {
144 				logger.warn("Could not create a hyphenator for properties " + properties, e); }}
145 		catch (Exception e) {
146 			logger.warn("Could not create a hyphenator for table", e); }
147 		return null;
148 	}
149 	
150 	private URL resolveTable(URI table) {
151 		URL resolvedTable = isAbsoluteFile(table) ? URLs.asURL(table) : tableRegistry.resolve(table);
152 		if (resolvedTable == null)
153 			throw new RuntimeException("Hyphenation table " + table + " could not be resolved");
154 		return resolvedTable;
155 	}
156 	
157 	/*
158 	 * Code originally taken from org.daisy.dotify.impl.hyphenator.latex.HyphenationConfig
159 	 */
160 	private class TexHyphenatorImpl extends AbstractHyphenator implements TexHyphenator {
161 		
162 		private final static String LEFT_HYPHEN_MIN_KEY = "beginLimit";
163 		private final static String RIGHT_HYPHEN_MIN_KEY = "endLimit";
164 		private final static String ENCODING_KEY = "encoding";
165 		private final static String PATTERN_PATH_KEY = "patternPath";
166 		private final static String MODE_KEY = "mode";
167 		private final static String BYTE_MODE = "byte";
168 		private final static String CHARACTER_MODE = "character";
169 		
170 		private final URI table;
171 		private final net.davidashen.text.Hyphenator hyphenator;
172 		private final int beginLimit;
173 		private final int endLimit;
174 		
175 		private TexHyphenatorImpl(Properties props, URI base) throws IOException, TexParserException {
176 			String patternPath = props.getProperty(PATTERN_PATH_KEY);
177 			if (patternPath == null)
178 				throw new RuntimeException("Required property named '" + PATTERN_PATH_KEY + "' missing.");
179 			table = base == null ? URLs.asURI(patternPath) : URLs.resolve(base, URLs.asURI(patternPath));
180 			hyphenator = new net.davidashen.text.Hyphenator();
181 			String leftHyphenMinStr = props.getProperty(LEFT_HYPHEN_MIN_KEY);
182 			if (leftHyphenMinStr != null)
183 				beginLimit = Integer.parseInt(leftHyphenMinStr);
184 			else
185 				beginLimit = 1;
186 			String rightHyphenMinStr = props.getProperty(RIGHT_HYPHEN_MIN_KEY);
187 			if (rightHyphenMinStr != null)
188 				endLimit = Integer.parseInt(rightHyphenMinStr);
189 			else
190 				endLimit = 1;
191 			String encoding = props.getProperty(ENCODING_KEY);
192 			String modeStr = props.getProperty(MODE_KEY);
193 			if (modeStr == null)
194 				throw new RuntimeException("Required property named '" + MODE_KEY + "' missing.");
195 			else if (modeStr.equals(BYTE_MODE)) {
196 				if (encoding != null)
197 					logger.warn("Configuration problem: Encoding has no effect in byte mode.");
198 				hyphenator.loadTable(URLs.asURL(table).openStream()); }
199 			else if (modeStr.equals(CHARACTER_MODE)) {
200 				if (encoding == null)
201 					logger.warn("Configuration problem: Encoding should be set in character mode.");
202 				hyphenator.loadTable(new InputStreamReader(URLs.asURL(table).openStream(), Charset.forName(encoding))); }
203 			else
204 				throw new RuntimeException("Unrecognized mode. Allowed values are " + BYTE_MODE + " and " + CHARACTER_MODE);
205 		}
206 		
207 		public URI asTexHyphenatorTable() {
208 			return table;
209 		}
210 		
211 		@Override
212 		public FullHyphenator asFullHyphenator() {
213 			return fullHyphenator;
214 		}
215 		
216 		private final FullHyphenator fullHyphenator = new DefaultFullHyphenator() {
217 
218 				private final static char SHY = '\u00AD';
219 				private final static char ZWSP = '\u200B';
220 
221 				protected boolean isCodePointAware() { return true; }
222 				protected boolean isLanguageAdaptive() { return false; }
223 				/**
224 				 * @param language ignored
225 				 */
226 				protected byte[] getHyphenationOpportunities(String textWithoutHyphens, Locale language) throws RuntimeException {
227 					try {
228 						Tuple2<String,byte[]> t = extractHyphens(
229 							hyphenator.hyphenate(textWithoutHyphens, beginLimit, endLimit), true, SHY, ZWSP);
230 						if (!t._1.equals(textWithoutHyphens))
231 							throw new RuntimeException("Unexpected output from " + hyphenator);
232 						return t._2; }
233 					catch (Exception e) {
234 						throw new RuntimeException("Error during TeX hyphenation", e); }
235 				}
236 			};
237 	}
238 	
239 	@Override
240 	public ToStringHelper toStringHelper() {
241 		return MoreObjects.toStringHelper("TexHyphenatorDotifyImpl");
242 	}
243 	
244 	private static final Logger logger = LoggerFactory.getLogger(TexHyphenatorDotifyImpl.class);
245 	
246 }