1 package org.daisy.pipeline.braille.liblouis.impl;
2
3 import java.text.Normalizer;
4 import java.util.ArrayList;
5 import java.util.Arrays;
6 import static java.util.Collections.singleton;
7 import java.util.Iterator;
8 import java.util.List;
9 import java.util.Locale;
10 import java.util.Map;
11 import java.util.stream.Collectors;
12 import java.util.regex.Matcher;
13 import java.util.regex.Pattern;
14
15 import com.google.common.base.MoreObjects;
16 import com.google.common.base.MoreObjects.ToStringHelper;
17 import com.google.common.base.Splitter;
18 import com.google.common.collect.ImmutableList;
19 import com.google.common.collect.ImmutableMap;
20
21 import static com.google.common.collect.Iterables.any;
22 import static com.google.common.collect.Iterables.size;
23 import static com.google.common.collect.Iterables.toArray;
24 import com.google.common.collect.Iterators;
25
26 import cz.vutbr.web.css.CSSProperty;
27 import cz.vutbr.web.css.CSSProperty.FontStyle;
28 import cz.vutbr.web.css.CSSProperty.FontWeight;
29 import cz.vutbr.web.css.CSSProperty.TextDecoration;
30 import cz.vutbr.web.css.Term;
31 import cz.vutbr.web.css.TermIdent;
32 import cz.vutbr.web.css.TermInteger;
33 import cz.vutbr.web.css.TermList;
34
35 import org.daisy.braille.css.BrailleCSSProperty.BrailleCharset;
36 import org.daisy.braille.css.BrailleCSSProperty.Hyphens;
37 import org.daisy.braille.css.BrailleCSSProperty.LetterSpacing;
38 import org.daisy.braille.css.BrailleCSSProperty.TextTransform;
39 import org.daisy.braille.css.BrailleCSSProperty.WhiteSpace;
40 import org.daisy.braille.css.SimpleInlineStyle;
41
42 import org.daisy.pipeline.braille.common.AbstractBrailleTranslator;
43 import org.daisy.pipeline.braille.common.AbstractBrailleTranslator.util.DefaultLineBreaker;
44 import org.daisy.pipeline.braille.common.AbstractHyphenator.util.NoHyphenator;
45 import org.daisy.pipeline.braille.common.AbstractTransformProvider;
46 import org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables;
47 import org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Function;
48 import static org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables.memoize;
49 import static org.daisy.pipeline.braille.common.AbstractTransformProvider.util.Iterables.transform;
50 import static org.daisy.pipeline.braille.common.AbstractTransformProvider.util.logCreate;
51 import static org.daisy.pipeline.braille.common.AbstractTransformProvider.util.logSelect;
52 import org.daisy.pipeline.braille.common.BrailleTranslator;
53 import org.daisy.pipeline.braille.common.BrailleTranslatorProvider;
54 import org.daisy.pipeline.braille.common.CompoundBrailleTranslator;
55 import org.daisy.pipeline.braille.common.Hyphenator;
56 import org.daisy.pipeline.braille.common.Hyphenator.NonStandardHyphenationException;
57 import org.daisy.pipeline.braille.common.Query;
58 import org.daisy.pipeline.braille.common.Query.Feature;
59 import org.daisy.pipeline.braille.common.Query.MutableQuery;
60 import static org.daisy.pipeline.braille.common.Query.util.mutableQuery;
61 import org.daisy.pipeline.braille.common.TransformationException;
62 import org.daisy.pipeline.braille.common.TransformProvider;
63 import org.daisy.pipeline.braille.common.UnityBrailleTranslator;
64 import static org.daisy.pipeline.braille.common.util.Strings.extractHyphens;
65 import static org.daisy.pipeline.braille.common.util.Strings.insertHyphens;
66 import static org.daisy.pipeline.braille.common.util.Strings.join;
67 import static org.daisy.pipeline.braille.common.util.Strings.splitInclDelimiter;
68 import static org.daisy.pipeline.braille.common.util.Tuple2;
69 import org.daisy.pipeline.braille.css.CSSStyledText;
70 import org.daisy.pipeline.braille.liblouis.LiblouisTable;
71 import org.daisy.pipeline.braille.liblouis.LiblouisTranslator;
72 import org.daisy.pipeline.braille.liblouis.impl.LiblouisTableJnaImplProvider.LiblouisTableJnaImpl;
73 import org.daisy.pipeline.braille.liblouis.pef.LiblouisDisplayTableBrailleConverter;
74
75 import org.liblouis.DisplayException;
76 import org.liblouis.DisplayTable;
77 import org.liblouis.TranslationException;
78 import org.liblouis.TranslationResult;
79 import org.liblouis.Translator;
80 import org.liblouis.Typeform;
81
82 import org.osgi.service.component.annotations.Component;
83 import org.osgi.service.component.annotations.Reference;
84 import org.osgi.service.component.annotations.ReferenceCardinality;
85 import org.osgi.service.component.annotations.ReferencePolicy;
86
87 import static org.slf4j.helpers.NOPLogger.NOP_LOGGER;
88 import org.slf4j.Logger;
89 import org.slf4j.LoggerFactory;
90
91
92
93
94 @Component(
95 name = "org.daisy.pipeline.braille.liblouis.impl.LiblouisTranslatorJnaImplProvider",
96 service = {
97 LiblouisTranslator.Provider.class,
98 BrailleTranslatorProvider.class,
99 TransformProvider.class
100 }
101 )
102 public class LiblouisTranslatorJnaImplProvider extends AbstractTransformProvider<LiblouisTranslator> implements LiblouisTranslator.Provider {
103
104 private final static char SHY = '\u00AD';
105 private final static char ZWSP = '\u200B';
106 private final static char NBSP = '\u00A0';
107 private final static char LS = '\u2028';
108 private final static char RS = '\u001E';
109 private final static char US = '\u001F';
110 private final static Splitter SEGMENT_SPLITTER = Splitter.on(RS);
111 private final static Pattern ON_NBSP_SPLITTER = Pattern.compile("[" + SHY + ZWSP + "]*" + NBSP + "[" + SHY + ZWSP + NBSP + "]*");
112 private final static Pattern ON_SPACE_SPLITTER = Pattern.compile("[" + SHY + ZWSP + "]*[\\x20\t\\n\\r\\u2800" + NBSP + "][" + SHY + ZWSP + "\\x20\t\\n\\r\\u2800" + NBSP+ "]*");
113 private final static Pattern LINE_SPLITTER = Pattern.compile("[" + SHY + ZWSP + "]*[\\n\\r][" + SHY + ZWSP + "\\n\\r]*");
114 private final static Pattern WORD_SPLITTER = Pattern.compile("[\\x20\t\\n\\r\\u2800" + NBSP + "]+");
115
116 private LiblouisTableJnaImplProvider tableProvider;
117
118 @Reference(
119 name = "LiblouisTableJnaImplProvider",
120 unbind = "-",
121 service = LiblouisTableJnaImplProvider.class,
122 cardinality = ReferenceCardinality.MANDATORY,
123 policy = ReferencePolicy.STATIC
124 )
125 protected void bindLiblouisTableJnaImplProvider(LiblouisTableJnaImplProvider provider) {
126 tableProvider = provider;
127 logger.debug("Registering Liblouis table provider: " + provider);
128 }
129
130 protected void unbindLiblouisTableJnaImplProvider(LiblouisTableJnaImplProvider provider) {
131 tableProvider = null;
132 }
133
134 private final static Iterable<LiblouisTranslator> empty
135 = Iterables.<LiblouisTranslator>empty();
136
137 private final static List<String> supportedInput = ImmutableList.of("text-css");
138
139 protected final Iterable<LiblouisTranslator> _get(Query query) {
140 MutableQuery q = mutableQuery(query);
141 for (Feature f : q.removeAll("input"))
142 if (!supportedInput.contains(f.getValue().get()))
143 return empty;
144 if (q.containsKey("output")) {
145 String v = q.removeOnly("output").getValue().get();
146 if ("braille".equals(v)) {}
147 else
148 return empty; }
149 if (q.containsKey("translator"))
150 if (!"liblouis".equals(q.removeOnly("translator").getValue().get()))
151 return empty;
152 String table = null;
153 if (q.containsKey("liblouis-table"))
154 table = q.removeOnly("liblouis-table").getValue().get();
155 if (q.containsKey("table"))
156 if (table != null) {
157 logger.warn("A query with both 'table' and 'liblouis-table' never matches anything");
158 return empty; }
159 else
160 table = q.removeOnly("table").getValue().get();
161 String v = null;
162 if (q.containsKey("handle-non-standard-hyphenation"))
163 v = q.removeOnly("handle-non-standard-hyphenation").getValue().get();
164 else
165 v = "ignore";
166 final int handleNonStandardHyphenation = v.equalsIgnoreCase("fail") ?
167 LiblouisTranslatorImpl.NON_STANDARD_HYPH_FAIL : v.equalsIgnoreCase("defer") ?
168 LiblouisTranslatorImpl.NON_STANDARD_HYPH_DEFER :
169 LiblouisTranslatorImpl.NON_STANDARD_HYPH_IGNORE;
170 if (table != null)
171 q.add("table", table);
172 q.add("white-space");
173 Iterable<LiblouisTranslator> translators = memoize(
174 getSimpleTranslator(
175 q.asImmutable(),
176 handleNonStandardHyphenation));
177 if (translators.apply(NOP_LOGGER).iterator().hasNext()) {
178
179
180 DisplayTable displayTable = tableProvider.withContext(NOP_LOGGER).get(q).iterator().next().getDisplayTable();
181 BrailleTranslator unityTranslator = new UnityBrailleTranslator(
182 new LiblouisDisplayTableBrailleConverter(displayTable), false);
183 return Iterables.transform(
184 translators,
185 new Function<LiblouisTranslator,LiblouisTranslator>() {
186 public LiblouisTranslator _apply(LiblouisTranslator t) {
187 return __apply(logCreate(new HandleTextTransformNone(t, unityTranslator))); }});
188 } else
189 return translators;
190 }
191
192 private Iterable<LiblouisTranslator> getSimpleTranslator(Query query, int handleNonStandardHyphenation) {
193 return transform(
194 logSelect(query, tableProvider),
195 new Function<LiblouisTableJnaImpl,LiblouisTranslator>() {
196 public LiblouisTranslator _apply(LiblouisTableJnaImpl table) {
197 return __apply(
198 logCreate((LiblouisTranslator)new LiblouisTranslatorImpl(
199 table,
200 null,
201 handleNonStandardHyphenation)));
202 }
203 }
204 );
205 }
206
207 @Override
208 public ToStringHelper toStringHelper() {
209 return MoreObjects.toStringHelper("LiblouisTranslatorJnaImplProvider");
210 }
211
212 class LiblouisTranslatorImpl extends AbstractBrailleTranslator implements LiblouisTranslator {
213
/** The table this translator was created from. */
private final LiblouisTableJnaImpl table;

/** Liblouis translator obtained from {@link #table}. */
protected final Translator translator;

/** Display table obtained from {@link #table}. */
private final DisplayTable displayTable;

/** The hyphenator this translator was configured with, or {@code null}. */
private Hyphenator hyphenator;

/** View of {@link #hyphenator} as a full hyphenator; {@code null} if the hyphenator does not support it. */
protected FullHyphenator fullHyphenator;

/** View of {@link #hyphenator} as a line breaker; {@code null} if there is no hyphenator or it does not support it. */
private Hyphenator.LineBreaker lineBreaker;

/** Typeforms supported by {@link #translator}, keyed by name. */
private final Map<String,Typeform> supportedTypeforms;

/** One of the {@code NON_STANDARD_HYPH_*} constants. */
private final int handleNonStandardHyphenation;

/** Unicode normalization form obtained from {@link #table}; may be {@code null}, in which case no normalization is applied. */
private final Normalizer.Form unicodeNormalization;

public static final int NON_STANDARD_HYPH_IGNORE = 0;
public static final int NON_STANDARD_HYPH_FAIL = 1;
public static final int NON_STANDARD_HYPH_DEFER = 2;

/**
 * Creates a translator for a table.
 *
 * @param table the Liblouis table to translate with
 * @param hyphenator the hyphenator to use, or {@code null} to only use the compound word hyphenator
 * @param handleNonStandardHyphenation one of the {@code NON_STANDARD_HYPH_*} constants
 */
LiblouisTranslatorImpl(LiblouisTableJnaImpl table,
                       Hyphenator hyphenator,
                       int handleNonStandardHyphenation) {
    super(hyphenator, null);
    this.table = table;
    this.translator = table.getTranslator();
    this.displayTable = table.getDisplayTable();
    this.handleNonStandardHyphenation = handleNonStandardHyphenation;
    this.supportedTypeforms = translator.getSupportedTypeforms()
        .stream()
        .collect(Collectors.toMap(Typeform::getName, java.util.function.Function.identity()));
    this.unicodeNormalization = table.getUnicodeNormalizationForm();
    this.hyphenator = hyphenator;
    if (hyphenator != null) {
        // a hyphenator may support only one of the two views; an unsupported one simply stays null
        try {
            fullHyphenator = new HyphenatorAsFullHyphenator(hyphenator);
        } catch (UnsupportedOperationException e) {
        }
        try {
            lineBreaker = hyphenator.asLineBreaker();
        } catch (UnsupportedOperationException e) {
        }
    } else {
        fullHyphenator = compoundWordHyphenator;
    }
}
270
/**
 * Creates a copy of an existing translator that uses a different hyphenator.
 *
 * @param from the translator to copy the table-derived state from
 * @param hyphenator the hyphenator of the new translator, or {@code null}
 */
private LiblouisTranslatorImpl(LiblouisTranslatorImpl from, Hyphenator hyphenator) {
    super(from);
    this.table = from.table;
    this.translator = from.translator;
    this.displayTable = from.displayTable;
    this.handleNonStandardHyphenation = from.handleNonStandardHyphenation;
    this.supportedTypeforms = from.supportedTypeforms;
    this.unicodeNormalization = from.unicodeNormalization;
    this.hyphenator = hyphenator;
    if (hyphenator != null) {
        // a hyphenator may support only one of the two views; an unsupported one simply stays null
        try {
            fullHyphenator = new HyphenatorAsFullHyphenator(hyphenator);
        } catch (UnsupportedOperationException e) {
        }
        try {
            lineBreaker = hyphenator.asLineBreaker();
        } catch (UnsupportedOperationException e) {
        }
    } else {
        fullHyphenator = compoundWordHyphenator;
    }
}
290
291
292 public LiblouisTable asLiblouisTable() {
293 return table;
294 }
295
296 @Override
297 public LiblouisTranslatorImpl _withHyphenator(Hyphenator hyphenator) {
298 if (hyphenator == this.hyphenator)
299 return this;
300 LiblouisTranslatorImpl t = new LiblouisTranslatorImpl(this, hyphenator);
301 LiblouisTranslatorJnaImplProvider.this.rememberId(t);
302 return t;
303 }
304
305 private FromTypeformedTextToBraille fromTypeformedTextToBraille;
306
307 public FromTypeformedTextToBraille fromTypeformedTextToBraille() {
308 if (fromTypeformedTextToBraille == null)
309 fromTypeformedTextToBraille = new FromTypeformedTextToBraille() {
310 public String[] transform(String[] text, String[] emphClasses) {
311 Typeform[] typeform = new Typeform[emphClasses.length];
312 for (int i = 0; i < typeform.length; i++) {
313 typeform[i] = supportedTypeforms.get(emphClasses[i]);
314 if (typeform[i] == null)
315 logger.warn("emphclass 'italic' not defined in table {}", translator.getTable());
316 }
317 return LiblouisTranslatorImpl.this.transform(text, typeform);
318 }
319 @Override
320 public String toString() {
321 return LiblouisTranslatorImpl.this.toString();
322 }
323 };
324 return fromTypeformedTextToBraille;
325 }
326
327 private FromStyledTextToBraille fromStyledTextToBraille;
328
329
330
331
332
333 @Override
334 public FromStyledTextToBraille fromStyledTextToBraille() {
335 if (fromStyledTextToBraille == null)
336 fromStyledTextToBraille = new FromStyledTextToBraille() {
337 public java.lang.Iterable<String> transform(java.lang.Iterable<CSSStyledText> styledText, int from, int to)
338 throws TransformationException {
339 try {
340 List<String> result = LiblouisTranslatorImpl.this.transform(styledText, false, false);
341 if (to < 0) to = result.size();
342 if (from > 0 || to < result.size())
343 return result.subList(from, to);
344 else
345 return result;
346 } catch (NonStandardHyphenationException e) {
347 throw new TransformationException(e);
348 }
349 }
350 @Override
351 public String toString() {
352 return LiblouisTranslatorImpl.this.toString();
353 }
354 };
355 return fromStyledTextToBraille;
356 }
357
358 private LineBreakingFromStyledText lineBreakingFromStyledText;
359
360 @Override
361 public LineBreakingFromStyledText lineBreakingFromStyledText() {
362 if (lineBreakingFromStyledText == null)
363 lineBreakingFromStyledText = new LineBreaker(
364 new FromStyledTextToBraille() {
365 public java.lang.Iterable<String> transform(java.lang.Iterable<CSSStyledText> styledText, int from, int to) {
366 List<String> result = LiblouisTranslatorImpl.this.transform(styledText, true, true);
367 if (to < 0) to = result.size();
368 if (from > 0 || to < result.size())
369 return result.subList(from, to);
370 else
371 return result;
372 }
373 }) {
374 @Override
375 public String toString() {
376 return LiblouisTranslatorImpl.this.toString();
377 }
378 };
379 return lineBreakingFromStyledText;
380 }
381
382 class LineBreaker extends DefaultLineBreaker {
383
/** Translator that translates and fully hyphenates the text in one go. */
final FromStyledTextToBraille fullTranslator;

/**
 * @param fullTranslator the translator tried first by {@code translateAndHyphenate}
 */
protected LineBreaker(FromStyledTextToBraille fullTranslator) {
    // The first two arguments are the display table's encodings of the blank braille pattern
    // (U+2800) and of the dots-36 pattern (U+2824).
    super(
        displayTable.encode('\u2800'),
        displayTable.encode('\u2824'),
        new LiblouisDisplayTableBrailleConverter(displayTable),
        logger
    );
    this.fullTranslator = fullTranslator;
}
394
395 protected BrailleStream translateAndHyphenate(java.lang.Iterable<CSSStyledText> styledText, int from, int to) {
396
397 java.lang.Iterable<CSSStyledText> styledTextCopy
398 = org.daisy.pipeline.braille.common.util.Iterables.clone(styledText);
399 java.lang.Iterable<String> braille;
400 try {
401 braille = fullTranslator.transform(styledTextCopy); }
402 catch (NonStandardHyphenationException e) {
403 return new BrailleStreamImpl(styledText,
404 from,
405 to); }
406
407 Iterator<SimpleInlineStyle> style = Iterators.transform(styledTextCopy.iterator(), CSSStyledText::getStyle);
408 List<String> brailleWithPreservedWS = new ArrayList<>(); {
409 for (String s : braille) {
410
411
412 SimpleInlineStyle st = style.next();
413 if (st != null) {
414 CSSProperty ws = st.getProperty("white-space");
415 if (ws != null) {
416 if (ws == WhiteSpace.PRE_WRAP)
417 s = s.replaceAll("[\\x20\t\\u2800]+", "$0"+ZWSP)
418 .replaceAll("[\\x20\t\\u2800]", ""+NBSP);
419 if (ws == WhiteSpace.PRE_WRAP || ws == WhiteSpace.PRE_LINE)
420 s = s.replaceAll("[\\n\\r]", ""+LS); }}
421 brailleWithPreservedWS.add(s);
422 }
423 }
424 StringBuilder joined = new StringBuilder();
425 int fromChar = 0;
426 int toChar = to >= 0 ? 0 : -1;
427 for (String s : brailleWithPreservedWS) {
428 joined.append(s);
429 if (--from == 0)
430 fromChar = joined.length();
431 if (--to == 0)
432 toChar = joined.length();
433 }
434 return new FullyHyphenatedAndTranslatedString(joined.toString(), fromChar, toChar);
435 }
436
437 class BrailleStreamImpl implements BrailleStream {
438
/** Language of each input segment. */
final Locale[] languages;

/** Typeform of each input segment, derived from its style. */
final Typeform[] typeform;

/** Whether automatic hyphenation is enabled for each input segment ("hyphens: auto"). */
final boolean[] hyphenate;

/** Whether line breaks are preserved in each input segment ("white-space: pre-wrap" or "pre-line"). */
final boolean[] preserveLines;

/** Whether all white space is preserved in each input segment ("white-space: pre-wrap"). */
final boolean[] preserveSpace;

/** Letter spacing of each input segment (never negative). */
final int[] letterSpacing;

/** The input text, further split into alternating non-white-space and white-space parts. */
final String[] textWithWs;

/** For each entry of {@link #textWithWs}, whether it is a white space part (a delimiter match). */
final boolean[] pre;

/** For each entry of {@link #textWithWs}, the index of the input segment it came from. */
final int[] textWithWsMapping;

/** The text that is translated: all parts joined, with hyphen and separator characters extracted. */
String joinedText;

/** For each code point of the joined text, the index of the {@link #textWithWs} entry it belongs to. */
int[] joinedTextMapping;

/** Hyphen information extracted from the text (soft hyphens and zero width spaces); may be null. */
byte[] manualHyphens;

/** The translation of {@link #joinedText}; null until it has been computed. */
String joinedBraille;

/** For each braille character, the position in the joined text it corresponds to. */
int[] characterIndicesInBraille;

/** Like {@link #characterIndicesInBraille}, but for the positions between braille characters. */
int[] interCharacterIndicesInBraille;

/** Current position in {@link #joinedText} (in code points). */
int curPos = -1;
/** Current position in {@link #joinedBraille}. */
int curPosInBraille = -1;
/** End position in {@link #joinedText} (in code points). */
int endPos = -1;
/** End position in {@link #joinedBraille}. */
int endPosInBraille = -1;
/** Index after the last input segment that is part of the stream. */
final int to;

/**
 * Prepares a stream over the given styled text.
 *
 * <p>The constructor normalizes the text, interprets and removes the supported style
 * properties (warning about the rest), splits the text on white space that must be preserved,
 * joins everything into a single string and computes the start and end positions in that
 * string. Note that the style objects of the input are modified.</p>
 *
 * @param styledText the text to translate
 * @param from index of the first segment that is part of the stream
 * @param to index after the last segment that is part of the stream, or negative for "to the end"
 */
BrailleStreamImpl(java.lang.Iterable<CSSStyledText> styledText,
                  int from,
                  int to) {

    int size = size(styledText);
    if (to < 0) to = size;
    this.to = to;

    // convert Iterable<CSSStyledText> to arrays
    String[] text = new String[size];
    SimpleInlineStyle[] styles = new SimpleInlineStyle[size];
    languages = new Locale[size];
    {
        int i = 0;
        for (CSSStyledText t : styledText) {
            text[i] = t.getText();
            styles[i] = t.getStyle();
            languages[i] = t.getLanguage();
            i++;
        }
    }

    // perform Unicode normalization if the table asks for it
    if (unicodeNormalization != null)
        for (int k = 0; k < text.length; k++)
            text[k] = Normalizer.normalize(text[k], unicodeNormalization);

    // interpret the style of each segment
    {
        typeform = new Typeform[size];
        hyphenate = new boolean[size];
        preserveLines = new boolean[size];
        preserveSpace = new boolean[size];
        letterSpacing = new int[size];
        for (int i = 0; i < size; i++) {
            typeform[i] = Typeform.PLAIN_TEXT;
            hyphenate[i] = false;
            preserveLines[i] = preserveSpace[i] = false;
            letterSpacing[i] = 0;
            SimpleInlineStyle style = styles[i];
            if (style != null) {
                CSSProperty val = style.getProperty("white-space");
                if (val != null) {
                    if (val == WhiteSpace.PRE_WRAP)
                        preserveLines[i] = preserveSpace[i] = true;
                    else if (val == WhiteSpace.PRE_LINE)
                        preserveLines[i] = true;
                    style.removeProperty("white-space");
                }
                val = style.getProperty("text-transform");
                if (val != null) {
                    if (val == TextTransform.NONE) {
                        // "text-transform: none": only the braille charset is handled; all the
                        // other properties of this segment are skipped
                        val = style.getProperty("braille-charset");
                        if (val != null) {
                            if (val == BrailleCharset.CUSTOM)
                                text[i] = displayTable.decode(text[i]);
                            style.removeProperty("braille-charset");
                        }
                        style.removeProperty("text-transform");
                        continue;
                    }
                    else if (val == TextTransform.AUTO) {}
                    else if (val == TextTransform.list_values) {
                        TermList values = style.getValue(TermList.class, "text-transform");
                        text[i] = textFromTextTransform(text[i], values);
                        typeform[i] = typeform[i].add(typeformFromTextTransform(values, translator, supportedTypeforms));
                    }
                    style.removeProperty("text-transform");
                }
                val = style.getProperty("hyphens");
                if (val != null) {
                    if (val == Hyphens.AUTO)
                        hyphenate[i] = true;
                    else if (val == Hyphens.NONE)
                        // drop the hyphenation opportunities that are present in the text
                        text[i] = extractHyphens(text[i], false, SHY, ZWSP)._1;
                    style.removeProperty("hyphens");
                }
                val = style.getProperty("letter-spacing");
                if (val != null) {
                    if (val == LetterSpacing.length) {
                        letterSpacing[i] = style.getValue(TermInteger.class, "letter-spacing").getIntValue();
                        if (letterSpacing[i] < 0) {
                            logger.warn("letter-spacing: {} not supported, must be non-negative", val);
                            letterSpacing[i] = 0;
                        }
                    }
                    style.removeProperty("letter-spacing");
                }
                typeform[i] = typeform[i].add(typeformFromInlineCSS(style, translator, supportedTypeforms));
                // whatever is left at this point is not supported
                for (String prop : style.getPropertyNames())
                    logger.warn("{}: {} not supported", prop, style.get(prop));
            }
        }
    }

    // split the text on the white space that needs special treatment
    {
        List<String> l1 = new ArrayList<String>();
        List<Boolean> l2 = new ArrayList<Boolean>();
        List<Integer> l3 = new ArrayList<Integer>();
        for (int i = 0; i < text.length; i++) {
            String t = text[i];
            if (t.isEmpty()) {
                l1.add(t);
                l2.add(false);
                l3.add(i);
            }
            else {
                Pattern ws;
                if (preserveSpace[i])
                    ws = ON_SPACE_SPLITTER;
                else if (preserveLines[i])
                    ws = LINE_SPLITTER;
                else
                    ws = ON_NBSP_SPLITTER;
                // the parts alternate between text and delimiter, starting with text
                boolean p = false;
                for (String s : splitInclDelimiter(t, ws)) {
                    if (!s.isEmpty()) {
                        l1.add(s);
                        l2.add(p);
                        l3.add(i);
                    }
                    p = !p;
                }
            }
        }
        int len = l1.size();
        textWithWs = new String[len];
        pre = new boolean[len];
        textWithWsMapping = new int[len];
        for (int i = 0; i < len; i++) {
            textWithWs[i] = l1.get(i);
            pre[i] = l2.get(i);
            textWithWsMapping[i] = l3.get(i);
        }
    }

    // join the parts, replacing each white space part with a single no-break space
    {
        String[] textWithWsReplaced = new String[textWithWs.length];
        for (int i = 0; i < textWithWs.length; i++)
            textWithWsReplaced[i] = pre[i] ? ""+NBSP : textWithWs[i];
        Tuple2<String,byte[]> t = extractHyphens(join(textWithWsReplaced, RS), true, SHY, ZWSP);
        manualHyphens = t._2;
        String[] nohyph = toArray(SEGMENT_SPLITTER.split(t._1), String.class);
        joinedTextMapping = new int[lengthByCodePoints(join(nohyph))];
        int i = 0;
        int j = 0;
        for (String s : nohyph) {
            int l = lengthByCodePoints(s);
            for (int k = 0; k < l; k++)
                joinedTextMapping[i++] = j;
            j++;
        }
        t = extractHyphens(manualHyphens, t._1, true, null, null, null, RS);
        // NOTE(review): the second element of this result is not stored. If extracting the
        // separators changes the positions that manualHyphens refers to, manualHyphens is no
        // longer aligned with joinedText -- confirm against extractHyphens.
        joinedText = t._1;
    }

    // compute the initial curPos and endPos from 'from' and 'to'
    {
        int fromChar = -1;
        int toChar = -1;
        for (int i = 0; i < joinedTextMapping.length; i++) {
            if (fromChar < 0 || (toChar < 0 && to >= 0)) {
                int indexInText = textWithWsMapping[joinedTextMapping[i]];
                if (fromChar < 0 && indexInText >= from)
                    fromChar = i;
                if (toChar < 0 && indexInText >= to)
                    toChar = i;
            } else
                break;
        }
        // No character at or after 'from' (empty text, or 'from' past the end): the stream is
        // empty. Leaving curPos at -1 would make next() use it as an array index.
        if (fromChar < 0) fromChar = joinedTextMapping.length;
        if (toChar < 0) toChar = joinedTextMapping.length;
        this.curPos = fromChar;
        this.endPos = toChar;
    }
}
645
646 public String next(final int limit, final boolean force, boolean allowHyphens) {
647 String next = "";
648 if (limit > 0) {
649 int available = limit;
650 segments: while (true) {
651 if (curPos == endPos)
652 break;
653 if (joinedBraille == null)
654 updateBraille();
655 int curSegment = joinedTextMapping[curPos];
656 int curSegmentEnd; {
657 int i = curPos;
658 for (; i < endPos; i++)
659 if (joinedTextMapping[i] > curSegment)
660 break;
661 curSegmentEnd = i; }
662 int curSegmentEndInBraille = positionInBraille(curSegmentEnd);
663 if (curSegmentEndInBraille == curPosInBraille)
664 continue segments;
665 String segment = substringByCodePoints(joinedText, curPos, curSegmentEnd);
666 String segmentInBraille = joinedBraille.substring(curPosInBraille, curSegmentEndInBraille);
667 byte[] segmentManualHyphens = manualHyphens != null
668 ? Arrays.copyOfRange(manualHyphens, curPos, curSegmentEnd - 1)
669 : null;
670
671
672 if (pre[curSegment]) {
673 Matcher m = Pattern.compile("\\xA0([\\xAD\\u200B]*)").matcher(segmentInBraille);
674 if (m.matches()) {
675 String restoredSpace = segment.replaceAll("[\\x20\t\\u2800]", ""+NBSP)
676 .replaceAll("[\\n\\r]", ""+LS) + m.group(1);
677 next += restoredSpace;
678 available -= lengthByCodePoints(restoredSpace);
679 curPos = curSegmentEnd;
680 curPosInBraille = curSegmentEndInBraille;
681 continue segments; }}
682
683
684
685 if (segmentInBraille.length() <= available) {
686 segmentInBraille = addLetterSpacing(segment, segmentInBraille, curPos, curPosInBraille,
687 letterSpacing[textWithWsMapping[curSegment]]);
688 next += segmentInBraille;
689 available -= segmentInBraille.length();
690 curPos = curSegmentEnd;
691 curPosInBraille = curSegmentEndInBraille;
692 continue segments; }
693
694
695 Locale language = languages[textWithWsMapping[curSegment]];
696 if (!hyphenate[textWithWsMapping[curSegment]]) {
697 segmentInBraille = addHyphensAndLetterSpacing(compoundWordHyphenator,
698 segment, segmentInBraille, curPos, curPosInBraille,
699 segmentManualHyphens, language,
700 letterSpacing[textWithWsMapping[curSegment]]);
701 next += segmentInBraille;
702 available -= segmentInBraille.length();
703 curPos = curSegmentEnd;
704 curPosInBraille = curSegmentEndInBraille;
705 continue segments; }
706
707
708 if (fullHyphenator != null) {
709 if (fullHyphenator == compoundWordHyphenator)
710 logger.warn("hyphens: auto not supported");
711 try {
712 segmentInBraille = addHyphensAndLetterSpacing(fullHyphenator, segment, segmentInBraille, curPos, curPosInBraille,
713 segmentManualHyphens, language,
714 letterSpacing[textWithWsMapping[curSegment]]);
715 next += segmentInBraille;
716 available -= segmentInBraille.length();
717 curPos = curSegmentEnd;
718 curPosInBraille = curSegmentEndInBraille;
719 continue segments; }
720 catch (NonStandardHyphenationException e) {}}
721
722
723 Matcher m = WORD_SPLITTER.matcher(segment);
724 int segmentStart = curPos;
725 boolean foundSpace;
726 while ((foundSpace = m.find()) || curPos < curSegmentEnd) {
727 int wordEnd = foundSpace ? segmentStart + m.start() : curSegmentEnd;
728 if (wordEnd > curPos) {
729 int wordEndInBraille = positionInBraille(wordEnd);
730 if (wordEndInBraille > curPosInBraille) {
731 String word = substringByCodePoints(joinedText, curPos, wordEnd);
732 String wordInBraille = joinedBraille.substring(curPosInBraille, wordEndInBraille);
733 byte[] wordManualHyphens = manualHyphens != null
734 ? Arrays.copyOfRange(manualHyphens, curPos, wordEnd - 1)
735 : null;
736
737
738 if (wordInBraille.length() <= available) {
739 next += wordInBraille;
740 available -= wordInBraille.length();
741 curPos = wordEnd;
742 curPosInBraille = wordEndInBraille; }
743 else {
744
745
746 try {
747 if (fullHyphenator == null) throw new NonStandardHyphenationException();
748 wordInBraille = addHyphensAndLetterSpacing(fullHyphenator, word, wordInBraille, curPos, curPosInBraille,
749 wordManualHyphens, language,
750 letterSpacing[textWithWsMapping[curSegment]]);
751 next += wordInBraille;
752 available -= wordInBraille.length();
753 curPos = wordEnd;
754 curPosInBraille = wordEndInBraille; }
755 catch (NonStandardHyphenationException ee) {
756
757
758
759
760
761 if (!next.isEmpty())
762 break segments;
763
764
765 if (lineBreaker == null) throw ee;
766 Hyphenator.LineIterator lines = lineBreaker.transform(word, language);
767
768
769 LineBreakSolution bestSolution = null;
770 int left = 1;
771 int right = lengthByCodePoints(word) - 1;
772 int textAvailable = available;
773 if (textAvailable > right)
774 textAvailable = right;
775 if (textAvailable < left)
776 break segments;
777 while (true) {
778 String line = lines.nextLine(textAvailable, force && next.isEmpty(), allowHyphens);
779 String replacementWord = line + lines.remainder();
780 if (updateInput(curPos, wordEnd, replacementWord)) {
781 wordEnd = curPos + lengthByCodePoints(replacementWord);
782 updateBraille(); }
783 int lineEnd = curPos + lengthByCodePoints(line);
784 int lineEndInBraille = positionInBraille(lineEnd);
785 String lineInBraille = joinedBraille.substring(curPosInBraille, lineEndInBraille);
786 lineInBraille = addLetterSpacing(line, lineInBraille, curPos, curPosInBraille,
787 letterSpacing[textWithWsMapping[curSegment]]);
788 int lineInBrailleLength = lineInBraille.length();
789 if (lines.lineHasHyphen()) {
790 lineInBraille += "\u00ad";
791 lineInBrailleLength++; }
792 if (lineInBrailleLength == available) {
793 bestSolution = new LineBreakSolution(); {
794 bestSolution.line = line;
795 bestSolution.replacementWord = replacementWord;
796 bestSolution.lineInBraille = lineInBraille;
797 bestSolution.lineInBrailleLength = lineInBrailleLength; }
798 left = textAvailable + 1;
799 right = textAvailable - 1; }
800 else if (lineInBrailleLength < available) {
801 left = textAvailable + 1;
802 if (bestSolution == null || lineInBrailleLength > bestSolution.lineInBrailleLength) {
803 bestSolution = new LineBreakSolution(); {
804 bestSolution.line = line;
805 bestSolution.replacementWord = replacementWord;
806 bestSolution.lineInBraille = lineInBraille;
807 bestSolution.lineInBrailleLength = lineInBrailleLength; }}}
808 else
809 right = textAvailable - 1;
810 lines.reset();
811 textAvailable = (right + left) / 2;
812 if (textAvailable < left || textAvailable > right) {
813 if (bestSolution != null) {
814 next += bestSolution.lineInBraille;
815 available = 0;
816 if (updateInput(curPos, wordEnd, bestSolution.replacementWord))
817 updateBraille();
818 curPos += lengthByCodePoints(bestSolution.line);
819 curPosInBraille = positionInBraille(curPos); }
820 else if (force && next.isEmpty()) {
821 next = wordInBraille;
822 available = 0;
823 curPos = wordEnd;
824 curPosInBraille = wordEndInBraille; }
825 break segments; } }}}}}
826 if (foundSpace) {
827 int spaceEnd = segmentStart + m.end();
828 int spaceEndInBraille = positionInBraille(spaceEnd);
829 if (spaceEndInBraille > curPosInBraille) {
830 String spaceInBraille = joinedBraille.substring(curPosInBraille, spaceEndInBraille);
831 next += spaceInBraille;
832 available -= spaceInBraille.length();
833 curPos = spaceEnd;
834 curPosInBraille = spaceEndInBraille; }}}}
835 }
836 if (lastPeek != null && !next.isEmpty() && next.charAt(0) != lastPeek)
837 throw new IllegalStateException();
838 lastPeek = null;
839 return next;
840 }
841
842 public boolean hasNext() {
843 if (joinedBraille == null)
844 updateBraille();
845 boolean hasNextOutput = curPosInBraille < endPosInBraille;
846 boolean hasNextInput = curPos < endPos;
847 if (hasNextInput != hasNextOutput)
848 throw new RuntimeException("coding error");
849 return hasNextOutput;
850 }
851
852 Character lastPeek = null;
853
854 public Character peek() {
855 if (joinedBraille == null)
856 updateBraille();
857 lastPeek = joinedBraille.charAt(curPosInBraille);
858 return lastPeek;
859 }
860
861
862
863 public String remainder() {
864 if (joinedBraille == null)
865 updateBraille();
866 return joinedBraille.substring(curPosInBraille, endPosInBraille);
867 }
868
869
870
871 public boolean hasPrecedingSpace() {
872 if (joinedBraille == null)
873 updateBraille();
874 return DefaultLineBreaker.hasPrecedingSpace(joinedBraille, curPosInBraille);
875 }
876
877 @Override
878 public Object clone() {
879 try {
880 BrailleStreamImpl clone = (BrailleStreamImpl)super.clone();
881 if (joinedTextMapping != null)
882 clone.joinedTextMapping = joinedTextMapping.clone();
883 if (manualHyphens != null)
884 clone.manualHyphens = manualHyphens.clone();
885 if (characterIndicesInBraille != null)
886 clone.characterIndicesInBraille = characterIndicesInBraille.clone();
887 if (interCharacterIndicesInBraille != null)
888 clone.interCharacterIndicesInBraille = interCharacterIndicesInBraille.clone();
889 return clone;
890 } catch (CloneNotSupportedException e) {
891 throw new InternalError("coding error");
892 }
893 }
894
895 private int positionInBraille(int pos) {
896 int posInBraille = curPosInBraille;
897 if (posInBraille < 0) posInBraille = 0;
898 for (; posInBraille < joinedBraille.length(); posInBraille++)
899 if (characterIndicesInBraille[posInBraille] >= pos)
900 break;
901 return posInBraille;
902 }
903
904 private String addHyphensAndLetterSpacing(FullHyphenator fullHyphenator,
905 String segment,
906 String segmentInBraille,
907 int curPos,
908 int curPosInBraille,
909 byte[] manualHyphens,
910 Locale language,
911 int letterSpacing) {
912 byte[] hyphens = fullHyphenator.hyphenate(
913
914 insertHyphens(segment, manualHyphens, true, SHY, ZWSP),
915 language);
916
917 byte[] hyphensAndLetterBoundaries
918 = (letterSpacing > 0) ? detectLetterBoundaries(hyphens, segment, (byte)4) : hyphens;
919 if (hyphensAndLetterBoundaries == null && manualHyphens == null)
920 return segment;
921 byte[] hyphensAndLetterBoundariesInBraille = new byte[segmentInBraille.length() - 1];
922 if (hyphensAndLetterBoundaries != null) {
923 for (int i = 0; i < hyphensAndLetterBoundariesInBraille.length; i++) {
924 int pos = interCharacterIndicesInBraille[curPosInBraille + i] - 1;
925 if (pos >= 0)
926 hyphensAndLetterBoundariesInBraille[i] = hyphensAndLetterBoundaries[pos - curPos];
927 }
928 }
929 String r = insertHyphens(segmentInBraille, hyphensAndLetterBoundariesInBraille, false, SHY, ZWSP, US);
930 return (letterSpacing > 0) ? applyLetterSpacing(r, letterSpacing) : r;
931 }
932
933 private String addLetterSpacing(String segment,
934 String segmentInBraille,
935 int curPos,
936 int curPosInBraille,
937 int letterSpacing) {
938 if (letterSpacing > 0) {
939
940 byte[] letterBoundaries = detectLetterBoundaries(null, segment, (byte)1);
941 byte[] letterBoundariesInBraille = new byte[segmentInBraille.length() - 1];
942 for (int i = 0; i < letterBoundariesInBraille.length; i++) {
943 int pos = interCharacterIndicesInBraille[curPosInBraille + i] - 1;
944 if (pos >= 0)
945 letterBoundariesInBraille[i] = letterBoundaries[pos - curPos];
946 }
947 return applyLetterSpacing(insertHyphens(segmentInBraille, letterBoundariesInBraille, false, US), letterSpacing); }
948 else
949 return segmentInBraille;
950 }
951
952 private boolean updateInput(int start, int end, String replacement) {
953 if (substringByCodePoints(joinedText, start, end).equals(replacement))
954 return false;
955 joinedText = substringByCodePoints(joinedText, 0, start) + replacement + substringByCodePoints(joinedText, end);
956 {
957 int[] updatedJoinedTextMapping = new int[lengthByCodePoints(joinedText)];
958 int i = 0;
959 int j = 0;
960 while (i < start)
961 updatedJoinedTextMapping[j++] = joinedTextMapping[i++];
962 int startSegment = joinedTextMapping[start];
963 while (i < end)
964 if (joinedTextMapping[i++] != startSegment)
965 throw new RuntimeException("Coding error");
966 while (j < start + lengthByCodePoints(replacement))
967 updatedJoinedTextMapping[j++] = startSegment;
968 while (j < updatedJoinedTextMapping.length)
969 updatedJoinedTextMapping[j++] = joinedTextMapping[i++];
970 joinedTextMapping = updatedJoinedTextMapping;
971 }
972
973 if (manualHyphens != null) {
974 byte[] updatedManualHyphens = new byte[lengthByCodePoints(joinedText) - 1];
975 int i = 0;
976 int j = 0;
977 while (i < start)
978 updatedManualHyphens[j++] = manualHyphens[i++];
979 while (j < start + lengthByCodePoints(replacement) - 1)
980 updatedManualHyphens[j++] = 0;
981 i = end - 1;
982 while (j < updatedManualHyphens.length)
983 updatedManualHyphens[j++] = manualHyphens[i++];
984 manualHyphens = updatedManualHyphens;
985 }
986 {
987 int toChar = -1;
988 if (to >= 0)
989 for (int i = 0; i < joinedTextMapping.length; i++)
990 if (textWithWsMapping[joinedTextMapping[i]] >= to) {
991 toChar = i;
992 break; }
993 this.endPos = toChar > 0 ? toChar : joinedTextMapping.length;
994 }
995 return true;
996 }
997
998
999 private void updateBraille() {
1000 int joinedTextLength = lengthByCodePoints(joinedText);
1001
1002 String partBeforeCurPos = curPosInBraille > 0 ? joinedBraille.substring(0, curPosInBraille): null;
1003 int[] characterIndices = new int[joinedTextLength]; {
1004 for (int i = 0; i < joinedTextLength; i++)
1005 characterIndices[i] = i; }
1006 int[] interCharacterIndices = new int[joinedTextLength - 1]; {
1007 for (int i = 0; i < joinedTextLength - 1; i++)
1008 interCharacterIndices[i] = i + 1; }
1009
1010
1011 Typeform[] _typeform = null;
1012 for (Typeform t : typeform)
1013 if (t != Typeform.PLAIN_TEXT) {
1014 _typeform = new Typeform[joinedTextLength];
1015 for (int i = 0; i < _typeform.length; i++)
1016 _typeform[i] = typeform[textWithWsMapping[joinedTextMapping[i]]];
1017 break; }
1018 try {
1019 TranslationResult r = translator.translate(joinedText, _typeform, characterIndices, interCharacterIndices, displayTable);
1020 joinedBraille = r.getBraille();
1021 if (lengthByCodePoints(joinedBraille) != joinedBraille.length())
1022 throw new RuntimeException();
1023 characterIndicesInBraille = r.getCharacterAttributes();
1024 interCharacterIndicesInBraille = r.getInterCharacterAttributes(); }
1025 catch (TranslationException e) {
1026 throw new RuntimeException(e); }
1027 catch (DisplayException e) {
1028 throw new RuntimeException(e); }
1029 if (partBeforeCurPos != null)
1030 if (!joinedBraille.substring(0, curPosInBraille).equals(partBeforeCurPos))
1031 throw new IllegalStateException();
1032 int newCurPosInBraille = positionInBraille(curPos);
1033 if (curPosInBraille >= 0) {
1034 if (curPosInBraille != newCurPosInBraille)
1035 throw new IllegalStateException();
1036 } else
1037 curPosInBraille = newCurPosInBraille;
1038 endPosInBraille = positionInBraille(endPos);
1039 }
1040 }
1041 }
1042
1043 private List<String> transform(java.lang.Iterable<CSSStyledText> styledText,
1044 boolean forceBraille,
1045 boolean failWhenNonStandardHyphenation) throws NonStandardHyphenationException {
1046 try {
1047 if (fullHyphenator == compoundWordHyphenator)
1048 if (any(styledText, t -> {
1049 SimpleInlineStyle style = t.getStyle();
1050 return style != null && style.getProperty("hyphens") == Hyphens.AUTO; }))
1051 logger.warn("hyphens: auto not supported");
1052 styledText = fullHyphenator.transform(styledText); }
1053 catch (NonStandardHyphenationException e) {
1054 if (failWhenNonStandardHyphenation)
1055 throw e;
1056 else
1057 switch (handleNonStandardHyphenation) {
1058 case NON_STANDARD_HYPH_IGNORE:
1059 logger.warn("hyphens: auto can not be applied due to non-standard hyphenation points.");
1060 break;
1061 case NON_STANDARD_HYPH_FAIL:
1062 logger.error("hyphens: auto can not be applied due to non-standard hyphenation points.");
1063 throw e;
1064 case NON_STANDARD_HYPH_DEFER:
1065 if (forceBraille) {
1066 logger.error("hyphens: auto can not be applied due to non-standard hyphenation points.");
1067 throw e; }
1068 logger.debug("Deferring hyphenation to formatting phase due to non-standard hyphenation points.");
1069
1070
1071 List<String> result = new ArrayList<>();
1072 for (CSSStyledText t : styledText) result.add(t.getText());
1073 return result; }}
1074
1075 int size = size(styledText);
1076 String[] text = new String[size];
1077 SimpleInlineStyle[] style = new SimpleInlineStyle[size];
1078 int i = 0;
1079 for (CSSStyledText t : styledText) {
1080 text[i] = t.getText();
1081 style[i] = t.getStyle();
1082 if (style[i] != null)
1083 style[i].removeProperty("hyphens");
1084 i++; }
1085 return Arrays.asList(transform(text, style));
1086 }
1087
1088 private String[] transform(String[] text, SimpleInlineStyle[] styles) {
1089 int size = text.length;
1090 Typeform[] typeform = new Typeform[size];
1091 boolean[] preserveLines = new boolean[size];
1092 boolean[] preserveSpace = new boolean[size];
1093 int[] letterSpacing = new int[size];
1094 for (int i = 0; i < size; i++) {
1095 typeform[i] = Typeform.PLAIN_TEXT;
1096 preserveLines[i] = preserveSpace[i] = false;
1097 letterSpacing[i] = 0;
1098 SimpleInlineStyle style = styles[i];
1099 if (style != null) {
1100 CSSProperty val = style.getProperty("white-space");
1101 if (val != null) {
1102 if (val == WhiteSpace.PRE_WRAP)
1103 preserveLines[i] = preserveSpace[i] = true;
1104 else if (val == WhiteSpace.PRE_LINE)
1105 preserveLines[i] = true;
1106
1107 }
1108 val = style.getProperty("text-transform");
1109 if (val != null) {
1110 if (val == TextTransform.NONE) {
1111
1112
1113
1114
1115
1116 val = style.getProperty("braille-charset");
1117 if (val != null) {
1118 if (val == BrailleCharset.CUSTOM)
1119
1120 text[i] = displayTable.decode(text[i]);
1121 style.removeProperty("braille-charset"); }
1122 style.removeProperty("text-transform");
1123 continue; }
1124 else if (val == TextTransform.AUTO) {}
1125 else if (val == TextTransform.list_values) {
1126 TermList values = style.getValue(TermList.class, "text-transform");
1127 text[i] = textFromTextTransform(text[i], values);
1128 typeform[i] = typeform[i].add(typeformFromTextTransform(values, translator, supportedTypeforms)); }
1129 style.removeProperty("text-transform"); }
1130 val = style.getProperty("letter-spacing");
1131 if (val != null) {
1132 if (val == LetterSpacing.length) {
1133 letterSpacing[i] = style.getValue(TermInteger.class, "letter-spacing").getIntValue();
1134 if (letterSpacing[i] < 0) {
1135 logger.warn("letter-spacing: {} not supported, must be non-negative", val);
1136 letterSpacing[i] = 0; }}
1137 style.removeProperty("letter-spacing"); }
1138 typeform[i] = typeform[i].add(typeformFromInlineCSS(style, translator, supportedTypeforms));
1139 for (String prop : style.getPropertyNames())
1140 if (!"white-space".equals(prop))
1141 logger.warn("{}: {} not supported", prop, style.get(prop)); }}
1142
1143 return transform(text, typeform, preserveLines, preserveSpace, letterSpacing);
1144 }
1145
1146 private String[] transform(String[] text, Typeform[] typeform) {
1147 int size = text.length;
1148 boolean[] preserveLines = new boolean[size];
1149 boolean[] preserveSpace = new boolean[size];
1150 int[] letterSpacing = new int[size];
1151 for (int i = 0; i < text.length; i++) {
1152 preserveLines[i] = preserveSpace[i] = false;
1153 letterSpacing[i] = 0; }
1154 return transform(text, typeform, preserveLines, preserveSpace, letterSpacing);
1155 }
1156
1157
1158 private String applyLetterSpacing(String text, int letterSpacing) {
1159 String space = "";
1160 for (int i = 0; i < letterSpacing; i++)
1161 space += NBSP;
1162 return text.replaceAll("\u001F", space);
1163 }
1164
1165 private String[] transform(String[] text,
1166 Typeform[] typeform,
1167 boolean[] preserveLines,
1168 boolean[] preserveSpace,
1169 int[] letterSpacing) {
1170
1171
1172 if (unicodeNormalization != null)
1173 for (int k = 0; k < text.length; k++)
1174 text[k] = Normalizer.normalize(text[k], unicodeNormalization);
1175
1176
1177
1178 String[] textWithWs;
1179
1180
1181 boolean[] pre;
1182
1183 int[] textWithWsMapping; {
1184 List<String> l1 = new ArrayList<String>();
1185 List<Boolean> l2 = new ArrayList<Boolean>();
1186 List<Integer> l3 = new ArrayList<Integer>();
1187 for (int i = 0; i < text.length; i++) {
1188 String t = text[i];
1189 if (t.isEmpty()) {
1190 l1.add(t);
1191 l2.add(false);
1192 l3.add(i); }
1193 else {
1194 Pattern ws;
1195 if (preserveSpace[i])
1196 ws = ON_SPACE_SPLITTER;
1197 else if (preserveLines[i])
1198 ws = LINE_SPLITTER;
1199 else
1200 ws = ON_NBSP_SPLITTER;
1201 boolean p = false;
1202 for (String s : splitInclDelimiter(t, ws)) {
1203 if (!s.isEmpty()) {
1204 l1.add(s);
1205 l2.add(p);
1206 l3.add(i); }
1207 p = !p; }}}
1208 int len = l1.size();
1209 textWithWs = new String[len];
1210 pre = new boolean[len];
1211 textWithWsMapping = new int[len];
1212 for (int i = 0; i < len; i++) {
1213 textWithWs[i] = l1.get(i);
1214 pre[i] = l2.get(i);
1215 textWithWsMapping[i] = l3.get(i); }
1216 }
1217
1218
1219
1220 String joinedText;
1221
1222 int[] joinedTextMapping;
1223
1224
1225 byte[] inputAttrs; {
1226 String[] textWithWsReplaced = new String[textWithWs.length];
1227 for (int i = 0; i < textWithWs.length; i++)
1228 textWithWsReplaced[i] = pre[i] ? ""+NBSP : textWithWs[i];
1229 Tuple2<String,byte[]> t = extractHyphens(join(textWithWsReplaced, RS), true, SHY, ZWSP);
1230 joinedText = t._1;
1231 inputAttrs = t._2;
1232 String[] nohyph = toArray(SEGMENT_SPLITTER.split(joinedText), String.class);
1233 joinedTextMapping = new int[lengthByCodePoints(join(nohyph))];
1234 int i = 0;
1235 int j = 0;
1236 for (String s : nohyph) {
1237 int l = lengthByCodePoints(s);
1238 for (int k = 0; k < l; k++)
1239 joinedTextMapping[i++] = j;
1240 j++; }
1241 t = extractHyphens(inputAttrs, joinedText, true, null, null, null, RS);
1242 joinedText = t._1;
1243 inputAttrs = t._2;
1244 if (joinedText.matches("\\xA0*"))
1245 return text;
1246 if (inputAttrs == null)
1247 inputAttrs = new byte[lengthByCodePoints(joinedText) - 1];
1248 }
1249
1250
1251 boolean someLetterSpacing = false; {
1252 for (int i = 0; i < letterSpacing.length; i++)
1253 if (letterSpacing[i] > 0) someLetterSpacing = true; }
1254 if (someLetterSpacing)
1255
1256 inputAttrs = detectLetterBoundaries(inputAttrs, joinedText, (byte)4);
1257
1258
1259 Typeform[] _typeform = null;
1260 for (Typeform t : typeform)
1261 if (t != Typeform.PLAIN_TEXT) {
1262 _typeform = new Typeform[lengthByCodePoints(joinedText)];
1263 for (int i = 0; i < _typeform.length; i++)
1264 _typeform[i] = typeform[textWithWsMapping[joinedTextMapping[i]]];
1265 break; }
1266
1267
1268 String[] brailleWithWs;
1269 try {
1270
1271
1272 String joinedBrailleWithoutHyphens;
1273 String joinedBraille;
1274 byte[] outputAttrs; {
1275 int[] inputAttrsAsInt = new int[inputAttrs.length];
1276 for (int i = 0; i < inputAttrs.length; i++)
1277 inputAttrsAsInt[i] = inputAttrs[i];
1278 TranslationResult r = translator.translate(joinedText, _typeform, null, inputAttrsAsInt, displayTable);
1279 joinedBrailleWithoutHyphens = r.getBraille();
1280 if (lengthByCodePoints(joinedBrailleWithoutHyphens) != joinedBrailleWithoutHyphens.length())
1281 throw new RuntimeException();
1282 int [] outputAttrsAsInt = r.getInterCharacterAttributes();
1283 if (outputAttrsAsInt != null) {
1284 outputAttrs = new byte[outputAttrsAsInt.length];
1285 for (int i = 0; i < outputAttrs.length; i++)
1286 outputAttrs[i] = (byte)outputAttrsAsInt[i];
1287 joinedBraille = insertHyphens(joinedBrailleWithoutHyphens, outputAttrs, false, SHY, ZWSP, US, RS); }
1288 else {
1289 joinedBraille = joinedBrailleWithoutHyphens;
1290 outputAttrs = null; }
1291 }
1292
1293
1294 if (textWithWs.length == 1)
1295 brailleWithWs = new String[]{joinedBraille};
1296 else {
1297
1298
1299 {
1300 brailleWithWs = new String[textWithWs.length];
1301 int i = 0;
1302 int imax = lengthByCodePoints(joinedText);
1303 int kmax = textWithWs.length;
1304 int k = (i < imax) ? joinedTextMapping[i] : kmax;
1305 int l = 0;
1306 while (l < k) brailleWithWs[l++] = "";
1307 for (String s : SEGMENT_SPLITTER.split(joinedBraille)) {
1308 brailleWithWs[l++] = s;
1309 while (k < l)
1310 k = (++i < imax) ? joinedTextMapping[i] : kmax;
1311 while (l < k)
1312 brailleWithWs[l++] = ""; }
1313 if (l == kmax) {
1314 boolean wsLost = false;
1315 for (k = 0; k < kmax; k++)
1316 if (pre[k]) {
1317 Matcher m = Pattern.compile("\\xA0([\\xAD\\u200B]*)").matcher(brailleWithWs[k]);
1318 if (m.matches())
1319 brailleWithWs[k] = textWithWs[k] + m.group(1);
1320 else
1321 wsLost = true; }
1322 if (wsLost) {
1323 logger.warn("White space was not preserved (see detailed log for more info)");
1324 logger.debug("White space was lost in the output.\n"
1325 + "Input: " + Arrays.toString(textWithWs) + "\n"
1326 + "Output: " + Arrays.toString(brailleWithWs)); }}
1327 else {
1328 logger.warn("Text segmentation was lost (see detailed log for more info)");
1329 logger.debug("Text segmentation was lost in the output. Falling back to fuzzy mode.\n"
1330 + "=> input segments: " + Arrays.toString(textWithWs) + "\n"
1331 + "=> output segments: " + Arrays.toString(Arrays.copyOf(brailleWithWs, l)));
1332 brailleWithWs = null; }
1333 }
1334
1335
1336 if (brailleWithWs == null) {
1337
1338
1339
1340 int[] inputSegmentNumbers = joinedTextMapping;
1341
1342
1343 TranslationResult r = translator.translate(joinedText, _typeform, inputSegmentNumbers, null, displayTable);
1344 if (!r.getBraille().equals(joinedBrailleWithoutHyphens))
1345 throw new RuntimeException("Coding error");
1346 int[] outputSegmentNumbers = r.getCharacterAttributes();
1347 brailleWithWs = new String[textWithWs.length];
1348 boolean wsLost = false;
1349 StringBuffer b = new StringBuffer();
1350 int jmax = joinedBrailleWithoutHyphens.length();
1351 int kmax = textWithWs.length;
1352 int k = joinedTextMapping[0];
1353 int l = 0;
1354 while (l < k)
1355 brailleWithWs[l++] = "";
1356 for (int j = 0; j < jmax; j++) {
1357 if (outputSegmentNumbers[j] > l) {
1358 brailleWithWs[l] = b.toString();
1359 b = new StringBuffer();
1360
1361 if (j > 0 && (outputAttrs[j - 1] & 8) == 8) {
1362 if (pre[l]) {
1363 Matcher m = Pattern.compile("\\xA0([\\xAD\\u200B]*)").matcher(brailleWithWs[l]);
1364 if (m.matches())
1365 brailleWithWs[l] = textWithWs[l] + m.group(1);
1366 else
1367 wsLost = true; }}
1368 else {
1369 if (pre[l])
1370 wsLost = true;
1371 if (l <= kmax && pre[l + 1]) {
1372 pre[l + 1] = false;
1373 wsLost = true; }}
1374 l++;
1375 while (outputSegmentNumbers[j] > l) {
1376 brailleWithWs[l] = "";
1377 if (pre[l])
1378 wsLost = true;
1379 l++; }}
1380 b.append(joinedBrailleWithoutHyphens.charAt(j));
1381 if (j < jmax - 1) {
1382
1383 if ((outputAttrs[j] & 1) == 1)
1384 b.append(SHY);
1385 if ((outputAttrs[j] & 2) == 2)
1386 b.append(ZWSP);
1387 if ((outputAttrs[j] & 4) == 4)
1388 b.append(US); }}
1389 brailleWithWs[l] = b.toString();
1390 if (pre[l])
1391 if (brailleWithWs[l].equals(""+NBSP))
1392 brailleWithWs[l] = textWithWs[l];
1393 else
1394 wsLost = true;
1395 l++;
1396 while (l < kmax) {
1397 if (pre[l])
1398 wsLost = true;
1399 brailleWithWs[l++] = ""; }
1400 if (wsLost) {
1401 logger.warn("White space was not preserved: " + joinedText.replaceAll("\\s+"," "));
1402 logger.debug("White space was lost in the output.\n"
1403 + "Input: " + Arrays.toString(textWithWs) + "\n"
1404 + "Output: " + Arrays.toString(brailleWithWs)); }
1405 }
1406 }
1407 } catch (TranslationException e) {
1408 throw new RuntimeException(e);
1409 } catch (DisplayException e) {
1410 throw new RuntimeException(e); }
1411
1412
1413 String braille[] = new String[text.length];
1414 for (int i = 0; i < braille.length; i++)
1415 braille[i] = "";
1416 for (int j = 0; j < brailleWithWs.length; j++)
1417 braille[textWithWsMapping[j]] += brailleWithWs[j];
1418
1419
1420 if (someLetterSpacing)
1421 for (int i = 0; i < braille.length; i++)
1422 braille[i] = applyLetterSpacing(braille[i], letterSpacing[i]);
1423
1424 return braille;
1425 }
1426
1427
1428
1429
1430 private byte[] detectLetterBoundaries(byte[] addTo, String text, byte val) {
1431 if (addTo == null)
1432 addTo = new byte[lengthByCodePoints(text) - 1];
1433 int i = 0;
1434 int prev = -1;
1435 for (int c : text.codePoints().toArray()) {
1436 if (i > 0 && ((Character.isLetter(c) && Character.isLetter(prev)) ||
1437 c == '-' ||
1438 prev == '-'))
1439 addTo[i - 1] |= val;
1440 if (i < addTo.length && c == '\u00ad')
1441 addTo[i] |= val;
1442 prev = c;
1443 i++;
1444 }
1445 return addTo;
1446 }
1447
1448 @Override
1449 public ToStringHelper toStringHelper() {
1450 return MoreObjects.toStringHelper("LiblouisTranslatorJnaImplProvider$LiblouisTranslatorImpl")
1451 .add("translator", translator)
1452 .add("displayTable", displayTable)
1453 .add("hyphenator", hyphenator);
1454 }
1455
1456 @Override
1457 public int hashCode() {
1458 final int prime = 31;
1459 int hash = 1;
1460 hash = prime * hash + translator.hashCode();
1461 hash = prime * hash + ((hyphenator == null) ? 0 : hyphenator.hashCode());
1462 hash = prime * hash + handleNonStandardHyphenation;
1463 return hash;
1464 }
1465
1466 @Override
1467 public boolean equals(Object object) {
1468 if (this == object)
1469 return true;
1470 if (object == null)
1471 return false;
1472 if (object.getClass() != LiblouisTranslatorImpl.class)
1473 return false;
1474 LiblouisTranslatorImpl that = (LiblouisTranslatorImpl)object;
1475 if (!this.translator.equals(that.translator))
1476 return false;
1477 if (!this.displayTable.equals(that.displayTable))
1478 return false;
1479 if (this.hyphenator == null && that.hyphenator != null)
1480 return false;
1481 if (this.hyphenator != null && that.hyphenator == null)
1482 return false;
1483 if (!this.hyphenator.equals(that.hyphenator))
1484 return false;
1485 return true;
1486 }
1487 }
1488
1489 private class HandleTextTransformNone extends CompoundBrailleTranslator implements LiblouisTranslator {
1490
1491 final LiblouisTranslator translator;
1492
1493 HandleTextTransformNone(LiblouisTranslator translator, BrailleTranslator unityTranslator) {
1494 super(translator, ImmutableMap.of("none", () -> unityTranslator));
1495 this.translator = translator;
1496 }
1497
1498 private HandleTextTransformNone(CompoundBrailleTranslator from, LiblouisTranslator translator) {
1499 super(from);
1500 this.translator = translator;
1501 }
1502
1503 @Override
1504 public HandleTextTransformNone _withHyphenator(Hyphenator hyphenator) {
1505 HandleTextTransformNone t = new HandleTextTransformNone(
1506 (CompoundBrailleTranslator)super._withHyphenator(hyphenator),
1507 translator);
1508 LiblouisTranslatorJnaImplProvider.this.rememberId(t);
1509 return t;
1510 }
1511
1512 @Override
1513 public LiblouisTable asLiblouisTable() {
1514 return translator.asLiblouisTable();
1515 }
1516
1517 @Override
1518 public FromTypeformedTextToBraille fromTypeformedTextToBraille() {
1519 return translator.fromTypeformedTextToBraille();
1520 }
1521 }
1522
1523 private interface FullHyphenator extends Hyphenator.FullHyphenator {
1524 public byte[] hyphenate(String text, Locale language);
1525 }
1526
1527
1528
1529
1530
1531 private final static FullHyphenator compoundWordHyphenator = new CompoundWordHyphenator();
1532
1533 private static class CompoundWordHyphenator extends NoHyphenator implements FullHyphenator {
1534
1535 public byte[] hyphenate(String text, Locale language) {
1536 if (text.isEmpty())
1537 return null;
1538 Tuple2<String,byte[]> t = extractHyphens(text, true, SHY, ZWSP);
1539 if (t._1.isEmpty())
1540 return null;
1541 return transform(t._2, t._1, language);
1542 }
1543 }
1544
1545 private static class HyphenatorAsFullHyphenator implements FullHyphenator {
1546
1547 private final Hyphenator.FullHyphenator hyphenator;
1548
1549 private HyphenatorAsFullHyphenator(Hyphenator hyphenator) {
1550 this.hyphenator = hyphenator.asFullHyphenator();
1551 }
1552
1553 public java.lang.Iterable<CSSStyledText> transform(java.lang.Iterable<CSSStyledText> text) {
1554 return hyphenator.transform(text);
1555 }
1556
1557 private final static SimpleInlineStyle HYPHENS_AUTO = new SimpleInlineStyle("hyphens: auto");
1558
1559 public byte[] hyphenate(String text, Locale language) {
1560 return extractHyphens(
1561 hyphenator.transform(singleton(new CSSStyledText(text, HYPHENS_AUTO, language))).iterator().next().getText(),
1562 true, SHY, ZWSP)._2;
1563 }
1564 }
1565
1566
1567
1568
1569
1570
1571 protected static Typeform typeformFromInlineCSS(SimpleInlineStyle style, Translator table, Map<String,Typeform> supportedTypeforms) {
1572 Typeform typeform = Typeform.PLAIN_TEXT;
1573 for (String prop : style.getPropertyNames()) {
1574 if (prop.equals("font-style")) {
1575 CSSProperty value = style.getProperty(prop);
1576 if (value == FontStyle.ITALIC || value == FontStyle.OBLIQUE) {
1577 Typeform t = supportedTypeforms.get("italic");
1578 if (t != null)
1579 typeform = typeform.add(t);
1580 else
1581 logger.warn("{}: {} not supported: emphclass 'italic' not defined in table {}",
1582 prop, style.get(prop),
1583 table.getTable());
1584 style.removeProperty(prop);
1585 continue; }}
1586 else if (prop.equals("font-weight")) {
1587 CSSProperty value = style.getProperty(prop);
1588 if (value == FontWeight.BOLD) {
1589 Typeform t = supportedTypeforms.get("bold");
1590 if (t != null)
1591 typeform = typeform.add(t);
1592 else
1593 logger.warn("{}: {} not supported: emphclass 'bold' not defined in table {}",
1594 prop, style.get(prop),
1595 table.getTable());
1596 style.removeProperty(prop);
1597 continue; }}
1598 else if (prop.equals("text-decoration")) {
1599 CSSProperty value = style.getProperty(prop);
1600 if (value == TextDecoration.UNDERLINE) {
1601 Typeform t = supportedTypeforms.get("underline");
1602 if (t != null)
1603 typeform = typeform.add(t);
1604 else
1605 logger.warn("{}: {} not supported: emphclass 'underline' not defined in table {}",
1606 prop, style.get(prop),
1607 table.getTable());
1608 style.removeProperty(prop);
1609 continue; }}}
1610 return typeform;
1611 }
1612
1613
1614
1615
1616
1617
1618 protected static String textFromTextTransform(String text, TermList textTransform) {
1619 for (Term<?> t : textTransform) {
1620 String tt = ((TermIdent)t).getValue();
1621 if (tt.equals("uppercase"))
1622 text = text.toUpperCase();
1623 else if (tt.equals("lowercase"))
1624 text = text.toLowerCase();
1625 else if (!LOUIS_TEXT_TRANSFORM.matcher(tt).matches())
1626 logger.warn("text-transform: {} not supported", tt);
1627 }
1628 return text;
1629 }
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642 protected static Typeform typeformFromTextTransform(TermList textTransform, Translator table, Map<String,Typeform> supportedTypeforms) {
1643 Typeform typeform = Typeform.PLAIN_TEXT;
1644 for (Term<?> t : textTransform) {
1645 String tt = ((TermIdent)t).getValue();
1646 Matcher m = LOUIS_TEXT_TRANSFORM.matcher(tt);
1647 if (m.matches()) {
1648 String emphClass = m.group("class");
1649 if (emphClass.equals("computer") || emphClass.equals("comp")) {
1650 typeform = typeform.add(Typeform.COMPUTER);
1651 } else {
1652 if (emphClass.equals("ital"))
1653 emphClass= "italic";
1654 else if (emphClass.equals("under"))
1655 emphClass = "underline";
1656 Typeform tf = supportedTypeforms.get(emphClass);
1657 if (tf != null)
1658 typeform = typeform.add(tf);
1659 else
1660 logger.warn("text-transform: {} not supported: emphclass '{}' not defined in table {}",
1661 tt,
1662 emphClass,
1663 table.getTable());
1664 }
1665 continue;
1666 } else if (tt.equals("uppercase") || tt.equals("lowercase")) {
1667
1668 continue;
1669 }
1670 logger.warn("text-transform: {} not supported", tt);
1671 }
1672 return typeform;
1673 }
1674
1675 private final static Pattern LOUIS_TEXT_TRANSFORM = Pattern.compile("^-?(lib)?louis-(?<class>.+)$");
1676
1677 @SuppressWarnings("unused")
1678 private static int mod(int a, int n) {
1679 int result = a % n;
1680 if (result < 0)
1681 result += n;
1682 return result;
1683 }
1684
1685 private static int lengthByCodePoints(String s) {
1686 return s.codePointCount(0, s.length());
1687 }
1688
1689 private static String substringByCodePoints(String s, int beginIndex) {
1690 return s.substring(s.offsetByCodePoints(0, beginIndex));
1691 }
1692
1693 private static String substringByCodePoints(String s, int beginIndex, int endIndex) {
1694 return s.substring(s.offsetByCodePoints(0, beginIndex), s.offsetByCodePoints(0, endIndex));
1695 }
1696
1697 private static class LineBreakSolution {
1698 String line;
1699 String replacementWord;
1700 String lineInBraille;
1701 int lineInBrailleLength;
1702 }
1703
1704 private static final Logger logger = LoggerFactory.getLogger(LiblouisTranslatorJnaImplProvider.class);
1705
1706 }