#
# Copyright (C) 2016 Rimas Kudelis <rq@akl.lt>
#
#  This file is part of liblouis.
#
#  liblouis is free software: you can redistribute it and/or modify it
#  under the terms of the GNU Lesser General Public License as
#  published by the Free Software Foundation, either version 2.1 of the
#  License, or (at your option) any later version.
#
#  liblouis is distributed in the hope that it will be useful, but
#  WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#  Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public
#  License along with liblouis. If not, see
#  <http://www.gnu.org/licenses/>.

# The Lithuanian 8-dot Braille writing system is described in a decree of the Minister of
# Social Security and Labor, called "Dėl vieningos aštuonių taškų Brailio rašto sistemos
# naudojimo tvarkos aprašo patvirtinimo", which has been in effect since 2011-04-13. The
# document number is A1-183. At the time of writing this file, the document was accessible at
# https://www.e-tar.lt/portal/lt/legalAct/TAR.443D667CA047 .
# The document is referred to as "the standard" below.
#
# The standard maps ISO-8859-13 character set to 8-dot Braille writing system. However,
# even though it defines different mappings for literary and computer braille modes, it
# doesn't seem like the authors had a good understanding of why these two modes exist
# and how they differ. Furthermore, the standard contains a few errors (incorrectly named
# characters as well as mapping conflicts). I would say it needs further improvements.
#
# This file is based on the standard, but does not exactly follow it. Some standard
# definitions are commented out, some changed, and some extra ones are added.

###
### WHITESPACE
###

# No-break space is defined as 7 in the standard. This table deliberately deviates:
# dots 7 is also the pattern used here for undefined characters and for U+FFFD, so
# following the standard would make a no-break space read as an unknown character.
# It is mapped to virtual dot "a" instead, which keeps it distinct both from dots 7
# and from the ordinary blank cell (presumably the same mapping spaces.uti uses for
# NO-BREAK SPACE; verify against that file).
# This rule is placed before the include so that it is seen first.
space         \x00a0 a                      [NO-BREAK SPACE]
include spaces.uti


###
### DIGITS
###

# Digit definitions are taken from the shared 8-dot digits table
# (contents not visible from this file).
include digits8Dots.uti


###
### LETTERS
###

# Letter definitions from the shared 8-dot Latin letter table
# (presumably the basic a-z / A-Z set; contents not visible from this file).
include latinLetterDef8Dots.uti

# Accented letters which are part of the Lithuanian alphabet.
# In every pair below, the uppercase pattern is the lowercase pattern plus dot 7.
uppercase         \x0104 167         Ą LATIN CAPITAL LETTER A WITH OGONEK
lowercase         \x0105 16          ą LATIN SMALL LETTER A WITH OGONEK
uppercase         \x010c 1467        Č LATIN CAPITAL LETTER C WITH CARON
lowercase         \x010d 146         č LATIN SMALL LETTER C WITH CARON
uppercase         \x0116 3457        Ė LATIN CAPITAL LETTER E WITH DOT ABOVE
lowercase         \x0117 345         ė LATIN SMALL LETTER E WITH DOT ABOVE
uppercase         \x0118 1567        Ę LATIN CAPITAL LETTER E WITH OGONEK
lowercase         \x0119 156         ę LATIN SMALL LETTER E WITH OGONEK
uppercase         \x012e 2467        Į LATIN CAPITAL LETTER I WITH OGONEK
lowercase         \x012f 246         į LATIN SMALL LETTER I WITH OGONEK
uppercase         \x0160 23467       Š LATIN CAPITAL LETTER S WITH CARON
lowercase         \x0161 2346        š LATIN SMALL LETTER S WITH CARON
uppercase         \x0172 3467        Ų LATIN CAPITAL LETTER U WITH OGONEK
lowercase         \x0173 346         ų LATIN SMALL LETTER U WITH OGONEK
uppercase         \x016a 12567       Ū LATIN CAPITAL LETTER U WITH MACRON
lowercase         \x016b 1256        ū LATIN SMALL LETTER U WITH MACRON
uppercase         \x017d 1267        Ž LATIN CAPITAL LETTER Z WITH CARON
lowercase         \x017e 126         ž LATIN SMALL LETTER Z WITH CARON


###
### PUNCTUATION
###

punctuation   \x0021 235                    ! EXCLAMATION MARK
punctuation   \x0022 4                      " QUOTATION MARK
punctuation   \x0027 3                      ' APOSTROPHE
# The following symbol is defined as 134568 in the standard, but that is hardly useful.
# According to Unicode, it is the preferred character to use for apostrophe, hence
# it is given the same dots as APOSTROPHE (3) here.
punctuation   \x2019 3                      ’ RIGHT SINGLE QUOTATION MARK
punctuation   \x0028 23567                  ( LEFT PARENTHESIS
punctuation   \x0029 23568                  ) RIGHT PARENTHESIS
punctuation   \x002c 2                      , COMMA
punctuation   \x002d 36                     - HYPHEN-MINUS
punctuation   \x002e 256                    . FULL STOP
punctuation   \x003a 25                     : COLON
punctuation   \x003b 23                     ; SEMICOLON
punctuation   \x003f 26                     ? QUESTION MARK
punctuation   \x005b 12356                  [ LEFT SQUARE BRACKET
punctuation   \x005d 23456                  ] RIGHT SQUARE BRACKET
punctuation   \x007b 24678                  { LEFT CURLY BRACKET
punctuation   \x007d 13578                  } RIGHT CURLY BRACKET
# The following symbol is defined as 368 in the standard; here it shares dots 36
# with HYPHEN-MINUS.
punctuation   \x00ad 36                     [SOFT HYPHEN]
# The following two symbols are not defined in the standard; both are rendered
# with the same dots as HYPHEN-MINUS (36).
punctuation   \x2013 36                     – EN DASH
punctuation   \x2014 36                     — EM DASH

# These are the typographically correct quotes in Lithuania:
# „ opens a quotation and “ closes it.
punctuation   \x201e 2367                   „ DOUBLE LOW-9 QUOTATION MARK
punctuation   \x201c 3567                   “ LEFT DOUBLE QUOTATION MARK
# The following character is defined in the standard, because it exists in ISO-8859-13
# character set, even though it should not be used in Lithuanian texts.
punctuation   \x201d 367                    ” RIGHT DOUBLE QUOTATION MARK
# Let's make straight quotes smarter if we can: a straight quote at the start of a
# word gets the dots of the opening quote „ (2367), and one at the end of a word gets
# the dots of the closing quote “ (3567). Elsewhere the plain definition (4) applies.
prepunc       \x0022 2367                   " QUOTATION MARK
postpunc      \x0022 3567                   " QUOTATION MARK
# The following combination of smart quotes is sometimes mistakenly used due to automatic
# replacement based on English rules. This will turn them to the correct quotes:
# “ before a word is brailled as the opening quote, ” after a word as the closing quote.
prepunc       \x201c 2367                   “ LEFT DOUBLE QUOTATION MARK
postpunc      \x201d 3567                   ” RIGHT DOUBLE QUOTATION MARK

# Two spellings of the horizontal ellipsis: the single U+2026 character and three
# consecutive full stops. Both produce the same three cells (3-3-3).
punctuation   \x2026 3-3-3                  … HORIZONTAL ELLIPSIS
always        \x002e\x002e\x002e 3-3-3      ... Three times FULL STOP


###
### MATHEMATICAL SYMBOLS
###

# Active math definitions. Lines starting with "# math" or "# midnum" are definitions
# that have been commented out and are currently not in effect.
math          \x002b 2358                   + PLUS SIGN
math          \x003c 2468                   < LESS-THAN SIGN
math          \x003d 2356                   = EQUALS SIGN
math          \x003e 1358                   > GREATER-THAN SIGN
math          \x00b1 23578                  ± PLUS-MINUS SIGN
# math          \x00bc 1368                   ¼ VULGAR FRACTION ONE QUARTER
# math          \x00bd 12368                  ½ VULGAR FRACTION ONE HALF
# math          \x00be 13468                  ¾ VULGAR FRACTION THREE QUARTERS
# MINUS SIGN uses the same dots as HYPHEN-MINUS (36).
math          \x2212 36                     − MINUS SIGN
math          \x00d7 2348                   × MULTIPLICATION SIGN
math          \x00f7 125678                 ÷ DIVISION SIGN
# math          \x2215 25                     ∕ DIVISION SLASH
# Parentheses are different in Math than in literature.
# (These math-specific forms are disabled; the punctuation definitions of the
# parentheses are the ones in effect.)
# midnum        \x0028 126                    ( LEFT PARENTHESIS
# midnum        \x0029 345                    ) RIGHT PARENTHESIS


###
### OTHER CHARACTERS
###

# Remaining printable characters, all classified as generic signs
# (neither punctuation nor math).
sign          \x0023 3456                   # NUMBER SIGN
sign          \x0024 46                     $ DOLLAR SIGN
sign          \x0025 123456                 % PERCENT SIGN
sign          \x0026 12346                  & AMPERSAND
sign          \x002a 35                     * ASTERISK
sign          \x002f 34                     / SOLIDUS
sign          \x0040 34578                  @ COMMERCIAL AT
sign          \x005c 347                    \ REVERSE SOLIDUS
sign          \x005e 2568                   ^ CIRCUMFLEX ACCENT
sign          \x005f 4567                   _ LOW LINE
sign          \x0060 6                      `  GRAVE ACCENT
sign          \x007c 456                    |  VERTICAL LINE
sign          \x007e 268                    ~  TILDE
sign          \x00a2 58                     ¢ CENT SIGN
sign          \x00a3 467                    £ POUND SIGN
sign          \x00a4 4678                   ¤ CURRENCY SIGN
sign          \x00a6 1456                   ¦ BROKEN BAR
sign          \x00a7 3468                   § SECTION SIGN
sign          \x00a9 123468                 © COPYRIGHT SIGN
# Angle quotation marks are not commonly used in Lithuanian language.
# It's probably much more common to see them as fancy breadcrumb separators
# than anything else, thus marking them as sign, not punctuation.
sign          \x00ab 5678                   « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
sign          \x00bb 4578                   » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
sign          \x00ac 5                      ¬ NOT SIGN
sign          \x00ae 12358                  ® REGISTERED SIGN
sign          \x00b0 4568                   ° DEGREE SIGN
sign          \x00b2 1278                   ² SUPERSCRIPT TWO
sign          \x00b3 1478                   ³ SUPERSCRIPT THREE
sign          \x00b5 1348                   µ MICRO SIGN
sign          \x00b6 12348                  ¶ PILCROW SIGN
sign          \x00b7 37                     · MIDDLE DOT
sign          \x00b9 178                    ¹ SUPERSCRIPT ONE


###
### INDICATOR AND SPECIAL SYMBOL OPCODES
###

# The number indicator is left disabled in this 8-dot table.
# numsign 3456                                Number indicator in text (6-dot output only)
# The hyphen and decimal separator reuse the dots given to HYPHEN-MINUS (36) and
# COMMA (2) in the punctuation section.
hyphen        \x002d 36                     - HYPHEN-MINUS
decpoint      \x002c 2                      , DECIMAL SEPARATOR

# U+FFFD and any character not defined in this table are both shown as dots 7.
sign          \xfffd 7                      � REPLACEMENT CHARACTER
undefined            7                      Replacement dots for characters not defined in this table


###
### ATTIC (commented out definitions)
###

# Other letters from the ISO-8859-13 repertoire.
# This set seems quite useless and poorly designed, so leaving it commented out.
# uplow         \x00c4\x00e4 567,3458         Ää LATIN CAPITAL LETTER A WITH DIAERESIS,LATIN SMALL LETTER A WITH DIAERESIS
# uplow         \x00c5\x00e5 34567,345678     Åå LATIN CAPITAL LETTER A WITH RING ABOVE,LATIN SMALL LETTER A WITH RING ABOVE
# uplow         \x00c6\x00e6 458,38           Ææ LATIN CAPITAL LETTER AE,LATIN SMALL LETTER AE
# uplow         \x00c9\x00e9 238,1268         Éé LATIN CAPITAL LETTER E WITH ACUTE,LATIN SMALL LETTER E WITH ACUTE
# uplow         \x00d3\x00f3 123567,14568     Óó LATIN CAPITAL LETTER O WITH ACUTE,LATIN SMALL LETTER O WITH ACUTE
# uplow         \x00d5\x00f5 267,1234567      Õõ LATIN CAPITAL LETTER O WITH TILDE,LATIN SMALL LETTER O WITH TILDE
# uplow         \x00d6\x00f6 358,1234568      Öö LATIN CAPITAL LETTER O WITH DIAERESIS,LATIN SMALL LETTER O WITH DIAERESIS
# uplow         \x00d8\x00f8 48,68            Øø LATIN CAPITAL LETTER O WITH STROKE,LATIN SMALL LETTER O WITH STROKE
# uplow         \x00dc\x00fc 2368,12568       Üü LATIN CAPITAL LETTER U WITH DIAERESIS,LATIN SMALL LETTER U WITH DIAERESIS
# lowercase     \x00df 34568                  ß  LATIN SMALL LETTER SHARP S
# uplow         \x0100\x0101 12468,1678       Āā LATIN CAPITAL LETTER A WITH MACRON,LATIN SMALL LETTER A WITH MACRON
# uplow         \x0106\x0107 15678,34678      Ćć LATIN CAPITAL LETTER C WITH ACUTE,LATIN SMALL LETTER C WITH ACUTE
# uplow         \x0112\x0113 123467,1234678   Ēē LATIN CAPITAL LETTER E WITH MACRON,LATIN SMALL LETTER E WITH MACRON
# uplow         \x0122\x0123 57,348           Ģģ LATIN CAPITAL LETTER G WITH CEDILLA,LATIN SMALL LETTER G WITH CEDILLA
# uplow         \x012a\x012b 23468,14678      Īī LATIN CAPITAL LETTER I WITH MACRON,LATIN SMALL LETTER I WITH MACRON
# uplow         \x0136\x0137 258,1468         Ķķ LATIN CAPITAL LETTER K WITH CEDILLA,LATIN SMALL LETTER K WITH CEDILLA
# uplow         \x013b\x013c 23678,124568     Ļļ LATIN CAPITAL LETTER L WITH CEDILLA,LATIN SMALL LETTER L WITH CEDILLA
# uplow         \x0141\x0142 35678,234568     Łł LATIN CAPITAL LETTER L WITH STROKE,LATIN SMALL LETTER L WITH STROKE
# uplow         \x0143\x0144 2567,13458       Ńń LATIN CAPITAL LETTER N WITH ACUTE,LATIN SMALL LETTER N WITH ACUTE
# uplow         \x0145\x0146 578,234567       Ņņ LATIN CAPITAL LETTER N WITH CEDILLA,LATIN SMALL LETTER N WITH CEDILLA
# uplow         \x014c\x014d 14567,145678     Ōō LATIN CAPITAL LETTER O WITH MACRON,LATIN SMALL LETTER O WITH MACRON
# uplow         \x015a\x015b 123568,1568      Śś LATIN CAPITAL LETTER S WITH ACUTE,LATIN SMALL LETTER S WITH ACUTE
# uplow         \x0179\x017a 23458,12678      Źź LATIN CAPITAL LETTER Z WITH ACUTE,LATIN SMALL LETTER Z WITH ACUTE
# uplow         \x017b\x017c 3568,24568       Żż LATIN CAPITAL LETTER Z WITH DOT ABOVE,LATIN SMALL LETTER Z WITH DOT ABOVE
# uplow         \x0156\x0157 2357,12456       Ŗŗ LATIN CAPITAL LETTER R WITH CEDILLA,LATIN SMALL LETTER R WITH CEDILLA

# These control chars are defined by the standard, but are hardly useful.
# These official definitions are commented out below, and only whitespace characters
# are defined as 0 (no dots) in the Whitespace section at the top of this file.
# sign          \x0000 3578                   [NULL]
# sign          \x0001 28                     [START OF HEADING]
# sign          \x0002 45                     [START OF TEXT]
# sign          \x0003 47                     [END OF TEXT]
# sign          \x0004 14578                  [END OF TRANSMISSION]
# sign          \x0005 1578                   [ENQUIRY]
# sign          \x0006 12478                  [ACKNOWLEDGE]
# sign          \x0007 124578                 [BELL]
# sign          \x0008 12578                  [BACKSPACE]
# space         \x0009 2478                   [CHARACTER TABULATION]
# space         \x000a 24578                  [LINE FEED (LF)]
# space         \x000b 1378                   [LINE TABULATION]
# space         \x000c 12378                  [FORM FEED (FF)]
# space         \x000d 13478                  [CARRIAGE RETURN (CR)]
# sign          \x000e 134578                 [SHIFT OUT]
# sign          \x000f 56                     [SHIFT IN]
# sign          \x0010 123478                 [DATA LINK ESCAPE]
# sign          \x0011 1234578                [DEVICE CONTROL ONE]
# sign          \x0012 123578                 [DEVICE CONTROL TWO]
# sign          \x0013 23478                  [DEVICE CONTROL THREE]
# sign          \x0014 234578                 [DEVICE CONTROL FOUR]
# sign          \x0015 13678                  [NEGATIVE ACKNOWLEDGE]
# sign          \x0016 123678                 [SYNCHRONOUS IDLE]
# sign          \x0017 245678                 [END OF TRANSMISSION BLOCK]
# sign          \x0018 134678                 [CANCEL]
# sign          \x0019 1345678                [END OF MEDIUM]
# sign          \x001a 135678                 [SUBSTITUTE]
# sign          \x001b 1235678                [ESCAPE]
# sign          \x001c 3478                   [INFORMATION SEPARATOR FOUR]
# sign          \x001d 2345678                [INFORMATION SEPARATOR THREE]
# sign          \x001e 234678                 [INFORMATION SEPARATOR TWO]
# sign          \x001f 45678                  [INFORMATION SEPARATOR ONE]
# sign          \x007f 138                    [DELETE]
# sign          \x0080 457
# sign          \x0081 8
# sign          \x0082 3678                   [BREAK PERMITTED HERE]
# sign          \x0083 78                     [NO BREAK HERE]
# sign          \x0084 236
# sign          \x0085 25678                  [NEXT LINE (NEL)]
# sign          \x0086 168                    [START OF SELECTED AREA]
# sign          \x0087 357                    [END OF SELECTED AREA]
# sign          \x0088 378                    [CHARACTER TABULATION SET]
# sign          \x0089 468                    [CHARACTER TABULATION WITH JUSTIFICATION]
# sign          \x008a 678                    [LINE TABULATION SET]
# sign          \x008b 27                     [PARTIAL LINE FORWARD]
# sign          \x008c 237                    [PARTIAL LINE BACKWARD]
# sign          \x008d 257                    [REVERSE LINE FEED]
# sign          \x008e 478                    [SINGLE SHIFT TWO]
# sign          \x008f 12467                  [SINGLE SHIFT THREE]
# sign          \x0090 124678                 [DEVICE CONTROL STRING]
# sign          \x0091 568                    [PRIVATE USE ONE]
# sign          \x0092 1246                   [PRIVATE USE TWO]
# sign          \x0093 356                    [SET TRANSMIT STATE]
# sign          \x0094 1238                   [CANCEL CHARACTER]
# sign          \x0095 278                    [MESSAGE WAITING]
# sign          \x0096 2378                   [START OF GUARDED AREA]
# sign          \x0097 13568                  [END OF GUARDED AREA]
# sign          \x0098 2578                   [START OF STRING]
# sign          \x0099 1245678
# sign          \x009a 2678                   [SINGLE CHARACTER INTRODUCER]
# sign          \x009b 123458                 [CONTROL SEQUENCE INTRODUCER]
# sign          \x009c 67                     [STRING TERMINATOR]
# sign          \x009d 124567                 [OPERATING SYSTEM COMMAND]
# sign          \x009e 235678                 [PRIVACY MESSAGE]
# sign          \x009f 12345678               [APPLICATION PROGRAM COMMAND]