/*
 * Distributed under the Boost Software License, Version 1.0.
 *    (See accompanying file LICENSE_1_0.txt or copy at
 *          http://www.boost.org/LICENSE_1_0.txt)
 */
module pango.script;

import pango.utils;
import pango.language;
import pango.c.script;
import pango.c.language;

import std.string;
import std.range;
import std.conv;


/**
 * PangoScript:
 * InvalidCode: a value never returned from pango_script_for_unichar()
 * Common: a character used by multiple different scripts
 * Inherited: a mark glyph that takes its script from the
 *            base glyph to which it is attached
 * Arabic: Arabic
 * Armenian: Armenian
 * Bengali: Bengali
 * Bopomofo: Bopomofo
 * Cherokee: Cherokee
 * Coptic: Coptic
 * Cyrillic: Cyrillic
 * Deseret: Deseret
 * Devanagari: Devanagari
 * Ethiopic: Ethiopic
 * Georgian: Georgian
 * Gothic: Gothic
 * Greek: Greek
 * Gujarati: Gujarati
 * Gurmukhi: Gurmukhi
 * Han: Han
 * Hangul: Hangul
 * Hebrew: Hebrew
 * Hiragana: Hiragana
 * Kannada: Kannada
 * Katakana: Katakana
 * Khmer: Khmer
 * Lao: Lao
 * Latin: Latin
 * Malayalam: Malayalam
 * Mongolian: Mongolian
 * Myanmar: Myanmar
 * Ogham: Ogham
 * OldItalic: Old Italic
 * Oriya: Oriya
 * Runic: Runic
 * Sinhala: Sinhala
 * Syriac: Syriac
 * Tamil: Tamil
 * Telugu: Telugu
 * Thaana: Thaana
 * Thai: Thai
 * Tibetan: Tibetan
 * CanadianAboriginal: Canadian Aboriginal
 * Yi: Yi
 * Tagalog: Tagalog
 * Hanunoo: Hanunoo
 * Buhid: Buhid
 * Tagbanwa: Tagbanwa
 * Braille: Braille
 * Cypriot: Cypriot
 * Limbu: Limbu
 * Osmanya: Osmanya
 * Shavian: Shavian
 * LinearB: Linear B
 * TaiLe: Tai Le
 * Ugaritic: Ugaritic
 * NewTaiLue: New Tai Lue. Since 1.10
 * Buginese: Buginese. Since 1.10
 * Glagolitic: Glagolitic. Since 1.10
 * Tifinagh: Tifinagh. Since 1.10
 * SylotiNagri: Syloti Nagri. Since 1.10
 * OldPersian: Old Persian. Since 1.10
 * Kharoshthi: Kharoshthi. Since 1.10
 * Unknown: an unassigned code point. Since 1.14
 * Balinese: Balinese. Since 1.14
 * Cuneiform: Cuneiform. Since 1.14
 * Phoenician: Phoenician. Since 1.14
 * PhagsPa: Phags-pa. Since 1.14
 * Nko: N'Ko. Since 1.14
 * KayahLi: Kayah Li. Since 1.20.1
 * Lepcha: Lepcha. Since 1.20.1
 * Rejang: Rejang. Since 1.20.1
 * Sundanese: Sundanese. Since 1.20.1
 * Saurashtra: Saurashtra. Since 1.20.1
 * Cham: Cham. Since 1.20.1
 * OlChiki: Ol Chiki. Since 1.20.1
 * Vai: Vai. Since 1.20.1
 * Carian: Carian. Since 1.20.1
 * Lycian: Lycian. Since 1.20.1
 * Lydian: Lydian. Since 1.20.1
 * Batak: Batak. Since 1.32
 * Brahmi: Brahmi. Since 1.32
 * Mandaic: Mandaic. Since 1.32
 * Chakma: Chakma. Since 1.32
 * MeroiticCursive: Meroitic Cursive. Since 1.32
 * MeroiticHieroglyphs: Meroitic Hieroglyphs. Since 1.32
 * Miao: Miao. Since 1.32
 * Sharada: Sharada. Since 1.32
 * SoraSompeng: Sora Sompeng. Since 1.32
 * Takri: Takri. Since 1.32
 *
 * The #PangoScript enumeration identifies different writing
 * systems. The values correspond to the names as defined in the
 * Unicode standard.
 * Note that new types may be added in the future. Applications should be ready
 * to handle unknown values. This enumeration is interchangeable with
 * #GUnicodeScript. See Unicode Standard Annex #24: Script names
 * (http://www.unicode.org/reports/tr24/).
 */
enum Script {                         /* ISO 15924 code */
    InvalidCode         = PangoScript.PANGO_SCRIPT_INVALID_CODE,
    Common              = PangoScript.PANGO_SCRIPT_COMMON,              /* Zyyy */
    Inherited           = PangoScript.PANGO_SCRIPT_INHERITED,           /* Qaai */
    Arabic              = PangoScript.PANGO_SCRIPT_ARABIC,              /* Arab */
    Armenian            = PangoScript.PANGO_SCRIPT_ARMENIAN,            /* Armn */
    Bengali             = PangoScript.PANGO_SCRIPT_BENGALI,             /* Beng */
    Bopomofo            = PangoScript.PANGO_SCRIPT_BOPOMOFO,            /* Bopo */
    Cherokee            = PangoScript.PANGO_SCRIPT_CHEROKEE,            /* Cher */
    Coptic              = PangoScript.PANGO_SCRIPT_COPTIC,              /* Qaac */
    Cyrillic            = PangoScript.PANGO_SCRIPT_CYRILLIC,            /* Cyrl (Cyrs) */
    Deseret             = PangoScript.PANGO_SCRIPT_DESERET,             /* Dsrt */
    Devanagari          = PangoScript.PANGO_SCRIPT_DEVANAGARI,          /* Deva */
    Ethiopic            = PangoScript.PANGO_SCRIPT_ETHIOPIC,            /* Ethi */
    Georgian            = PangoScript.PANGO_SCRIPT_GEORGIAN,            /* Geor (Geon, Geoa) */
    Gothic              = PangoScript.PANGO_SCRIPT_GOTHIC,              /* Goth */
    Greek               = PangoScript.PANGO_SCRIPT_GREEK,               /* Grek */
    Gujarati            = PangoScript.PANGO_SCRIPT_GUJARATI,            /* Gujr */
    Gurmukhi            = PangoScript.PANGO_SCRIPT_GURMUKHI,            /* Guru */
    Han                 = PangoScript.PANGO_SCRIPT_HAN,                 /* Hani */
    Hangul              = PangoScript.PANGO_SCRIPT_HANGUL,              /* Hang */
    Hebrew              = PangoScript.PANGO_SCRIPT_HEBREW,              /* Hebr */
    Hiragana            = PangoScript.PANGO_SCRIPT_HIRAGANA,            /* Hira */
    Kannada             = PangoScript.PANGO_SCRIPT_KANNADA,             /* Knda */
    Katakana            = PangoScript.PANGO_SCRIPT_KATAKANA,            /* Kana */
    Khmer               = PangoScript.PANGO_SCRIPT_KHMER,               /* Khmr */
    Lao                 = PangoScript.PANGO_SCRIPT_LAO,                 /* Laoo */
    Latin               = PangoScript.PANGO_SCRIPT_LATIN,               /* Latn (Latf, Latg) */
    Malayalam           = PangoScript.PANGO_SCRIPT_MALAYALAM,           /* Mlym */
    Mongolian           = PangoScript.PANGO_SCRIPT_MONGOLIAN,           /* Mong */
    Myanmar             = PangoScript.PANGO_SCRIPT_MYANMAR,             /* Mymr */
    Ogham               = PangoScript.PANGO_SCRIPT_OGHAM,               /* Ogam */
    OldItalic           = PangoScript.PANGO_SCRIPT_OLD_ITALIC,          /* Ital */
    Oriya               = PangoScript.PANGO_SCRIPT_ORIYA,               /* Orya */
    Runic               = PangoScript.PANGO_SCRIPT_RUNIC,               /* Runr */
    Sinhala             = PangoScript.PANGO_SCRIPT_SINHALA,             /* Sinh */
    Syriac              = PangoScript.PANGO_SCRIPT_SYRIAC,              /* Syrc (Syrj, Syrn, Syre) */
    Tamil               = PangoScript.PANGO_SCRIPT_TAMIL,               /* Taml */
    Telugu              = PangoScript.PANGO_SCRIPT_TELUGU,              /* Telu */
    Thaana              = PangoScript.PANGO_SCRIPT_THAANA,              /* Thaa */
    Thai                = PangoScript.PANGO_SCRIPT_THAI,                /* Thai */
    Tibetan             = PangoScript.PANGO_SCRIPT_TIBETAN,             /* Tibt */
    CanadianAboriginal  = PangoScript.PANGO_SCRIPT_CANADIAN_ABORIGINAL, /* Cans */
    Yi                  = PangoScript.PANGO_SCRIPT_YI,                  /* Yiii */
    Tagalog             = PangoScript.PANGO_SCRIPT_TAGALOG,             /* Tglg */
    Hanunoo             = PangoScript.PANGO_SCRIPT_HANUNOO,             /* Hano */
    Buhid               = PangoScript.PANGO_SCRIPT_BUHID,               /* Buhd */
    Tagbanwa            = PangoScript.PANGO_SCRIPT_TAGBANWA,            /* Tagb */

    /* Unicode-4.0 additions */
    Braille             = PangoScript.PANGO_SCRIPT_BRAILLE,             /* Brai */
    Cypriot             = PangoScript.PANGO_SCRIPT_CYPRIOT,             /* Cprt */
    Limbu               = PangoScript.PANGO_SCRIPT_LIMBU,               /* Limb */
    Osmanya             = PangoScript.PANGO_SCRIPT_OSMANYA,             /* Osma */
    Shavian             = PangoScript.PANGO_SCRIPT_SHAVIAN,             /* Shaw */
    LinearB             = PangoScript.PANGO_SCRIPT_LINEAR_B,            /* Linb */
    TaiLe               = PangoScript.PANGO_SCRIPT_TAI_LE,              /* Tale */
    Ugaritic            = PangoScript.PANGO_SCRIPT_UGARITIC,            /* Ugar */

    /* Unicode-4.1 additions */
    NewTaiLue           = PangoScript.PANGO_SCRIPT_NEW_TAI_LUE,         /* Talu */
    Buginese            = PangoScript.PANGO_SCRIPT_BUGINESE,            /* Bugi */
    Glagolitic          = PangoScript.PANGO_SCRIPT_GLAGOLITIC,          /* Glag */
    Tifinagh            = PangoScript.PANGO_SCRIPT_TIFINAGH,            /* Tfng */
    SylotiNagri         = PangoScript.PANGO_SCRIPT_SYLOTI_NAGRI,        /* Sylo */
    OldPersian          = PangoScript.PANGO_SCRIPT_OLD_PERSIAN,         /* Xpeo */
    Kharoshthi          = PangoScript.PANGO_SCRIPT_KHAROSHTHI,          /* Khar */

    /* Unicode-5.0 additions */
    Unknown             = PangoScript.PANGO_SCRIPT_UNKNOWN,             /* Zzzz */
    Balinese            = PangoScript.PANGO_SCRIPT_BALINESE,            /* Bali */
    Cuneiform           = PangoScript.PANGO_SCRIPT_CUNEIFORM,           /* Xsux */
    Phoenician          = PangoScript.PANGO_SCRIPT_PHOENICIAN,          /* Phnx */
    PhagsPa             = PangoScript.PANGO_SCRIPT_PHAGS_PA,            /* Phag */
    Nko                 = PangoScript.PANGO_SCRIPT_NKO,                 /* Nkoo */

    /* Unicode-5.1 additions */
    KayahLi             = PangoScript.PANGO_SCRIPT_KAYAH_LI,            /* Kali */
    Lepcha              = PangoScript.PANGO_SCRIPT_LEPCHA,              /* Lepc */
    Rejang              = PangoScript.PANGO_SCRIPT_REJANG,              /* Rjng */
    Sundanese           = PangoScript.PANGO_SCRIPT_SUNDANESE,           /* Sund */
    Saurashtra          = PangoScript.PANGO_SCRIPT_SAURASHTRA,          /* Saur */
    Cham                = PangoScript.PANGO_SCRIPT_CHAM,                /* Cham */
    OlChiki             = PangoScript.PANGO_SCRIPT_OL_CHIKI,            /* Olck */
    Vai                 = PangoScript.PANGO_SCRIPT_VAI,                 /* Vaii */
    Carian              = PangoScript.PANGO_SCRIPT_CARIAN,              /* Cari */
    Lycian              = PangoScript.PANGO_SCRIPT_LYCIAN,              /* Lyci */
    Lydian              = PangoScript.PANGO_SCRIPT_LYDIAN,              /* Lydi */

    /* Unicode-6.0 additions */
    Batak               = PangoScript.PANGO_SCRIPT_BATAK,               /* Batk */
    Brahmi              = PangoScript.PANGO_SCRIPT_BRAHMI,              /* Brah */
    Mandaic             = PangoScript.PANGO_SCRIPT_MANDAIC,             /* Mand */

    /* Unicode-6.1 additions */
    Chakma              = PangoScript.PANGO_SCRIPT_CHAKMA,              /* Cakm */
    MeroiticCursive     = PangoScript.PANGO_SCRIPT_MEROITIC_CURSIVE,    /* Merc */
    MeroiticHieroglyphs = PangoScript.PANGO_SCRIPT_MEROITIC_HIEROGLYPHS,/* Mero */
    Miao                = PangoScript.PANGO_SCRIPT_MIAO,                /* Plrd */
    Sharada             = PangoScript.PANGO_SCRIPT_SHARADA,             /* Shrd */
    SoraSompeng         = PangoScript.PANGO_SCRIPT_SORA_SOMPENG,        /* Sora */
    Takri               = PangoScript.PANGO_SCRIPT_TAKRI                /* Takr */
}


/**
 * Looks up the script of a single Unicode code point.
 *
 * Params:
 *   ch = the code point to classify
 *
 * Returns: the result of pango_script_for_unichar() converted to Script.
 */
Script scriptForChar(dchar ch) {
    return cast(Script)pango_script_for_unichar(ch);
}

/**
 * Returns a sample Language for the given script.
 *
 * Params:
 *   script = the script to find a sample language for
 *
 * Returns: a Language wrapping the pointer returned by
 *          pango_script_get_sample_language().
 *
 * Throws: Exception if pango_script_get_sample_language() returns null.
 */
Language scriptSampleLanguage(Script script) {
    PangoLanguage *lang = pango_script_get_sample_language(cast(PangoScript)script);
    if (!lang) {
        throw new Exception("could not get Language for script " ~ script.to!string);
    }
    return Language(lang);
}


/**
 * TextScriptRange: wrapper around PangoScriptIter
 *
 * An input range of Section values, each pairing a slice of text with the
 * Script reported for it by the iterator. The range is not copyable; it owns
 * the PangoScriptIter and frees it in its destructor.
 *
 * A default-initialised TextScriptRange (no iterator) is an empty range.
 */
struct TextScriptRange {

    private PangoScriptIter *ptr_ = null;
    // Reference to the iterated text, held for as long as the iterator exists.
    // NOTE(review): presumably the iterator does not copy the text - confirm
    // against the Pango documentation.
    private string text_;
    // Defaults to true so that a range without an iterator reports empty
    // instead of letting front/popFront pass a null iterator to Pango.
    private bool empty_ = true;


    /// One run of text together with the script reported for it.
    struct Section {
        const(char)[] text;
        Script script;
    }


    /**
     * Creates an iterator over text.
     *
     * Params:
     *   text = the text to split into script runs; an empty string yields an
     *          empty range
     */
    this(string text) {
        // The C API takes an int length; a larger value would be truncated
        // by the cast below.
        assert(text.length <= int.max, "text is too long for PangoScriptIter");
        text_ = text;
        ptr_ = pango_script_iter_new(text_.ptr, cast(int)text_.length);
        empty_ = (text_.length == 0);
    }

    @disable this(this);

    ~this() {
        // Nothing to release for a default-initialised or moved-from value.
        if (ptr_ !is null) {
            pango_script_iter_free(ptr_);
            ptr_ = null;
        }
    }

    /// True when there is no current section to read.
    @property bool empty() const {
        return empty_ || ptr_ is null;
    }

    /**
     * Returns the current section as reported by
     * pango_script_iter_get_range().
     *
     * Must not be called on an empty range. If the iterator reports a
     * zero-length run, the range is marked empty and an empty Section is
     * returned.
     */
    @property Section front() {
        assert(ptr_ !is null, "front called on a TextScriptRange without an iterator");
        const(char) *start;
        const(char) *end;
        PangoScript script;
        pango_script_iter_get_range(ptr_, &start, &end, &script);
        assert(end >= start);
        if (end == start) {
            empty_ = true;
            return Section("", cast(Script)script);
        }
        // empty_ is deliberately left untouched here: reading front must not
        // turn an exhausted range back into a non-empty one.
        return Section(start[0 .. (end-start)], cast(Script)script);
    }

    /**
     * Advances to the next section; the range becomes empty when
     * pango_script_iter_next() returns false.
     */
    void popFront() {
        assert(ptr_ !is null, "popFront called on a TextScriptRange without an iterator");
        empty_ = !(cast(bool)pango_script_iter_next(ptr_));
    }
}

static assert(isInputRange!TextScriptRange);