/*
 * Distributed under the Boost Software License, Version 1.0.
 *    (See accompanying file LICENSE_1_0.txt or copy at
 *          http://www.boost.org/LICENSE_1_0.txt)
 */
module pango.c.break_;

import pango.c.item;
import pango.c.language;

import glib;

import std.bitmanip;

extern(C):

/* Logical attributes of a character.
 */
/**
 * PangoLogAttr:
 * @is_line_break: if set, can break line in front of character
 * @is_mandatory_break: if set, must break line in front of character
 * @is_char_break: if set, can break here when doing character wrapping
 * @is_white: is whitespace character
 * @is_cursor_position: if set, cursor can appear in front of character.
 * i.e. this is a grapheme boundary, or the first character
 * in the text.
 * This flag implements Unicode's
 * <ulink url="http://www.unicode.org/reports/tr29/">Grapheme
 * Cluster Boundaries</ulink> semantics.
 * @is_word_start: is first character in a word
 * @is_word_end: is first non-word char after a word
 * Note that in degenerate cases, you could have both @is_word_start
 * and @is_word_end set for some character.
 * @is_sentence_boundary: is a sentence boundary.
 * There are two ways to divide sentences. The first assigns all
 * inter-sentence whitespace/control/format chars to some sentence,
 * so all chars are in some sentence; @is_sentence_boundary denotes
 * the boundaries there. The second way doesn't assign
 * between-sentence spaces, etc. to any sentence, so
 * @is_sentence_start/@is_sentence_end mark the boundaries of those sentences.
 * @is_sentence_start: is first character in a sentence
 * @is_sentence_end: is first char after a sentence.
 * Note that in degenerate cases, you could have both @is_sentence_start
 * and @is_sentence_end set for some character. (e.g. no space after a
 * period, so the next sentence starts right away)
 * @backspace_deletes_character: if set, backspace deletes one character
 *                               rather than the entire grapheme cluster. This
 *                               field is only meaningful on grapheme
 *                               boundaries (where @is_cursor_position is
 *                               set).  In some languages, the full grapheme
 *                               (e.g.  letter + diacritics) is considered a
 *                               unit, while in others, each decomposed
 *                               character in the grapheme is a unit. In the
 *                               default implementation of pango_break(), this
 *                               bit is set on all grapheme boundaries except
 *                               those following Latin, Cyrillic or Greek base characters.
 * @is_expandable_space: is a whitespace character that can possibly be
 *                       expanded for justification purposes. (Since: 1.18)
 * @is_word_boundary: is a word boundary.
 * More specifically, means that this is not a position in the middle
 * of a word.  For example, both sides of a punctuation mark are
 * considered word boundaries.  This flag is particularly useful when
 * selecting text word-by-word.
 * This flag implements Unicode's
 * <ulink url="http://www.unicode.org/reports/tr29/">Word
 * Boundaries</ulink> semantics. (Since: 1.22)
 *
 * The #PangoLogAttr structure stores information
 * about the attributes of a single character.
 *
 * Layout note: the C declaration is a run of `guint` bit-fields, so on
 * common ABIs the C struct occupies one whole `guint`.  std.bitmanip's
 * `bitfields` selects its backing integer from the *total* number of bits
 * declared, so the 13 flag bits below are padded to 32 (not 16) to obtain a
 * `uint`-sized store.  With a 16-bit total the D struct would be 2 bytes,
 * and the array-filling functions declared in this module would write past
 * the end of a D-allocated `PangoLogAttr[]` and use the wrong element
 * stride.
 *
 * NOTE(review): `bitfields` allocates from the least significant bit
 * upwards; this is assumed to match the C compiler's bit-field allocation
 * order on the target platform (true for the usual little-endian ABIs) --
 * confirm before relying on this binding on a big-endian target.
 */
struct PangoLogAttr
{
    mixin(bitfields!(
        guint, "is_line_break", 1,      /* Can break line in front of character */

        guint, "is_mandatory_break", 1, /* Must break line in front of character */

        guint, "is_char_break", 1,      /* Can break here when doing char wrap */

        guint, "is_white", 1,           /* Whitespace character */

        /* Cursor can appear in front of character (i.e. this is a grapheme
         * boundary, or the first character in the text).
         */
        guint, "is_cursor_position", 1,

        /* Note that in degenerate cases, you could have both start/end set on
         * some text, most likely for sentences (e.g. no space after a period, so
         * the next sentence starts right away).
         */

        guint, "is_word_start", 1,      /* first character in a word */
        guint, "is_word_end", 1,        /* is first non-word char after a word */

        /* There are two ways to divide sentences. The first assigns all
         * intersentence whitespace/control/format chars to some sentence,
         * so all chars are in some sentence; is_sentence_boundary denotes
         * the boundaries there. The second way doesn't assign
         * between-sentence spaces, etc. to any sentence, so
         * is_sentence_start/is_sentence_end mark the boundaries of those
         * sentences.
         */
        guint, "is_sentence_boundary", 1,
        guint, "is_sentence_start", 1,  /* first character in a sentence */
        guint, "is_sentence_end", 1,    /* first non-sentence char after a sentence */

        /* If set, backspace deletes one character rather than
         * the entire grapheme cluster.
         */
        guint, "backspace_deletes_character", 1,

        /* Only few space variants (U+0020 and U+00A0) have variable
         * width during justification.
         */
        guint, "is_expandable_space", 1,

        /* Word boundary as defined by UAX#29 */
        guint, "is_word_boundary", 1,   /* is NOT in the middle of a word */

        /* Unnamed padding: 13 flag bits + 19 = 32, giving a uint-sized
         * backing store that matches the guint storage unit used in C.
         */
        guint, "", 19));
}

/* Guard the ABI: the C side strides through PangoLogAttr arrays in
 * guint-sized steps, so the D struct must be exactly that large.
 */
static assert(PangoLogAttr.sizeof == guint.sizeof,
              "PangoLogAttr must have the same size as the C struct (one guint)");

/* Determine information about cluster/word/line breaks in a string
 * of Unicode text.
 *
 * `attrs` points to an array of `attrs_len` PangoLogAttr elements that is
 * filled in by the C library.
 */
void pango_break(const(gchar)* text,
                 int length,
                 PangoAnalysis* analysis,
                 PangoLogAttr* attrs,
                 int attrs_len);

/* Binding for the C function of the same name; results are returned through
 * the `paragraph_delimiter_index` and `next_paragraph_start` out-pointers.
 */
void pango_find_paragraph_boundary(const(gchar)* text,
                                   gint length,
                                   gint* paragraph_delimiter_index,
                                   gint* next_paragraph_start);

/* Binding for the C function of the same name; `log_attrs` points to an
 * array of `attrs_len` PangoLogAttr elements that is filled in by the
 * C library.
 */
void pango_get_log_attrs(const(char)* text,
                         int length,
                         int level,
                         PangoLanguage* language,
                         PangoLogAttr* log_attrs,
                         int attrs_len);


/* This is the default break algorithm, used if no language
 * engine overrides it. Normally you should use pango_break()
 * instead; this function is mostly useful for chaining up
 * from a language engine override.
 */
void pango_default_break(const(gchar)* text,
                         int length,
                         PangoAnalysis* analysis,
                         PangoLogAttr* attrs,
                         int attrs_len);