691 lines
24 KiB
JSON
691 lines
24 KiB
JSON
{
|
||
"segments": {
|
||
"identity": {
|
||
"version": {
|
||
"_number": "$Revision: 13761 $",
|
||
"_cldrVersion": "32"
|
||
},
|
||
"language": "root"
|
||
},
|
||
"segmentations": {
|
||
"GraphemeClusterBreak": {
|
||
"variables": [
|
||
{
|
||
"$CR": "\\\\p{Grapheme_Cluster_Break=CR}"
|
||
},
|
||
{
|
||
"$LF": "\\\\p{Grapheme_Cluster_Break=LF}"
|
||
},
|
||
{
|
||
"$Control": "\\\\p{Grapheme_Cluster_Break=Control}"
|
||
},
|
||
{
|
||
"$Extend": "\\\\p{Grapheme_Cluster_Break=Extend}"
|
||
},
|
||
{
|
||
"$ZWJ": "\\\\p{Grapheme_Cluster_Break=ZWJ}"
|
||
},
|
||
{
|
||
"$RI": "\\\\p{Grapheme_Cluster_Break=Regional_Indicator}"
|
||
},
|
||
{
|
||
"$Prepend": "\\\\p{Grapheme_Cluster_Break=Prepend}"
|
||
},
|
||
{
|
||
"$SpacingMark": "\\\\p{Grapheme_Cluster_Break=SpacingMark}"
|
||
},
|
||
{
|
||
"$L": "\\\\p{Grapheme_Cluster_Break=L}"
|
||
},
|
||
{
|
||
"$V": "\\\\p{Grapheme_Cluster_Break=V}"
|
||
},
|
||
{
|
||
"$T": "\\\\p{Grapheme_Cluster_Break=T}"
|
||
},
|
||
{
|
||
"$LV": "\\\\p{Grapheme_Cluster_Break=LV}"
|
||
},
|
||
{
|
||
"$LVT": "\\\\p{Grapheme_Cluster_Break=LVT}"
|
||
},
|
||
{
|
||
"$E_Base": "\\\\p{Grapheme_Cluster_Break=E_Base}"
|
||
},
|
||
{
|
||
"$E_Modifier": "\\\\p{Grapheme_Cluster_Break=E_Modifier}"
|
||
},
|
||
{
|
||
"$EBG": "\\\\p{Grapheme_Cluster_Break=E_Base_GAZ}"
|
||
},
|
||
{
|
||
"$Extended_Pict": "[\\u2388\\u2605\\u2607-\\u260D\\u260F-\\u2610\\u2612\\u2616-\\u2617\\u2619-\\u261C\\u261E-\\u261F\\u2621\\u2624-\\u2625\\u2627-\\u2629\\u262B-\\u262D\\u2630-\\u2637\\u263B-\\u2647\\u2654-\\u265F\\u2661-\\u2662\\u2664\\u2667\\u2669-\\u267A\\u267C-\\u267E\\u2680-\\u2691\\u2695\\u2698\\u269A\\u269D-\\u269F\\u26A2-\\u26A9\\u26AC-\\u26AF\\u26B2-\\u26BC\\u26BF-\\u26C3\\u26C6-\\u26C7\\u26C9-\\u26CD\\u26D0\\u26D2\\u26D5-\\u26E8\\u26EB-\\u26EF\\u26F6\\u26FB-\\u26FC\\u26FE-\\u2701\\u2703-\\u2704\\u270E\\u2710-\\u2711\\u2765-\\u2767\\\\U0001F000-\\\\U0001F003\\\\U0001F005-\\\\U0001F0FF\\\\U0001F10D-\\\\U0001F10F\\\\U0001F12F\\\\U0001F16C-\\\\U0001F16F\\\\U0001F1AD-\\\\U0001F1E5\\\\U0001F203-\\\\U0001F20F\\\\U0001F23C-\\\\U0001F23F\\\\U0001F249-\\\\U0001F24F\\\\U0001F252-\\\\U0001F2FF\\\\U0001F322-\\\\U0001F323\\\\U0001F394-\\\\U0001F395\\\\U0001F398\\\\U0001F39C-\\\\U0001F39D\\\\U0001F3F1-\\\\U0001F3F2\\\\U0001F3F6\\\\U0001F4FE\\\\U0001F53E-\\\\U0001F548\\\\U0001F54F\\\\U0001F568-\\\\U0001F56E\\\\U0001F571-\\\\U0001F572\\\\U0001F57B-\\\\U0001F586\\\\U0001F588-\\\\U0001F589\\\\U0001F58E-\\\\U0001F58F\\\\U0001F591-\\\\U0001F594\\\\U0001F597-\\\\U0001F5A3\\\\U0001F5A6-\\\\U0001F5A7\\\\U0001F5A9-\\\\U0001F5B0\\\\U0001F5B3-\\\\U0001F5BB\\\\U0001F5BD-\\\\U0001F5C1\\\\U0001F5C5-\\\\U0001F5D0\\\\U0001F5D4-\\\\U0001F5DB\\\\U0001F5DF-\\\\U0001F5E0\\\\U0001F5E2\\\\U0001F5E4-\\\\U0001F5E7\\\\U0001F5E9-\\\\U0001F5EE\\\\U0001F5F0-\\\\U0001F5F2\\\\U0001F5F4-\\\\U0001F5F9\\\\U0001F6C6-\\\\U0001F6CA\\\\U0001F6D3-\\\\U0001F6DF\\\\U0001F6E6-\\\\U0001F6E8\\\\U0001F6EA\\\\U0001F6ED-\\\\U0001F6EF\\\\U0001F6F1-\\\\U0001F6F2\\\\U0001F6F7-\\\\U0001F6FF\\\\U0001F774-\\\\U0001F77F\\\\U0001F7D5-\\\\U0001F7FF\\\\U0001F80C-\\\\U0001F80F\\\\U0001F848-\\\\U0001F84F\\\\U0001F85A-\\\\U0001F85F\\\\U0001F888-\\\\U0001F88F\\\\U0001F8AE-\\\\U0001F90F\\\\U0001F91F\\\\U0001F928-\\\\U0001F92F\\\\U0001F931-\\\\U0001F932\\\\U0001F93F\\\\U0001F94C-\\\\U0001F94F\\\\U0001F95F-\\\\U0001F97F\\\\U0001F992-\\\\U0001F9BF\\\\U0001F9C1-\\\\U0001FFFD]"
|
||
},
|
||
{
|
||
"$EmojiNRK": "[[\\\\p{Emoji}] - [\\\\p{Grapheme_Cluster_Break=Regional_Indicator}*\\u00230-9©®™〰〽]]"
|
||
}
|
||
],
|
||
"segmentRules": {
|
||
"3": " $CR × $LF ",
|
||
"4": " ( $Control | $CR | $LF ) ÷ ",
|
||
"5": " ÷ ( $Control | $CR | $LF ) ",
|
||
"6": " $L × ( $L | $V | $LV | $LVT ) ",
|
||
"7": " ( $LV | $V ) × ( $V | $T ) ",
|
||
"8": " ( $LVT | $T) × $T ",
|
||
"9": " × ($Extend | $ZWJ) ",
|
||
"9.1": " × $SpacingMark ",
|
||
"9.2": " $Prepend × ",
|
||
"10": " ($E_Base | $EBG) $Extend* × $E_Modifier ",
|
||
"11": " ($Extended_Pict | $EmojiNRK) $Extend* $ZWJ × ($Extended_Pict | $EmojiNRK) ",
|
||
"12": " ^ ($RI $RI)* $RI × $RI ",
|
||
"13": " [^$RI] ($RI $RI)* $RI × $RI "
|
||
}
|
||
},
|
||
"LineBreak": {
|
||
"variables": [
|
||
{
|
||
"$AI": "\\\\p{Line_Break=Ambiguous}"
|
||
},
|
||
{
|
||
"$AL": "\\\\p{Line_Break=Alphabetic}"
|
||
},
|
||
{
|
||
"$B2": "\\\\p{Line_Break=Break_Both}"
|
||
},
|
||
{
|
||
"$BA": "\\\\p{Line_Break=Break_After}"
|
||
},
|
||
{
|
||
"$BB": "\\\\p{Line_Break=Break_Before}"
|
||
},
|
||
{
|
||
"$BK": "\\\\p{Line_Break=Mandatory_Break}"
|
||
},
|
||
{
|
||
"$CB": "\\\\p{Line_Break=Contingent_Break}"
|
||
},
|
||
{
|
||
"$CL": "\\\\p{Line_Break=Close_Punctuation}"
|
||
},
|
||
{
|
||
"$CP": "\\\\p{Line_Break=CP}"
|
||
},
|
||
{
|
||
"$CM1": "\\\\p{Line_Break=Combining_Mark}"
|
||
},
|
||
{
|
||
"$CR": "\\\\p{Line_Break=Carriage_Return}"
|
||
},
|
||
{
|
||
"$EX": "\\\\p{Line_Break=Exclamation}"
|
||
},
|
||
{
|
||
"$GL": "\\\\p{Line_Break=Glue}"
|
||
},
|
||
{
|
||
"$H2": "\\\\p{Line_Break=H2}"
|
||
},
|
||
{
|
||
"$H3": "\\\\p{Line_Break=H3}"
|
||
},
|
||
{
|
||
"$HL": "\\\\p{Line_Break=HL}"
|
||
},
|
||
{
|
||
"$HY": "\\\\p{Line_Break=Hyphen}"
|
||
},
|
||
{
|
||
"$ID": "\\\\p{Line_Break=Ideographic}"
|
||
},
|
||
{
|
||
"$IN": "\\\\p{Line_Break=Inseparable}"
|
||
},
|
||
{
|
||
"$IS": "\\\\p{Line_Break=Infix_Numeric}"
|
||
},
|
||
{
|
||
"$JL": "\\\\p{Line_Break=JL}"
|
||
},
|
||
{
|
||
"$JT": "\\\\p{Line_Break=JT}"
|
||
},
|
||
{
|
||
"$JV": "\\\\p{Line_Break=JV}"
|
||
},
|
||
{
|
||
"$LF": "\\\\p{Line_Break=Line_Feed}"
|
||
},
|
||
{
|
||
"$NL": "\\\\p{Line_Break=Next_Line}"
|
||
},
|
||
{
|
||
"$NS": "\\\\p{Line_Break=Nonstarter}"
|
||
},
|
||
{
|
||
"$NU": "\\\\p{Line_Break=Numeric}"
|
||
},
|
||
{
|
||
"$OP": "\\\\p{Line_Break=Open_Punctuation}"
|
||
},
|
||
{
|
||
"$PO": "\\\\p{Line_Break=Postfix_Numeric}"
|
||
},
|
||
{
|
||
"$PR": "\\\\p{Line_Break=Prefix_Numeric}"
|
||
},
|
||
{
|
||
"$QU": "\\\\p{Line_Break=Quotation}"
|
||
},
|
||
{
|
||
"$SA": "\\\\p{Line_Break=Complex_Context}"
|
||
},
|
||
{
|
||
"$SG": "\\\\p{Line_Break=Surrogate}"
|
||
},
|
||
{
|
||
"$SP": "\\\\p{Line_Break=Space}"
|
||
},
|
||
{
|
||
"$SY": "\\\\p{Line_Break=Break_Symbols}"
|
||
},
|
||
{
|
||
"$WJ": "\\\\p{Line_Break=Word_Joiner}"
|
||
},
|
||
{
|
||
"$XX": "\\\\p{Line_Break=Unknown}"
|
||
},
|
||
{
|
||
"$ZW": "\\\\p{Line_Break=ZWSpace}"
|
||
},
|
||
{
|
||
"$CJ": "\\\\p{Line_Break=Conditional_Japanese_Starter}"
|
||
},
|
||
{
|
||
"$RI": "\\\\p{Line_Break=Regional_Indicator}"
|
||
},
|
||
{
|
||
"$EB": "\\\\p{Line_Break=E_Base}"
|
||
},
|
||
{
|
||
"$EM": "\\\\p{Line_Break=E_Modifier}"
|
||
},
|
||
{
|
||
"$ZWJ_O": "\\\\p{Line_Break=ZWJ}"
|
||
},
|
||
{
|
||
"$ZWJ": "\\\\p{Line_Break=ZWJ}"
|
||
},
|
||
{
|
||
"$EmojiNRK": "[[\\\\p{Emoji}] - [$RI \\u002a\\u00230-9©®™〰〽]]"
|
||
},
|
||
{
|
||
"$Extended_Pict": "[\\u2388\\u2605\\u2607-\\u260D\\u260F-\\u2610\\u2612\\u2616-\\u2617\\u2619-\\u261C\\u261E-\\u261F\\u2621\\u2624-\\u2625\\u2627-\\u2629\\u262B-\\u262D\\u2630-\\u2637\\u263B-\\u2647\\u2654-\\u265F\\u2661-\\u2662\\u2664\\u2667\\u2669-\\u267A\\u267C-\\u267E\\u2680-\\u2691\\u2695\\u2698\\u269A\\u269D-\\u269F\\u26A2-\\u26A9\\u26AC-\\u26AF\\u26B2-\\u26BC\\u26BF-\\u26C3\\u26C6-\\u26C7\\u26C9-\\u26CD\\u26D0\\u26D2\\u26D5-\\u26E8\\u26EB-\\u26EF\\u26F6\\u26FB-\\u26FC\\u26FE-\\u2701\\u2703-\\u2704\\u270E\\u2710-\\u2711\\u2765-\\u2767\\\\U0001F000-\\\\U0001F003\\\\U0001F005-\\\\U0001F0FF\\\\U0001F10D-\\\\U0001F10F\\\\U0001F12F\\\\U0001F16C-\\\\U0001F16F\\\\U0001F1AD-\\\\U0001F1E5\\\\U0001F203-\\\\U0001F20F\\\\U0001F23C-\\\\U0001F23F\\\\U0001F249-\\\\U0001F24F\\\\U0001F252-\\\\U0001F2FF\\\\U0001F322-\\\\U0001F323\\\\U0001F394-\\\\U0001F395\\\\U0001F398\\\\U0001F39C-\\\\U0001F39D\\\\U0001F3F1-\\\\U0001F3F2\\\\U0001F3F6\\\\U0001F4FE\\\\U0001F53E-\\\\U0001F548\\\\U0001F54F\\\\U0001F568-\\\\U0001F56E\\\\U0001F571-\\\\U0001F572\\\\U0001F57B-\\\\U0001F586\\\\U0001F588-\\\\U0001F589\\\\U0001F58E-\\\\U0001F58F\\\\U0001F591-\\\\U0001F594\\\\U0001F597-\\\\U0001F5A3\\\\U0001F5A6-\\\\U0001F5A7\\\\U0001F5A9-\\\\U0001F5B0\\\\U0001F5B3-\\\\U0001F5BB\\\\U0001F5BD-\\\\U0001F5C1\\\\U0001F5C5-\\\\U0001F5D0\\\\U0001F5D4-\\\\U0001F5DB\\\\U0001F5DF-\\\\U0001F5E0\\\\U0001F5E2\\\\U0001F5E4-\\\\U0001F5E7\\\\U0001F5E9-\\\\U0001F5EE\\\\U0001F5F0-\\\\U0001F5F2\\\\U0001F5F4-\\\\U0001F5F9\\\\U0001F6C6-\\\\U0001F6CA\\\\U0001F6D3-\\\\U0001F6DF\\\\U0001F6E6-\\\\U0001F6E8\\\\U0001F6EA\\\\U0001F6ED-\\\\U0001F6EF\\\\U0001F6F1-\\\\U0001F6F2\\\\U0001F6F7-\\\\U0001F6FF\\\\U0001F774-\\\\U0001F77F\\\\U0001F7D5-\\\\U0001F7FF\\\\U0001F80C-\\\\U0001F80F\\\\U0001F848-\\\\U0001F84F\\\\U0001F85A-\\\\U0001F85F\\\\U0001F888-\\\\U0001F88F\\\\U0001F8AE-\\\\U0001F90F\\\\U0001F91F\\\\U0001F928-\\\\U0001F92F\\\\U0001F931-\\\\U0001F932\\\\U0001F93F\\\\U0001F94C-\\\\U0001F94F\\\\U0001F95F-\\\\U0001F97F\\\\U0001F992-\\\\U0001F9BF\\\\U0001F9C1-\\\\U0001FFFD]"
|
||
},
|
||
{
|
||
"$CM": "[$CM1 $ZWJ]"
|
||
},
|
||
{
|
||
"$AL": "[$AI $AL $SG $XX [$SA-[[:Mn:][:Mc:]]]]"
|
||
},
|
||
{
|
||
"$CM": "[$CM $ZWJ [$SA & [[:Mn:][:Mc:]]]]"
|
||
},
|
||
{
|
||
"$NS": "[$NS $CJ]"
|
||
},
|
||
{
|
||
"$X": "$CM*"
|
||
},
|
||
{
|
||
"$Spec1_": "[$SP $BK $CR $LF $NL $ZW]"
|
||
},
|
||
{
|
||
"$Spec2_": "[^ $SP $BK $CR $LF $NL $ZW]"
|
||
},
|
||
{
|
||
"$Spec3a_": "[^ $SP $BA $HY $CM]"
|
||
},
|
||
{
|
||
"$Spec3b_": "[^ $BA $HY $CM]"
|
||
},
|
||
{
|
||
"$Spec4_": "[^ $NU $CM]"
|
||
},
|
||
{
|
||
"$AI": "($AI $X)"
|
||
},
|
||
{
|
||
"$AL": "($AL $X)"
|
||
},
|
||
{
|
||
"$B2": "($B2 $X)"
|
||
},
|
||
{
|
||
"$BA": "($BA $X)"
|
||
},
|
||
{
|
||
"$BB": "($BB $X)"
|
||
},
|
||
{
|
||
"$CB": "($CB $X)"
|
||
},
|
||
{
|
||
"$CL": "($CL $X)"
|
||
},
|
||
{
|
||
"$CP": "($CP $X)"
|
||
},
|
||
{
|
||
"$CM": "($CM $X)"
|
||
},
|
||
{
|
||
"$EX": "($EX $X)"
|
||
},
|
||
{
|
||
"$GL": "($GL $X)"
|
||
},
|
||
{
|
||
"$H2": "($H2 $X)"
|
||
},
|
||
{
|
||
"$H3": "($H3 $X)"
|
||
},
|
||
{
|
||
"$HL": "($HL $X)"
|
||
},
|
||
{
|
||
"$HY": "($HY $X)"
|
||
},
|
||
{
|
||
"$ID": "($ID $X)"
|
||
},
|
||
{
|
||
"$IN": "($IN $X)"
|
||
},
|
||
{
|
||
"$IS": "($IS $X)"
|
||
},
|
||
{
|
||
"$JL": "($JL $X)"
|
||
},
|
||
{
|
||
"$JT": "($JT $X)"
|
||
},
|
||
{
|
||
"$JV": "($JV $X)"
|
||
},
|
||
{
|
||
"$NS": "($NS $X)"
|
||
},
|
||
{
|
||
"$NU": "($NU $X)"
|
||
},
|
||
{
|
||
"$OP": "($OP $X)"
|
||
},
|
||
{
|
||
"$PO": "($PO $X)"
|
||
},
|
||
{
|
||
"$PR": "($PR $X)"
|
||
},
|
||
{
|
||
"$QU": "($QU $X)"
|
||
},
|
||
{
|
||
"$SA": "($SA $X)"
|
||
},
|
||
{
|
||
"$SG": "($SG $X)"
|
||
},
|
||
{
|
||
"$SY": "($SY $X)"
|
||
},
|
||
{
|
||
"$WJ": "($WJ $X)"
|
||
},
|
||
{
|
||
"$XX": "($XX $X)"
|
||
},
|
||
{
|
||
"$RI": "($RI $X)"
|
||
},
|
||
{
|
||
"$EB": "($EB $X)"
|
||
},
|
||
{
|
||
"$EM": "($EM $X)"
|
||
},
|
||
{
|
||
"$ZWJ": "($ZWJ $X)"
|
||
},
|
||
{
|
||
"$EmojiNRK": "($EmojiNRK $X)"
|
||
},
|
||
{
|
||
"$Extended_Pict": "($Extended_Pict $X)"
|
||
},
|
||
{
|
||
"$AL": "($AL | ^ $CM | (?<=$Spec1_) $CM)"
|
||
}
|
||
],
|
||
"segmentRules": {
|
||
"4": " $BK ÷ ",
|
||
"5.01": " $CR × $LF ",
|
||
"5.02": " $CR ÷ ",
|
||
"5.03": " $LF ÷ ",
|
||
"5.04": " $NL ÷ ",
|
||
"6": " × ( $BK | $CR | $LF | $NL ) ",
|
||
"7.01": " × $SP ",
|
||
"7.02": " × $ZW ",
|
||
"8": " $ZW $SP* ÷ ",
|
||
"8.1": " $ZWJ_0 × ($ID | $Extended_Pict | $EmojiNRK) ",
|
||
"9": " $Spec2_ × $CM ",
|
||
"11.01": " × $WJ ",
|
||
"11.02": " $WJ × ",
|
||
"12": " $GL × ",
|
||
"12.1": " $Spec3a_ × $GL ",
|
||
"12.2": " $Spec3b_ $CM+ × $GL ",
|
||
"12.3": " ^ $CM+ × $GL ",
|
||
"13.01": " × $EX ",
|
||
"13.02": " $Spec4_ × ($CL | $CP | $IS | $SY) ",
|
||
"13.03": " $Spec4_ $CM+ × ($CL | $CP | $IS | $SY) ",
|
||
"13.04": " ^ $CM+ × ($CL | $CP | $IS | $SY) ",
|
||
"14": " $OP $SP* × ",
|
||
"15": " $QU $SP* × $OP ",
|
||
"16": " ($CL | $CP) $SP* × $NS ",
|
||
"17": " $B2 $SP* × $B2 ",
|
||
"18": " $SP ÷ ",
|
||
"19.01": " × $QU ",
|
||
"19.02": " $QU × ",
|
||
"20.01": " ÷ $CB ",
|
||
"20.02": " $CB ÷ ",
|
||
"21.01": " × $BA ",
|
||
"21.02": " × $HY ",
|
||
"21.03": " × $NS ",
|
||
"21.04": " $BB × ",
|
||
"21.1": " $HL ($HY | $BA) × ",
|
||
"21.2": " $SY × $HL ",
|
||
"22.01": " ($AL | $HL) × $IN ",
|
||
"22.02": " $EX × $IN ",
|
||
"22.03": " ($ID | $EB | $EM) × $IN ",
|
||
"22.04": " $IN × $IN ",
|
||
"22.05": " $NU × $IN ",
|
||
"23.02": " ($AL | $HL) × $NU ",
|
||
"23.03": " $NU × ($AL | $HL) ",
|
||
"23.12": " $PR × ($ID | $EB | $EM) ",
|
||
"23.13": " ($ID | $EB | $EM) × $PO ",
|
||
"24.02": " ($PR | $PO) × ($AL | $HL) ",
|
||
"24.03": " ($AL | $HL) × ($PR | $PO) ",
|
||
"25.01": " ($PR | $PO) × ( $OP | $HY )? $NU ",
|
||
"25.02": " ( $OP | $HY ) × $NU ",
|
||
"25.03": " $NU × ($NU | $SY | $IS) ",
|
||
"25.04": " $NU ($NU | $SY | $IS)* × ($NU | $SY | $IS | $CL | $CP) ",
|
||
"25.05": " $NU ($NU | $SY | $IS)* ($CL | $CP)? × ($PO | $PR) ",
|
||
"26.01": " $JL × $JL | $JV | $H2 | $H3 ",
|
||
"26.02": " $JV | $H2 × $JV | $JT ",
|
||
"26.03": " $JT | $H3 × $JT ",
|
||
"27.01": " $JL | $JV | $JT | $H2 | $H3 × $IN ",
|
||
"27.02": " $JL | $JV | $JT | $H2 | $H3 × $PO ",
|
||
"27.03": " $PR × $JL | $JV | $JT | $H2 | $H3 ",
|
||
"28": " ($AL | $HL) × ($AL | $HL) ",
|
||
"29": " $IS × ($AL | $HL) ",
|
||
"30.01": " ($AL | $HL | $NU) × $OP ",
|
||
"30.02": " $CP × ($AL | $HL | $NU) ",
|
||
"30.11": " ^ ($RI $RI)* $RI × $RI ",
|
||
"30.12": " [^$RI] ($RI $RI)* $RI × $RI ",
|
||
"30.13": " $RI ÷ $RI ",
|
||
"30.2": " $EB × $EM "
|
||
}
|
||
},
|
||
"SentenceBreak": {
|
||
"variables": [
|
||
{
|
||
"$CR": "\\\\p{Sentence_Break=CR}"
|
||
},
|
||
{
|
||
"$LF": "\\\\p{Sentence_Break=LF}"
|
||
},
|
||
{
|
||
"$Extend": "\\\\p{Sentence_Break=Extend}"
|
||
},
|
||
{
|
||
"$Format": "\\\\p{Sentence_Break=Format}"
|
||
},
|
||
{
|
||
"$Sep": "\\\\p{Sentence_Break=Sep}"
|
||
},
|
||
{
|
||
"$Sp": "\\\\p{Sentence_Break=Sp}"
|
||
},
|
||
{
|
||
"$Lower": "\\\\p{Sentence_Break=Lower}"
|
||
},
|
||
{
|
||
"$Upper": "\\\\p{Sentence_Break=Upper}"
|
||
},
|
||
{
|
||
"$OLetter": "\\\\p{Sentence_Break=OLetter}"
|
||
},
|
||
{
|
||
"$Numeric": "\\\\p{Sentence_Break=Numeric}"
|
||
},
|
||
{
|
||
"$ATerm": "\\\\p{Sentence_Break=ATerm}"
|
||
},
|
||
{
|
||
"$STerm": "\\\\p{Sentence_Break=STerm}"
|
||
},
|
||
{
|
||
"$Close": "\\\\p{Sentence_Break=Close}"
|
||
},
|
||
{
|
||
"$SContinue": "\\\\p{Sentence_Break=SContinue}"
|
||
},
|
||
{
|
||
"$Any": "."
|
||
},
|
||
{
|
||
"$FE": "[$Format $Extend]"
|
||
},
|
||
{
|
||
"$NotPreLower_": "[^ $OLetter $Upper $Lower $Sep $CR $LF $STerm $ATerm]"
|
||
},
|
||
{
|
||
"$Sp": "($Sp $FE*)"
|
||
},
|
||
{
|
||
"$Lower": "($Lower $FE*)"
|
||
},
|
||
{
|
||
"$Upper": "($Upper $FE*)"
|
||
},
|
||
{
|
||
"$OLetter": "($OLetter $FE*)"
|
||
},
|
||
{
|
||
"$Numeric": "($Numeric $FE*)"
|
||
},
|
||
{
|
||
"$ATerm": "($ATerm $FE*)"
|
||
},
|
||
{
|
||
"$STerm": "($STerm $FE*)"
|
||
},
|
||
{
|
||
"$Close": "($Close $FE*)"
|
||
},
|
||
{
|
||
"$SContinue": "($SContinue $FE*)"
|
||
},
|
||
{
|
||
"$ParaSep": "($Sep | $CR | $LF)"
|
||
},
|
||
{
|
||
"$SATerm": "($STerm | $ATerm)"
|
||
}
|
||
],
|
||
"segmentRules": {
|
||
"3": " $CR × $LF ",
|
||
"4": " $ParaSep ÷ ",
|
||
"5": " × [$Format $Extend] ",
|
||
"6": " $ATerm × $Numeric ",
|
||
"7": " ($Upper | $Lower) $ATerm × $Upper ",
|
||
"8": " $ATerm $Close* $Sp* × $NotPreLower_* $Lower ",
|
||
"8.1": " $SATerm $Close* $Sp* × ($SContinue | $SATerm) ",
|
||
"9": " $SATerm $Close* × ( $Close | $Sp | $ParaSep ) ",
|
||
"10": " $SATerm $Close* $Sp* × ( $Sp | $ParaSep ) ",
|
||
"11": " $SATerm $Close* $Sp* $ParaSep? ÷ ",
|
||
"998": " × $Any "
|
||
},
|
||
"standard": {}
|
||
},
|
||
"WordBreak": {
|
||
"variables": [
|
||
{
|
||
"$CR": "\\\\p{Word_Break=CR}"
|
||
},
|
||
{
|
||
"$LF": "\\\\p{Word_Break=LF}"
|
||
},
|
||
{
|
||
"$Newline": "\\\\p{Word_Break=Newline}"
|
||
},
|
||
{
|
||
"$Extend": "\\\\p{Word_Break=Extend}"
|
||
},
|
||
{
|
||
"$Format": "\\\\p{Word_Break=Format}"
|
||
},
|
||
{
|
||
"$Katakana": "\\\\p{Word_Break=Katakana}"
|
||
},
|
||
{
|
||
"$ALetter": "\\\\p{Word_Break=ALetter}"
|
||
},
|
||
{
|
||
"$MidLetter": "\\\\p{Word_Break=MidLetter}"
|
||
},
|
||
{
|
||
"$MidNum": "\\\\p{Word_Break=MidNum}"
|
||
},
|
||
{
|
||
"$MidNumLet": "\\\\p{Word_Break=MidNumLet}"
|
||
},
|
||
{
|
||
"$Numeric": "\\\\p{Word_Break=Numeric}"
|
||
},
|
||
{
|
||
"$ExtendNumLet": "\\\\p{Word_Break=ExtendNumLet}"
|
||
},
|
||
{
|
||
"$RI": "\\\\p{Word_Break=Regional_Indicator}"
|
||
},
|
||
{
|
||
"$Hebrew_Letter": "\\\\p{Word_Break=Hebrew_Letter}"
|
||
},
|
||
{
|
||
"$Double_Quote": "\\\\p{Word_Break=Double_Quote}"
|
||
},
|
||
{
|
||
"$Single_Quote": "\\\\p{Word_Break=Single_Quote}"
|
||
},
|
||
{
|
||
"$E_Base": "\\\\p{Word_Break=E_Base}"
|
||
},
|
||
{
|
||
"$E_Modifier": "\\\\p{Word_Break=E_Modifier}"
|
||
},
|
||
{
|
||
"$ZWJ": "\\\\p{Word_Break=ZWJ}"
|
||
},
|
||
{
|
||
"$EBG": "\\\\p{Word_Break=E_Base_GAZ}"
|
||
},
|
||
{
|
||
"$Extended_Pict": "[\\u2388\\u2605\\u2607-\\u260D\\u260F-\\u2610\\u2612\\u2616-\\u2617\\u2619-\\u261C\\u261E-\\u261F\\u2621\\u2624-\\u2625\\u2627-\\u2629\\u262B-\\u262D\\u2630-\\u2637\\u263B-\\u2647\\u2654-\\u265F\\u2661-\\u2662\\u2664\\u2667\\u2669-\\u267A\\u267C-\\u267E\\u2680-\\u2691\\u2695\\u2698\\u269A\\u269D-\\u269F\\u26A2-\\u26A9\\u26AC-\\u26AF\\u26B2-\\u26BC\\u26BF-\\u26C3\\u26C6-\\u26C7\\u26C9-\\u26CD\\u26D0\\u26D2\\u26D5-\\u26E8\\u26EB-\\u26EF\\u26F6\\u26FB-\\u26FC\\u26FE-\\u2701\\u2703-\\u2704\\u270E\\u2710-\\u2711\\u2765-\\u2767\\\\U0001F000-\\\\U0001F003\\\\U0001F005-\\\\U0001F0FF\\\\U0001F10D-\\\\U0001F10F\\\\U0001F12F\\\\U0001F16C-\\\\U0001F16F\\\\U0001F1AD-\\\\U0001F1E5\\\\U0001F203-\\\\U0001F20F\\\\U0001F23C-\\\\U0001F23F\\\\U0001F249-\\\\U0001F24F\\\\U0001F252-\\\\U0001F2FF\\\\U0001F322-\\\\U0001F323\\\\U0001F394-\\\\U0001F395\\\\U0001F398\\\\U0001F39C-\\\\U0001F39D\\\\U0001F3F1-\\\\U0001F3F2\\\\U0001F3F6\\\\U0001F4FE\\\\U0001F53E-\\\\U0001F548\\\\U0001F54F\\\\U0001F568-\\\\U0001F56E\\\\U0001F571-\\\\U0001F572\\\\U0001F57B-\\\\U0001F586\\\\U0001F588-\\\\U0001F589\\\\U0001F58E-\\\\U0001F58F\\\\U0001F591-\\\\U0001F594\\\\U0001F597-\\\\U0001F5A3\\\\U0001F5A6-\\\\U0001F5A7\\\\U0001F5A9-\\\\U0001F5B0\\\\U0001F5B3-\\\\U0001F5BB\\\\U0001F5BD-\\\\U0001F5C1\\\\U0001F5C5-\\\\U0001F5D0\\\\U0001F5D4-\\\\U0001F5DB\\\\U0001F5DF-\\\\U0001F5E0\\\\U0001F5E2\\\\U0001F5E4-\\\\U0001F5E7\\\\U0001F5E9-\\\\U0001F5EE\\\\U0001F5F0-\\\\U0001F5F2\\\\U0001F5F4-\\\\U0001F5F9\\\\U0001F6C6-\\\\U0001F6CA\\\\U0001F6D3-\\\\U0001F6DF\\\\U0001F6E6-\\\\U0001F6E8\\\\U0001F6EA\\\\U0001F6ED-\\\\U0001F6EF\\\\U0001F6F1-\\\\U0001F6F2\\\\U0001F6F7-\\\\U0001F6FF\\\\U0001F774-\\\\U0001F77F\\\\U0001F7D5-\\\\U0001F7FF\\\\U0001F80C-\\\\U0001F80F\\\\U0001F848-\\\\U0001F84F\\\\U0001F85A-\\\\U0001F85F\\\\U0001F888-\\\\U0001F88F\\\\U0001F8AE-\\\\U0001F90F\\\\U0001F91F\\\\U0001F928-\\\\U0001F92F\\\\U0001F931-\\\\U0001F932\\\\U0001F93F\\\\U0001F94C-\\\\U0001F94F\\\\U0001F95F-\\\\U0001F97F\\\\U0001F992-\\\\U0001F9BF\\\\U0001F9C1-\\\\U0001FFFD]"
|
||
},
|
||
{
|
||
"$EmojiNRK": "[[\\\\p{Emoji}] - [\\\\p{Word_Break=Regional_Indicator}\\u002a\\u00230-9©®™〰〽]]"
|
||
},
|
||
{
|
||
"$AHLetter": "($ALetter | $Hebrew_Letter)"
|
||
},
|
||
{
|
||
"$MidNumLetQ": "($MidNumLet | $Single_Quote)"
|
||
},
|
||
{
|
||
"$FE": "[$Format $Extend $ZWJ]"
|
||
},
|
||
{
|
||
"$NotBreak_": "[^ $Newline $CR $LF ]"
|
||
},
|
||
{
|
||
"$Katakana": "($Katakana $FE*)"
|
||
},
|
||
{
|
||
"$ALetter": "($ALetter $FE*)"
|
||
},
|
||
{
|
||
"$MidLetter": "($MidLetter $FE*)"
|
||
},
|
||
{
|
||
"$MidNum": "($MidNum $FE*)"
|
||
},
|
||
{
|
||
"$MidNumLet": "($MidNumLet $FE*)"
|
||
},
|
||
{
|
||
"$Numeric": "($Numeric $FE*)"
|
||
},
|
||
{
|
||
"$ExtendNumLet": "($ExtendNumLet $FE*)"
|
||
},
|
||
{
|
||
"$RI": "($RI $FE*)"
|
||
},
|
||
{
|
||
"$Hebrew_Letter": "($Hebrew_Letter $FE*)"
|
||
},
|
||
{
|
||
"$Double_Quote": "($Double_Quote $FE*)"
|
||
},
|
||
{
|
||
"$Single_Quote": "($Single_Quote $FE*)"
|
||
},
|
||
{
|
||
"$AHLetter": "($AHLetter $FE*)"
|
||
},
|
||
{
|
||
"$MidNumLetQ": "($MidNumLetQ $FE*)"
|
||
}
|
||
],
|
||
"segmentRules": {
|
||
"3": " $CR × $LF ",
|
||
"3.1": " ($Newline | $CR | $LF) ÷ ",
|
||
"3.2": " ÷ ($Newline | $CR | $LF) ",
|
||
"3.3": " $ZWJ × ($Extended_Pict | $EmojiNRK) ",
|
||
"4": " $NotBreak_ × [$Format $Extend $ZWJ] ",
|
||
"5": " $AHLetter × $AHLetter ",
|
||
"6": " $AHLetter × ($MidLetter | $MidNumLetQ) $AHLetter ",
|
||
"7": " $AHLetter ($MidLetter | $MidNumLetQ) × $AHLetter ",
|
||
"7.1": " $Hebrew_Letter × $Single_Quote ",
|
||
"7.2": " $Hebrew_Letter × $Double_Quote $Hebrew_Letter ",
|
||
"7.3": " $Hebrew_Letter $Double_Quote × $Hebrew_Letter ",
|
||
"8": " $Numeric × $Numeric ",
|
||
"9": " $AHLetter × $Numeric ",
|
||
"10": " $Numeric × $AHLetter ",
|
||
"11": " $Numeric ($MidNum | $MidNumLetQ) × $Numeric ",
|
||
"12": " $Numeric × ($MidNum | $MidNumLetQ) $Numeric ",
|
||
"13": " $Katakana × $Katakana ",
|
||
"13.1": " ($AHLetter | $Numeric | $Katakana | $ExtendNumLet) × $ExtendNumLet ",
|
||
"13.2": " $ExtendNumLet × ($AHLetter | $Numeric | $Katakana) ",
|
||
"14": " ($E_Base | $EBG) ($Format | $Extend | $ZWJ)* × $E_Modifier ",
|
||
"15": " ^ ($RI $RI)* $RI × $RI ",
|
||
"16": " [^$RI] ($RI $RI)* $RI × $RI "
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|