Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Unicode (input-side) building blocks. Each variable is a single code point,
- # a set of code points, or a short sequence assembled from earlier variables.
- # The rewrite rules further down refer to these names instead of raw values.
- $anusvara = [\u1032\u1036];
- $asat = \u103a;
- $consonant = [\u1000-\u1020\u103f\u104e];
- $consonant_na = \u1014;
- $consonant_nya = \u1009;
- $consonant_nnya = \u100a;
- $digit_except_zero = [\u1041-\u1049];
- $digit_zero = \u1040;
- $dot_below = \u1037;
- $dot_below_cluster = $dot_below $asat?; # dot below with an optional trailing asat
- $generic_base = [\u00A0\u00D7\u2012-\u2015\u2022\u25CC\u25FB-\u25FE];
- $halant = \u1039;
- $independent_vowel = [\u1021-\u102A];
- $joiner = [\u200C\u200D];
- $kinzi_start = \u1004;
- $kinzi = $kinzi_start \u103A \u1039; # same code points as $asat and $halant, written literally
- $medial_consonant_h = \u103E;
- $medial_consonant_r = \u103C;
- $medial_consonant_w = \u103D;
- $medial_consonant_y = \u103B;
- $medial_consonant_w_cluster = $medial_consonant_w $asat?;
- $medial_consonant_h_cluster = $medial_consonant_w? $medial_consonant_h $asat?;
- # NOTE(review): $common, $punctuation, $reserved, $symbol, $digit_zero,
- # $word_joiner and $ws are not referenced by any rule visible in this file.
- $common = [\p{Common}];
- $punctuation = [\u104A\u104B];
- $reserved = [\uAA7C-\uAA7F];
- $symbol = [\u104C\u104D\u104F];
- $visarga = \u1038;
- $vowel_above = [\u102D\u102E];
- $vowel_below = [\u102F\u1030];
- $vowel_pre = \u1031;
- $vowel_post = [\u102B\u102C];
- # Post vowel followed by its optional marks, up to but excluding dot below.
- $vowel_post_prefix = $vowel_post $medial_consonant_h? $asat*
- $vowel_above* $anusvara*;
- $vowel_post_cluster = $vowel_post_prefix $dot_below? $asat?;
- $vowel_u = \u102F; # first member of $vowel_below
- $vowel_uu = \u1030; # second member of $vowel_below
- $variation_selector = [\uFE00-\uFE0F];
- $word_joiner = \u2060;
- $ws = [\p{Whitespace}];
- # Cluster bases: the characters that can start a cluster, plus groupings of
- # the bases by shape. The short/tall and narrow/wide sets are combined below
- # into $cluster_base_tall, $cluster_base_wide and $cluster_base_narrow, which
- # the vowel u/uu rules and the ya yit (medial r) rules test against.
- $cluster_base = [$consonant $independent_vowel $digit_except_zero
- $generic_base];
- $cluster_base_vs = $cluster_base $variation_selector?;
- # One stacked element: halant followed by a consonant or independent vowel.
- $halant_cluster = $halant [$consonant $independent_vowel]
- $variation_selector?;
- $cluster_base_short_narrow = [\u1001\u1002\u1004\u1005\u1007\u100e
- \u1012\u1013\u1015\u1016\u1017\u1019\u101d];
- $cluster_base_short_wide = [\u1000\u1003\u1006\u100f\u1010\u1011
- \u1018\u101a\u101c\u101e\u101f\u1021];
- $cluster_base_tall_narrow = [\u100b\u100c\u100d\u1014\u101b\u1020
- \u1025\u1026\u1028];
- $cluster_base_tall_wide = [\u1008\u1009\u100a\u1023\u1024\u1029\u102a];
- $cluster_base_tall = [$cluster_base_tall_narrow $cluster_base_tall_wide];
- $cluster_base_wide = [$cluster_base_short_wide $cluster_base_tall_wide];
- $cluster_base_narrow = [$cluster_base_short_narrow $cluster_base_tall_narrow];
- # PUA placeholders for code points which are re-used for different meanings
- # The four Unicode medials are swapped for U+E100..U+E103 early on and only
- # mapped to their Zawgyi values in the last pass, so intermediate rules can
- # tell a not-yet-converted medial apart from an already-emitted Zawgyi value.
- $placeholder_medial_consonant_r = \uE100;
- $placeholder_medial_consonant_y = \uE101;
- $placeholder_medial_consonant_w = \uE102;
- $placeholder_medial_consonant_h = \uE103;
- # Placeholder counterparts of $medial_consonant_w_cluster / $medial_consonant_h_cluster.
- $placeholder_medial_consonant_w_cluster = $placeholder_medial_consonant_w $asat?;
- $placeholder_medial_consonant_h_cluster = $placeholder_medial_consonant_w? $placeholder_medial_consonant_h $asat?;
- # Zawgyi versions of Unicode code points
- # Several of these values coincide with Unicode-side variables defined above:
- # $zawgyi_asat has the value of $halant, $zawgyi_medial_r_narrow that of
- # $medial_consonant_y, $zawgyi_medial_consonant_w that of $medial_consonant_r,
- # and $zawgyi_medial_consonant_h that of $medial_consonant_w.
- $zawgyi_asat = \u1039;
- $zawgyi_consonant_na_without_tail = \u108f;
- $zawgyi_consonant_nya_without_tail = \u106a;
- $zawgyi_consonant_nnya_without_tail = \u106b;
- # Medial r (ya yit) variants: wide/narrow, optionally with a shortened top or bottom.
- $zawgyi_medial_r_wide = \u107e;
- $zawgyi_medial_r_narrow = \u103b;
- $zawgyi_medial_r_wide_short_top = \u1080;
- $zawgyi_medial_r_narrow_short_top = \u107f;
- $zawgyi_medial_r_wide_short_bottom = \u1082;
- $zawgyi_medial_r_narrow_short_bottom = \u1081;
- # TODO: Two zawgyi versions of medial Y (also U+103A), figure out which ya pin to use
- $zawgyi_medial_consonant_y = \u107d;
- $zawgyi_medial_consonant_w = \u103c;
- $zawgyi_medial_consonant_h = \u103d;
- # Alternate forms substituted by the "move vowel u or uu" rules.
- $zawgyi_vowel_u_post = \u1033;
- $zawgyi_vowel_uu_post = \u1034;
- # Alternate dot below substituted by the "dot moves to right" rules.
- $zawgyi_dot_below_right = \u1094;
- # Pass 1: reorder each cluster from logical (stored) order to visual order.
- # Logical to visual order: Cluster terminating in halant
- # Captures: $1 kinzi, $2 base (with optional variation selector), $3 stacked
- # consonants, $4 the final halant. Only the kinzi moves: it is emitted after
- # the base instead of before it.
- ($kinzi?) ($cluster_base_vs) ($halant_cluster*) ($halant) > $2 $1 $3 $4;
- # Logical to visual order: Complex cluster
- # VISUAL ORDER:
- #
- # vowel_pre medial_consonant_r cluster_base kinzi? halant_cluster*
- # asat medial_consonant_y medial_consonant_w_cluster medial_consonant_h_cluster
- # vowel_above anusvara vowel_below dot_below_cluster vowel_post_cluster visarga
- # joiner
- #
- # Capture groups of the rule below, in source order:
- # $1 kinzi, $2 cluster_base_vs, $3 halant_cluster*, $4 asat, $5 medial y,
- # $6 medial r, $7 medial w cluster, $8 medial h cluster, $9 vowel_pre*,
- # $10 vowel_above*, $11 vowel_below*, $12 anusvara*, $13 dot_below_cluster,
- # $14 vowel_post_cluster, $15 visarga*, $16 joiner.
- # The replacement moves $9 and $6 in front of the base, places the kinzi ($1)
- # after the base, and emits $12 before $11 so anusvara precedes vowel_below.
- ($kinzi?) ($cluster_base_vs) ($halant_cluster*)
- ($asat?) ($medial_consonant_y?) ($medial_consonant_r?)
- ($medial_consonant_w_cluster)? ($medial_consonant_h_cluster)?
- ($vowel_pre*) ($vowel_above*) ($vowel_below*) ($anusvara*) ($dot_below_cluster)?
- ($vowel_post_cluster)* ($visarga*) ($joiner?) > $9 $6 $2 $1 $3 $4 $5 $7 $8 $10 $12 $11 $13 $14 $15 $16;
- ::Null;
- # Pass 2: replace every Unicode medial with its PUA placeholder. From here
- # until the final pass, medials appear only as the $placeholder_* values.
- $medial_consonant_r > $placeholder_medial_consonant_r;
- $medial_consonant_y > $placeholder_medial_consonant_y;
- $medial_consonant_w > $placeholder_medial_consonant_w;
- $medial_consonant_h > $placeholder_medial_consonant_h;
- ::Null;
- # Pass 3: swap na, nya and nnya for their tail-less Zawgyi forms when
- # something is attached below. All three rules use the same look-ahead (the
- # text after `}` is context only): an optional kinzi, an optional vowel above,
- # then a halant, a medial placeholder or a vowel below.
- # Na loses its tail with consonant cluster, vowels, or medial consonants below
- $consonant_na } $kinzi? $vowel_above? [$halant $placeholder_medial_consonant_r $placeholder_medial_consonant_y $placeholder_medial_consonant_w $placeholder_medial_consonant_h $vowel_below] > $zawgyi_consonant_na_without_tail;
- # Nya + lower diacritic loses right side of tail
- $consonant_nya } $kinzi? $vowel_above? [$halant $placeholder_medial_consonant_r $placeholder_medial_consonant_y $placeholder_medial_consonant_w $placeholder_medial_consonant_h $vowel_below] > $zawgyi_consonant_nya_without_tail;
- # Nnya + lower diacritic loses right side of tail
- $consonant_nnya } $kinzi? $vowel_above? [$halant $placeholder_medial_consonant_r $placeholder_medial_consonant_y $placeholder_medial_consonant_w $placeholder_medial_consonant_h $vowel_below] > $zawgyi_consonant_nnya_without_tail;
- ::Null;
- # Pass 4: substitute the alternate ("post") form of vowel u / uu. In each rule
- # everything before `{` is context only; just the vowel itself is rewritten.
- # The parenthesised ($dot_below_cluster)? group is not used in the replacement.
- # Move vowel u or uu after the base if medial r surrounds
- $placeholder_medial_consonant_r $cluster_base_vs $kinzi? $halant_cluster* $asat? $placeholder_medial_consonant_y? $placeholder_medial_consonant_w_cluster? $placeholder_medial_consonant_h_cluster? $vowel_above? $anusvara? ($dot_below_cluster)? { $vowel_u > $zawgyi_vowel_u_post;
- $placeholder_medial_consonant_r $cluster_base_vs $kinzi? $halant_cluster* $asat? $placeholder_medial_consonant_y? $placeholder_medial_consonant_w_cluster? $placeholder_medial_consonant_h_cluster? $vowel_above? $anusvara? ($dot_below_cluster)? { $vowel_uu > $zawgyi_vowel_uu_post;
- # Move vowel u or uu after the base if tall cluster base collides
- $cluster_base_tall $variation_selector? $kinzi? $halant_cluster* $asat? $placeholder_medial_consonant_y? $placeholder_medial_consonant_w_cluster? $placeholder_medial_consonant_h_cluster? $vowel_above? $anusvara? ($dot_below_cluster)? { $vowel_u > $zawgyi_vowel_u_post;
- $cluster_base_tall $variation_selector? $kinzi? $halant_cluster* $asat? $placeholder_medial_consonant_y? $placeholder_medial_consonant_w_cluster? $placeholder_medial_consonant_h_cluster? $vowel_above? $anusvara? ($dot_below_cluster)? { $vowel_uu > $zawgyi_vowel_uu_post;
- ::Null;
- # Pass 5 (first half): pick the Zawgyi ya yit (medial r) variant. Each rule
- # rewrites only the medial r placeholder; the text after `}` is look-ahead
- # context describing the base width and what follows the base. A placeholder
- # that matches none of these rules is left in place and is mapped to
- # $zawgyi_medial_r_narrow by the "Restore placeholders" rules in the last pass.
- # Wide ya yit: Wide base, nothing above or below
- $placeholder_medial_consonant_r } $cluster_base_wide [^ $kinzi_start $halant $placeholder_medial_consonant_y $placeholder_medial_consonant_w $placeholder_medial_consonant_h $vowel_above $vowel_below $anusvara $dot_below] > $zawgyi_medial_r_wide;
- # Narrow ya yit: Narrow base, nothing above or below
- $placeholder_medial_consonant_r } $cluster_base_narrow [^ $kinzi_start $halant $placeholder_medial_consonant_y $placeholder_medial_consonant_w $placeholder_medial_consonant_h $vowel_above $vowel_below $anusvara $dot_below] > $zawgyi_medial_r_narrow;
- # Wide ya yit with short top: Wide base, kinzi above, nothing below
- # (the look-ahead tests only $kinzi_start, not the full $kinzi sequence)
- $placeholder_medial_consonant_r } $cluster_base_wide $kinzi_start [^ $halant $placeholder_medial_consonant_y $placeholder_medial_consonant_w $placeholder_medial_consonant_h $vowel_below $dot_below] > $zawgyi_medial_r_wide_short_top;
- # Wide ya yit with short top: Wide base, vowel or anusvara above, nothing below
- # NOTE(review): the trailing negated set also lists $anusvara, which the
- # preceding repeated set already accepts - confirm this is intended.
- $placeholder_medial_consonant_r } $cluster_base_wide [$vowel_above $anusvara]+ [^$vowel_below $anusvara $dot_below] > $zawgyi_medial_r_wide_short_top;
- # Narrow ya yit with short top: Narrow base, kinzi above, nothing below
- $placeholder_medial_consonant_r } $cluster_base_narrow $kinzi_start [^ $halant $placeholder_medial_consonant_y $placeholder_medial_consonant_w $placeholder_medial_consonant_h $vowel_below $dot_below] > $zawgyi_medial_r_narrow_short_top;
- # Narrow ya yit with short top: Narrow base, vowel or anusvara above, nothing below
- $placeholder_medial_consonant_r } $cluster_base_narrow [$vowel_above $anusvara]+ [^$vowel_below $anusvara $dot_below] > $zawgyi_medial_r_narrow_short_top;
- # Narrow ya yit with short bottom: Narrow base, nothing above, medial w below
- $placeholder_medial_consonant_r } $cluster_base_narrow $placeholder_medial_consonant_w > $zawgyi_medial_r_narrow_short_bottom;
- # Wide ya yit with short bottom: Wide base, nothing above, medial w below
- $placeholder_medial_consonant_r } $cluster_base_wide $placeholder_medial_consonant_w > $zawgyi_medial_r_wide_short_bottom;
- # Pass 5 (second half): replace dot below with $zawgyi_dot_below_right when
- # something else already sits below the base. In every rule the text before
- # `{` is context only. The rewritten span is the dot below, an optional asat
- # and (except in the last rule) an optional post-vowel cluster.
- #
- # The dot and its optional asat are matched as `$dot_below ($asat?)` rather
- # than `$dot_below_cluster`, so the asat is captured and re-emitted after the
- # moved dot. Matching the cluster variable and emitting only the dot would
- # drop an asat that follows the dot below (e.g. na + dot below + asat).
- # Captures: $1 optional asat, $2 optional post-vowel cluster.
- # Na with dot below: dot moves to right
- $consonant_na $kinzi? $asat? $placeholder_medial_consonant_y? $placeholder_medial_consonant_w_cluster? $placeholder_medial_consonant_h_cluster? $vowel_above* $anusvara* $vowel_below? { $dot_below ($asat?) ($vowel_post_cluster?) > $2 $zawgyi_dot_below_right $1;
- # Consonant cluster with dot below: dot moves to right
- $halant_cluster+ $asat? $placeholder_medial_consonant_y? $placeholder_medial_consonant_w_cluster? $placeholder_medial_consonant_h_cluster? $vowel_above* $anusvara* $vowel_below? { $dot_below ($asat?) ($vowel_post_cluster?) > $2 $zawgyi_dot_below_right $1;
- # Anything else below with dot below: dot moves to right
- $placeholder_medial_consonant_y $placeholder_medial_consonant_w_cluster? $placeholder_medial_consonant_h_cluster? $vowel_above* $anusvara* $vowel_below? { $dot_below ($asat?) ($vowel_post_cluster?) > $2 $zawgyi_dot_below_right $1;
- $placeholder_medial_consonant_w_cluster $placeholder_medial_consonant_h_cluster? $vowel_above* $anusvara* $vowel_below? { $dot_below ($asat?) ($vowel_post_cluster?) > $2 $zawgyi_dot_below_right $1;
- $placeholder_medial_consonant_h_cluster $vowel_above* $anusvara* $vowel_below? { $dot_below ($asat?) ($vowel_post_cluster?) > $2 $zawgyi_dot_below_right $1;
- $vowel_below { $dot_below ($asat?) ($vowel_post_cluster?) > $2 $zawgyi_dot_below_right $1;
- # Post vowel with dot below: dot moves to right
- # Here the dot already follows the post vowel, so only its form changes.
- $vowel_post_prefix { $dot_below ($asat?) > $zawgyi_dot_below_right $1;
- ::Null;
- # Pass 6 (first part): fixed sequences that map to a single Zawgyi code point.
- # These share a pass with the stacked-consonant rules that follow, so at any
- # text position the first rule in file order that matches is the one applied.
- # Pre-defined ligatures
- \u103F > \u1086;
- # The four-code-point sequence collapses to U+104E alone.
- \u104E\u1004\u103A\u1038 > \u104E;
- \u100B\u1039\u100B > \u1097;
- \u100B\u1039\u100C > \u1092;
- \u100F\u1039\u100D > \u1091;
- \u100D\u1039\u100E > \u106F;
- \u100D\u1039\u100D > \u106E;
- # Ha hto + u/uu ligatures
- # Medial h is still a placeholder at this point, hence $placeholder_medial_consonant_h.
- $placeholder_medial_consonant_h $vowel_u > \u1088;
- # TODO bhamilton Is this really a thing??
- $placeholder_medial_consonant_h $vowel_uu > \u1089;
- # Stacked Consonants
- # Each rule replaces halant + consonant with the single Zawgyi code point for
- # the stacked (subscript) form of that consonant. Lines starting with `#$halant`
- # are alternative targets the original author left disabled.
- $halant \u101C > \u1085;
- $halant \u1019 > \u107C;
- # XXX TODO bhamilton which one?? U+107B and U+1093 look the same in zawgyi
- $halant \u1018 > \u1093;
- #$halant \u1018 > \u107B;
- $halant \u1017 > \u107A;
- $halant \u1016 > \u1079;
- $halant \u1015 > \u1078;
- $halant \u1014 > \u1077;
- $halant \u1013 > \u1076;
- $halant \u1012 > \u1075;
- # XXX TODO bhamilton U+1073 and U+1074 look the same, which to use?
- $halant \u1011 > \u1074;
- #$halant \u1011 > \u1073;
- # Stacked U+1010 followed by medial w. Medial w was already replaced by its
- # placeholder in an earlier pass, so the placeholder is what must be matched
- # here; the raw Unicode medial w can no longer occur at this point. This rule
- # has to stay ahead of the plain U+1010 rule below, which would otherwise
- # consume the halant and consonant first.
- $halant \u1010 $placeholder_medial_consonant_w > \u1096;
- # XXX TODO bhamilton U+1072 and U+1071 look the same
- $halant \u1010 > \u1072;
- #$halant \u1010 > \u1071;
- $halant \u100F > \u1070;
- $halant \u100C > \u106D;
- $halant \u100B > \u106C;
- $halant \u1008 > \u1069;
- $halant \u1007 > \u1068;
- # XXX TODO bhamilton U+1067 U+1066 look the same
- $halant \u1006 > \u1067;
- #$halant \u1006 > \u1066;
- $halant \u1005 > \u1065;
- $halant \u1003 > \u1063;
- $halant \u1002 > \u1062;
- $halant \u1001 > \u1061;
- $halant \u1000 > \u1060;
- # Special cases for 1025 vs 1009
- # Nya followed by one of the stacked consonants below is written as U+1025
- # plus the stacked form. By the time this pass runs, the earlier tail-removal
- # pass has already turned a nya that precedes a halant into
- # $zawgyi_consonant_nya_without_tail, so each rule accepts either the original
- # nya or that tail-less form; matching U+1009 alone would never fire.
- [$consonant_nya $zawgyi_consonant_nya_without_tail] \u1039 \u1016 > \u1025 \u1079;
- [$consonant_nya $zawgyi_consonant_nya_without_tail] \u1039 \u1017 > \u1025 \u107A;
- [$consonant_nya $zawgyi_consonant_nya_without_tail] \u1039 \u1015 > \u1025 \u1078;
- [$consonant_nya $zawgyi_consonant_nya_without_tail] \u1039 \u1013 > \u1025 \u1076;
- [$consonant_nya $zawgyi_consonant_nya_without_tail] \u1039 \u1007 > \u1025 \u1068;
- [$consonant_nya $zawgyi_consonant_nya_without_tail] \u1039 \u1005 > \u1025 \u1065;
- [$consonant_nya $zawgyi_consonant_nya_without_tail] \u1039 \u1002 > \u1025 \u1062;
- [$consonant_nya $zawgyi_consonant_nya_without_tail] \u1039 \u1001 > \u1025 \u1061;
- ::Null;
- # Final pass: emit the Zawgyi code points for asat and for whatever medial
- # placeholders are still present.
- # Zawgyi versions of each of these
- $asat > $zawgyi_asat;
- # Restore placeholders
- # A medial r placeholder that none of the ya yit rules rewrote falls back to
- # the narrow form here.
- $placeholder_medial_consonant_r > $zawgyi_medial_r_narrow;
- $placeholder_medial_consonant_y > $zawgyi_medial_consonant_y;
- $placeholder_medial_consonant_w > $zawgyi_medial_consonant_w;
- $placeholder_medial_consonant_h > $zawgyi_medial_consonant_h;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement