Advertisement
overloop

highlight_easy_japanese_text.js

Oct 14th, 2014
301
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  // Inclusive [first, last] UTF-16 code-unit bounds for each Japanese script.
  // Source: http://www.rikai.com/library/kanjitables/kanji_codes.unicode.shtml
  var ranges = {
      hiragana: [0x3040, 0x309f],
      katakana: [0x30a0, 0x30ff],
      kanji: [0x4e00, 0x9faf]
  };
  3.  
  /**
   * Counts how many UTF-16 code units of `text` fall inside each script
   * range listed in the file-level `ranges` table (bounds are inclusive).
   *
   * @param {string} text - Text to scan.
   * @returns {{hiragana: number, katakana: number, kanji: number}}
   *     Number of code units found per script.
   */
  function statRanges(text) {
      var result = {hiragana: 0, katakana: 0, kanji: 0};
      for (var i = 0; i < text.length; i++) {
          var c = text.charCodeAt(i);
          // `range` must be declared here: left undeclared it becomes an
          // implicit global, and throws a ReferenceError in strict mode.
          for (var range in ranges) {
              if (c >= ranges[range][0] && c <= ranges[range][1]) {
                  result[range]++;
              }
          }
      }
      return result;
  }
  16.  
  /**
   * Builds a lookup table from a string: for every UTF-16 code unit that
   * occurs in `text`, the returned array holds 1 at that numeric index.
   * Indices for code units that never occur are left unset.
   *
   * @param {string} text - Characters to register.
   * @returns {Array<number>} Sparse array indexed by code unit.
   */
  function textToDict(text) {
      var table = [];
      var pos = text.length;
      // Fill order is irrelevant for the result, so walk from the end.
      while (pos--) {
          table[text.charCodeAt(pos)] = 1;
      }
      return table;
  }
  24.  
  /**
   * Tallies the kanji in `text` by whether their code unit is a key of `dict`.
   * Code units outside the file-level `ranges.kanji` bounds are ignored.
   *
   * @param {string} text - Text to scan.
   * @param {Object} dict - Lookup table keyed by code unit (tested with `in`).
   * @returns {number[]} Two-element array: [known count, unknown count].
   */
  function statKnown(text, dict) {
      var tally = [0, 0];
      for (var pos = 0; pos < text.length; pos += 1) {
          var code = text.charCodeAt(pos);
          var kanjiSpan = ranges.kanji;
          // Only kanji take part in the statistic.
          if (!(code >= kanjiSpan[0] && code <= kanjiSpan[1])) {
              continue;
          }
          // Slot 0 collects known kanji, slot 1 the unknown ones.
          tally[code in dict ? 0 : 1] += 1;
      }
      return tally;
  }
  41.  
  // A paragraph is only examined when its text is longer than this many
  // characters.
  var minTextLength = 200;
  // A paragraph is highlighted when known / (known + unknown) kanji exceeds
  // this ratio.
  var minRatio = 0.65;

  /* Kanji treated as "known": the Kyōiku kanji of grades 1-3, see
   * http://en.wikipedia.org/wiki/Ky%C5%8Diku_kanji
   */
  var knownKanji_ = '一二三四五六七八九十百千上下左右中大小月日年早木林山川土空田天生花草虫犬人名女男子目耳口手足見音力気円入出立休先夕本文字学校村町森正水火玉王石竹糸貝車金雨赤青白数多少万半形太細広長点丸交光角計直線矢弱強高同親母父姉兄弟妹自友体毛頭顔首心時曜朝昼夜分週春夏秋冬今新古間方北南東西遠近前後内外場地国園谷野原里市京風雪雲池海岩星室戸家寺通門道話言答声聞語読書記紙画絵図工教晴思考知才理算作元食肉馬牛魚鳥羽鳴麦米茶色黄黒来行帰歩走止活店買売午汽弓回会組船明社切電毎合当台楽公引科歌刀番用何丁世両主乗予事仕他代住使係倍全具写列助勉動勝化区医去反取受号向君味命和品員商問坂央始委守安定実客宮宿寒対局屋岸島州帳平幸度庫庭式役待急息悪悲想意感所打投拾持指放整旅族昔昭暑暗曲有服期板柱根植業様横橋次歯死氷決油波注泳洋流消深温港湖湯漢炭物球由申界畑病発登皮皿相県真着短研礼神祭福秒究章童笛第筆等箱級終緑練羊美習者育苦荷落葉薬血表詩調談豆負起路身転軽農返追送速進遊運部都配酒重鉄銀開院陽階集面題飲館駅鼻';
  // Lookup table built from the list above, keyed by code unit.
  var knownKanji = textToDict(knownKanji_);
  // Give a yellow background to every <p> whose text is longer than
  // minTextLength characters and in which the share of known kanji, among
  // all kanji counted by statKnown, exceeds minRatio.
  $('p').each(function (index, paragraph) {
      var $paragraph = $(paragraph);
      var content = $paragraph.text();
      if (!(content.length > minTextLength)) {
          return;
      }
      var counts = statKnown(content, knownKanji);
      // counts is [known, unknown]; with no kanji at all the ratio is NaN,
      // which never exceeds minRatio, so such paragraphs stay untouched.
      var knownShare = counts[0] / (counts[0] + counts[1]);
      if (knownShare > minRatio) {
          $paragraph.css('background-color', '#FFF266');
      }
  });
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement