// Source: cullmann.io/themes/CodeIT/assets/lib/lunr/lunr.TinySegmenter.js
// Repository view metadata: 1531 lines, 32 KiB, JavaScript; 2021-02-18 22:21:36 +01:00
/**
* export the module via AMD, CommonJS or as a browser global
* Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
*/
(function (root, factory) {
if (typeof define === "function" && define.amd) {
// AMD. Register as an anonymous module.
define(factory);
} else if (typeof exports === "object") {
/**
* Node. Does not work with strict CommonJS, but
* only CommonJS-like environments that support module.exports,
* like Node.
*/
module.exports = factory();
} else {
// Browser globals (root is window)
factory()(root.lunr);
}
})(this, function () {
/**
* Return the value that defines the module export: a function that
* takes the lunr object and attaches TinySegmenter to it as
* lunr.TinySegmenter.
*/
return function (lunr) {
// TinySegmenter 0.1 -- Super compact Japanese tokenizer in Javascript
// (c) 2008 Taku Kudo <taku@chasen.org>
// TinySegmenter is freely distributable under the terms of a new BSD licence.
// For details, see http://chasen.org/~taku/software/TinySegmenter/LICENCE.txt
function TinySegmenter() {
var patterns = {
"[一二三四五六七八九十百千万億兆]": "M",
"[一-龠々〆ヵヶ]": "H",
"[ぁ-ん]": "I",
"[ァ-ヴーア-ン゙ー]": "K",
"[a-zA-Z--]": "A",
"[0-9-]": "N",
};
this.chartype_ = [];
for (var i in patterns) {
var regexp = new RegExp(i);
this.chartype_.push([regexp, patterns[i]]);
}
this.BIAS__ = -332;
this.BC1__ = { HH: 6, II: 2461, KH: 406, OH: -1378 };
this.BC2__ = {
AA: -3267,
AI: 2744,
AN: -878,
HH: -4070,
HM: -1711,
HN: 4012,
HO: 3761,
IA: 1327,
IH: -1184,
II: -1332,
IK: 1721,
IO: 5492,
KI: 3831,
KK: -8741,
MH: -3132,
MK: 3334,
OO: -2920,
};
this.BC3__ = {
HH: 996,
HI: 626,
HK: -721,
HN: -1307,
HO: -836,
IH: -301,
KK: 2762,
MK: 1079,
MM: 4034,
OA: -1652,
OH: 266,
};
this.BP1__ = { BB: 295, OB: 304, OO: -125, UB: 352 };
this.BP2__ = { BO: 60, OO: -1762 };
this.BQ1__ = {
BHH: 1150,
BHM: 1521,
BII: -1158,
BIM: 886,
BMH: 1208,
BNH: 449,
BOH: -91,
BOO: -2597,
OHI: 451,
OIH: -296,
OKA: 1851,
OKH: -1020,
OKK: 904,
OOO: 2965,
};
this.BQ2__ = {
BHH: 118,
BHI: -1159,
BHM: 466,
BIH: -919,
BKK: -1720,
BKO: 864,
OHH: -1139,
OHM: -181,
OIH: 153,
UHI: -1146,
};
this.BQ3__ = {
BHH: -792,
BHI: 2664,
BII: -299,
BKI: 419,
BMH: 937,
BMM: 8335,
BNN: 998,
BOH: 775,
OHH: 2174,
OHM: 439,
OII: 280,
OKH: 1798,
OKI: -793,
OKO: -2242,
OMH: -2402,
OOO: 11699,
};
this.BQ4__ = {
BHH: -3895,
BIH: 3761,
BII: -4654,
BIK: 1348,
BKK: -1806,
BMI: -3385,
BOO: -12396,
OAH: 926,
OHH: 266,
OHK: -2036,
ONN: -973,
};
this.BW1__ = {
",と": 660,
",同": 727,
B1あ: 1404,
B1同: 542,
"、と": 660,
"、同": 727,
"」と": 1682,
あっ: 1505,
いう: 1743,
いっ: -2055,
いる: 672,
うし: -4817,
うん: 665,
から: 3472,
がら: 600,
こう: -790,
こと: 2083,
こん: -1262,
さら: -4143,
さん: 4573,
した: 2641,
して: 1104,
すで: -3399,
そこ: 1977,
それ: -871,
たち: 1122,
ため: 601,
った: 3463,
つい: -802,
てい: 805,
てき: 1249,
でき: 1127,
です: 3445,
では: 844,
とい: -4915,
とみ: 1922,
どこ: 3887,
ない: 5713,
なっ: 3015,
など: 7379,
なん: -1113,
にし: 2468,
には: 1498,
にも: 1671,
に対: -912,
の一: -501,
の中: 741,
ませ: 2448,
まで: 1711,
まま: 2600,
まる: -2155,
やむ: -1947,
よっ: -2565,
れた: 2369,
れで: -913,
をし: 1860,
を見: 731,
亡く: -1886,
京都: 2558,
取り: -2784,
大き: -2604,
大阪: 1497,
平方: -2314,
引き: -1336,
日本: -195,
本当: -2423,
毎日: -2113,
目指: -724,
B1あ: 1404,
B1同: 542,
"」と": 1682,
};
this.BW2__ = {
"..": -11822,
11: -669,
"――": -5730,
"": -13175,
いう: -1609,
うか: 2490,
かし: -1350,
かも: -602,
から: -7194,
かれ: 4612,
がい: 853,
がら: -3198,
きた: 1941,
くな: -1597,
こと: -8392,
この: -4193,
させ: 4533,
され: 13168,
さん: -3977,
しい: -1819,
しか: -545,
した: 5078,
して: 972,
しな: 939,
その: -3744,
たい: -1253,
たた: -662,
ただ: -3857,
たち: -786,
たと: 1224,
たは: -939,
った: 4589,
って: 1647,
っと: -2094,
てい: 6144,
てき: 3640,
てく: 2551,
ては: -3110,
ても: -3065,
でい: 2666,
でき: -1528,
でし: -3828,
です: -4761,
でも: -4203,
とい: 1890,
とこ: -1746,
とと: -2279,
との: 720,
とみ: 5168,
とも: -3941,
ない: -2488,
なが: -1313,
など: -6509,
なの: 2614,
なん: 3099,
にお: -1615,
にし: 2748,
にな: 2454,
によ: -7236,
に対: -14943,
に従: -4688,
に関: -11388,
のか: 2093,
ので: -7059,
のに: -6041,
のの: -6125,
はい: 1073,
はが: -1033,
はず: -2532,
ばれ: 1813,
まし: -1316,
まで: -6621,
まれ: 5409,
めて: -3153,
もい: 2230,
もの: -10713,
らか: -944,
らし: -1611,
らに: -1897,
りし: 651,
りま: 1620,
れた: 4270,
れて: 849,
れば: 4114,
ろう: 6067,
われ: 7901,
を通: -11877,
んだ: 728,
んな: -4115,
一人: 602,
一方: -1375,
一日: 970,
一部: -1051,
上が: -4479,
会社: -1116,
出て: 2163,
分の: -7758,
同党: 970,
同日: -913,
大阪: -2471,
委員: -1250,
少な: -1050,
年度: -8669,
年間: -1626,
府県: -2363,
手権: -1982,
新聞: -4066,
日新: -722,
日本: -7068,
日米: 3372,
曜日: -601,
朝鮮: -2355,
本人: -2697,
東京: -1543,
然と: -1384,
社会: -1276,
立て: -990,
第に: -1612,
米国: -4268,
"": -669,
};
this.BW3__ = {
あた: -2194,
あり: 719,
ある: 3846,
"い.": -1185,
"い。": -1185,
いい: 5308,
いえ: 2079,
いく: 3029,
いた: 2056,
いっ: 1883,
いる: 5600,
いわ: 1527,
うち: 1117,
うと: 4798,
えと: 1454,
"か.": 2857,
"か。": 2857,
かけ: -743,
かっ: -4098,
かに: -669,
から: 6520,
かり: -2670,
"が,": 1816,
"が、": 1816,
がき: -4855,
がけ: -1127,
がっ: -913,
がら: -4977,
がり: -2064,
きた: 1645,
けど: 1374,
こと: 7397,
この: 1542,
ころ: -2757,
さい: -714,
さを: 976,
"し,": 1557,
"し、": 1557,
しい: -3714,
した: 3562,
して: 1449,
しな: 2608,
しま: 1200,
"す.": -1310,
"す。": -1310,
する: 6521,
"ず,": 3426,
"ず、": 3426,
ずに: 841,
そう: 428,
"た.": 8875,
"た。": 8875,
たい: -594,
たの: 812,
たり: -1183,
たる: -853,
"だ.": 4098,
"だ。": 4098,
だっ: 1004,
った: -4748,
って: 300,
てい: 6240,
てお: 855,
ても: 302,
です: 1437,
でに: -1482,
では: 2295,
とう: -1387,
とし: 2266,
との: 541,
とも: -3543,
どう: 4664,
ない: 1796,
なく: -903,
など: 2135,
"に,": -1021,
"に、": -1021,
にし: 1771,
にな: 1906,
には: 2644,
"の,": -724,
"の、": -724,
の子: -1000,
"は,": 1337,
"は、": 1337,
べき: 2181,
まし: 1113,
ます: 6943,
まっ: -1549,
まで: 6154,
まれ: -793,
らし: 1479,
られ: 6820,
るる: 3818,
"れ,": 854,
"れ、": 854,
れた: 1850,
れて: 1375,
れば: -3246,
れる: 1091,
われ: -605,
んだ: 606,
んで: 798,
カ月: 990,
会議: 860,
入り: 1232,
大会: 2217,
始め: 1681,
: 965,
新聞: -5055,
"日,": 974,
"日、": 974,
社会: 2024,
カ月: 990,
};
this.TC1__ = {
AAA: 1093,
HHH: 1029,
HHM: 580,
HII: 998,
HOH: -390,
HOM: -331,
IHI: 1169,
IOH: -142,
IOI: -1015,
IOM: 467,
MMH: 187,
OOI: -1832,
};
this.TC2__ = {
HHO: 2088,
HII: -1023,
HMM: -1154,
IHI: -1965,
KKH: 703,
OII: -2649,
};
this.TC3__ = {
AAA: -294,
HHH: 346,
HHI: -341,
HII: -1088,
HIK: 731,
HOH: -1486,
IHH: 128,
IHI: -3041,
IHO: -1935,
IIH: -825,
IIM: -1035,
IOI: -542,
KHH: -1216,
KKA: 491,
KKH: -1217,
KOK: -1009,
MHH: -2694,
MHM: -457,
MHO: 123,
MMH: -471,
NNH: -1689,
NNO: 662,
OHO: -3393,
};
this.TC4__ = {
HHH: -203,
HHI: 1344,
HHK: 365,
HHM: -122,
HHN: 182,
HHO: 669,
HIH: 804,
HII: 679,
HOH: 446,
IHH: 695,
IHO: -2324,
IIH: 321,
III: 1497,
IIO: 656,
IOO: 54,
KAK: 4845,
KKA: 3386,
KKK: 3065,
MHH: -405,
MHI: 201,
MMH: -241,
MMM: 661,
MOM: 841,
};
this.TQ1__ = {
BHHH: -227,
BHHI: 316,
BHIH: -132,
BIHH: 60,
BIII: 1595,
BNHH: -744,
BOHH: 225,
BOOO: -908,
OAKK: 482,
OHHH: 281,
OHIH: 249,
OIHI: 200,
OIIH: -68,
};
this.TQ2__ = { BIHH: -1401, BIII: -1033, BKAK: -543, BOOO: -5591 };
this.TQ3__ = {
BHHH: 478,
BHHM: -1073,
BHIH: 222,
BHII: -504,
BIIH: -116,
BIII: -105,
BMHI: -863,
BMHM: -464,
BOMH: 620,
OHHH: 346,
OHHI: 1729,
OHII: 997,
OHMH: 481,
OIHH: 623,
OIIH: 1344,
OKAK: 2792,
OKHH: 587,
OKKA: 679,
OOHH: 110,
OOII: -685,
};
this.TQ4__ = {
BHHH: -721,
BHHM: -3604,
BHII: -966,
BIIH: -607,
BIII: -2181,
OAAA: -2763,
OAKK: 180,
OHHH: -294,
OHHI: 2446,
OHHO: 480,
OHIH: -1573,
OIHH: 1935,
OIHI: -493,
OIIH: 626,
OIII: -4007,
OKAK: -8156,
};
this.TW1__ = { につい: -4681, 東京都: 2026 };
this.TW2__ = {
ある程: -2049,
いった: -1256,
ころが: -2434,
しょう: 3873,
その後: -4430,
だって: -1049,
ていた: 1833,
として: -4657,
ともに: -4517,
もので: 1882,
一気に: -792,
初めて: -1512,
同時に: -8097,
大きな: -1255,
対して: -2721,
社会党: -3216,
};
this.TW3__ = {
いただ: -1734,
してい: 1314,
として: -4314,
につい: -5483,
にとっ: -5989,
に当た: -6247,
"ので,": -727,
"ので、": -727,
のもの: -600,
れから: -3752,
十二月: -2287,
};
this.TW4__ = {
"いう.": 8576,
"いう。": 8576,
からな: -2348,
してい: 2958,
"たが,": 1516,
"たが、": 1516,
ている: 1538,
という: 1349,
ました: 5543,
ません: 1097,
ようと: -4258,
よると: 5865,
};
this.UC1__ = { A: 484, K: 93, M: 645, O: -505 };
this.UC2__ = { A: 819, H: 1059, I: 409, M: 3987, N: 5775, O: 646 };
this.UC3__ = { A: -1370, I: 2311 };
this.UC4__ = {
A: -2643,
H: 1809,
I: -1032,
K: -3450,
M: 3565,
N: 3876,
O: 6646,
};
this.UC5__ = { H: 313, I: -1238, K: -799, M: 539, O: -831 };
this.UC6__ = { H: -506, I: -253, K: 87, M: 247, O: -387 };
this.UP1__ = { O: -214 };
this.UP2__ = { B: 69, O: 935 };
this.UP3__ = { B: 189 };
this.UQ1__ = {
BH: 21,
BI: -12,
BK: -99,
BN: 142,
BO: -56,
OH: -95,
OI: 477,
OK: 410,
OO: -2422,
};
this.UQ2__ = { BH: 216, BI: 113, OK: 1759 };
this.UQ3__ = {
BA: -479,
BH: 42,
BI: 1913,
BK: -7198,
BM: 3160,
BN: 6427,
BO: 14761,
OI: -827,
ON: -3212,
};
this.UW1__ = {
",": 156,
"、": 156,
"「": -463,
: -941,
: -127,
: -553,
: 121,
: 505,
: -201,
: -547,
: -123,
: -789,
: -185,
: -847,
: -466,
: -470,
: 182,
: -292,
: 208,
: 169,
: -446,
: -137,
"・": -135,
: -402,
: -268,
: -912,
: 871,
: -460,
: 561,
: 729,
: -411,
: -141,
: 361,
: -408,
: -386,
: -718,
"「": -463,
"・": -135,
};
this.UW2__ = {
",": -829,
"、": -829,
: 892,
"「": -645,
"」": 3145,
: -538,
: 505,
: 134,
: -502,
: 1454,
: -856,
: -412,
: 1141,
: 878,
: 540,
: 1529,
: -675,
: 300,
: -1011,
: 188,
: 1837,
: -949,
: -291,
: -268,
: -981,
: 1273,
: 1063,
: -1764,
: 130,
: -409,
: -1273,
: 1261,
: 600,
: -1263,
: -402,
: 1639,
: -579,
: -694,
: 571,
: -2516,
: 2095,
: -587,
: 306,
: 568,
: 831,
: -758,
: -2150,
: -302,
: -968,
: -861,
: 492,
: -123,
: 978,
: 362,
: 548,
: -3025,
: -1566,
: -3414,
: -422,
: -1769,
: -865,
: -483,
: -1519,
: 760,
: 1023,
: -2009,
: -813,
: -1060,
: 1067,
: -1519,
: -1033,
: 1522,
: -1355,
: -1682,
: -1815,
: -1462,
: -630,
: -1843,
: -1650,
: -931,
: -665,
: -2378,
: -180,
: -1740,
: 752,
: 529,
: -1584,
: -242,
: -1165,
: -763,
: 810,
: 509,
: -1353,
: 838,
西: -744,
: -3874,
調: 1010,
: 1198,
: 3041,
: 1758,
: -1257,
"「": -645,
"」": 3145,
: 831,
: -587,
: 306,
: 568,
};
this.UW3__ = {
",": 4889,
1: -800,
"": -1723,
"、": 4889,
: -2311,
: 5827,
"」": 2670,
"〓": -3573,
: -2696,
: 1006,
: 2342,
: 1983,
: -4864,
: -1163,
: 3271,
: 1004,
: 388,
: 401,
: -3552,
: -3116,
: -1058,
: -395,
: 584,
: 3685,
: -5228,
: 842,
: -521,
: -1444,
: -1081,
: 6167,
: 2318,
: 1691,
: -899,
: -2788,
: 2745,
: 4056,
: 4555,
: -2171,
: -1798,
: 1199,
: -5516,
: -4384,
: -120,
: 1205,
: 2323,
: -788,
: -202,
: 727,
: 649,
: 5905,
: 2773,
: -1207,
: 6620,
: -518,
: 551,
: 1319,
: 874,
: -1350,
: 521,
: 1109,
: 1591,
: 2201,
: 278,
"・": -3794,
: -1619,
: -1759,
: -2087,
: 3815,
: 653,
: -758,
: -1193,
: 974,
: 2742,
: 792,
: 1889,
: -1368,
: 811,
: 4265,
: -361,
: -2439,
: 4858,
: 3593,
: 1574,
: -3030,
: 755,
: -1880,
: 5807,
: 3095,
: 457,
: 2475,
: 1129,
: 2286,
: 4437,
: 365,
: -949,
: -1872,
: 1327,
: -1038,
: 4646,
: -2309,
: -783,
: -1006,
: 483,
: 1233,
: 3588,
: -241,
: 3906,
: -837,
: 4513,
: 642,
: 1389,
: 1219,
: -241,
: 2016,
: -1356,
: -423,
: -1008,
: 1078,
: -513,
: -3102,
: 1155,
: 3197,
: -1804,
: 2416,
: -1030,
: 1605,
: 1452,
: -2352,
: -3885,
: 1905,
: -1291,
: 1822,
: -488,
: -3973,
: -2013,
: -1479,
: 3222,
: -1489,
: 1764,
: 2099,
: 5792,
: -661,
: -1248,
: -951,
: -937,
: 4125,
: 360,
: 3094,
: 364,
: -805,
: 5156,
: 2438,
: 484,
: 2613,
: -1694,
: -1073,
: 1868,
: -495,
: 979,
: 461,
: -3850,
: -273,
: 914,
: 1215,
: 7313,
: -1835,
: 792,
: 6293,
: -1528,
: 4231,
: 401,
: -960,
: 1201,
: 7767,
: 3066,
: 3663,
: 1384,
: -4229,
: 1163,
: 1255,
: 6457,
: 725,
: -2869,
: 785,
: 1044,
調: -562,
: -733,
: 1777,
: 1835,
: 1375,
: -1504,
: -1136,
: -681,
: 1026,
: 4404,
: 1200,
: 2163,
: 421,
: -1432,
: 1302,
: -1282,
: 2009,
: -1045,
: 2066,
: 1620,
"": -800,
"」": 2670,
"・": -3794,
: -1350,
: 551,
グ: 1319,
: 874,
: 521,
: 1109,
: 1591,
: 2201,
: 278,
};
this.UW4__ = {
",": 3930,
".": 3508,
"―": -4841,
"、": 3930,
"。": 3508,
: 4999,
"「": 1895,
"」": 3798,
"〓": -5156,
: 4752,
: -3435,
: -640,
: -2514,
: 2405,
: 530,
: 6006,
: -4482,
: -3821,
: -3788,
: -4376,
: -4734,
: 2255,
: 1979,
: 2864,
: -843,
: -2506,
: -731,
: 1251,
: 181,
: 4091,
: 5034,
: 5408,
: -3654,
: -5882,
: -1659,
: 3994,
: 7410,
: 4547,
: 5433,
: 6499,
: 1853,
: 1413,
: 7396,
: 8578,
: 1940,
: 4249,
: -4134,
: 1345,
: 6665,
: -744,
: 1464,
: 1051,
: -2082,
: -882,
: -5046,
: 4169,
: -2666,
: 2795,
: -1544,
: 3351,
: -2922,
: -9726,
: -14896,
: -2613,
: -4570,
: -1783,
: 13150,
: -2352,
: 2145,
: 1789,
: 1287,
: -724,
: -403,
: -1635,
: -881,
: -541,
: -856,
: -3637,
"・": -4371,
: -11870,
: -2069,
: 2210,
: 782,
: -190,
: -1768,
: 1036,
: 544,
: 950,
: -1286,
: 530,
: 4292,
: 601,
: -2006,
: -1212,
: 584,
: 788,
: 1347,
: 1623,
: 3879,
: -302,
: -740,
: -2715,
: 776,
: 4517,
: 1013,
: 1555,
: -1834,
: -681,
: -910,
: -851,
: 1500,
: -619,
: -1200,
: 866,
: -1410,
: -2094,
: -1413,
: 1067,
: 571,
: -4802,
: -1397,
: -1057,
: -809,
: 1910,
: -1328,
: -1500,
: -2056,
: -2667,
: 2771,
: 374,
: -4556,
: 456,
: 553,
: 916,
: -1566,
: 856,
: 787,
: 2182,
: 704,
: 522,
: -856,
: 1798,
: 1829,
: 845,
: -9066,
: -485,
: -442,
: -360,
: -1043,
: 5388,
: -2716,
: -910,
: -939,
: -543,
: -735,
: 672,
: -1267,
: -1286,
: -1101,
: -2900,
: 1826,
: 2586,
: 922,
: -3485,
: 2997,
: -867,
: -2112,
: 788,
: 2937,
: 786,
: 2171,
: 1146,
: -1169,
: 940,
: -994,
: 749,
: 2145,
: -730,
: -852,
: -792,
: 792,
: -1184,
: -244,
: -1000,
: 730,
: -1481,
: 1158,
: -1433,
: -3370,
: 929,
: -1291,
: 2596,
: -4866,
: 1192,
: -1100,
: -2213,
: 357,
: -2344,
: -2297,
: -2604,
: -878,
: -1659,
: -792,
: -1984,
: 1749,
: 2120,
"「": 1895,
"」": 3798,
"・": -4371,
: -724,
: -11870,
: 2145,
: 1789,
: 1287,
: -403,
: -1635,
: -881,
: -541,
: -856,
: -3637,
};
this.UW5__ = {
",": 465,
".": -299,
1: -514,
E2: -32768,
"]": -2762,
"、": 465,
"。": -299,
"「": 363,
: 1655,
: 331,
: -503,
: 1199,
: 527,
: 647,
: -421,
: 1624,
: 1971,
: 312,
: -983,
: -1537,
: -1371,
: -852,
: -1186,
: 1093,
: 52,
: 921,
: -18,
: -850,
: -127,
: 1682,
: -787,
: -1224,
: -635,
: -578,
: 1001,
: 502,
: 865,
: 3350,
: 854,
: -208,
: 429,
: 504,
: 419,
: -1264,
: 327,
: 241,
: 451,
: -343,
: -871,
: 722,
: -1153,
: -654,
: 3519,
: -901,
: 848,
: 2104,
: -1296,
: -548,
: 1785,
: -1304,
: -2991,
: 921,
: 1763,
: 872,
: -814,
: 1618,
: -1682,
: 218,
: -4353,
: 932,
: 1356,
: -1508,
: -1347,
: 240,
: -3912,
: -3149,
: 1319,
: -1052,
: -4003,
: -997,
: -278,
: -813,
: 1955,
: -2233,
: 663,
: -1073,
: 1219,
: -1018,
: -368,
: 786,
: 1191,
: 2368,
: -689,
"": -514,
: -32768,
"「": 363,
: 241,
: 451,
: -343,
};
this.UW6__ = {
",": 227,
".": 808,
1: -270,
E1: 306,
"、": 227,
"。": 808,
: -307,
: 189,
: 241,
: -73,
: -121,
: -200,
: 1782,
: 383,
: -428,
: 573,
: -1014,
: 101,
: -105,
: -253,
: -149,
: -417,
: -236,
: -206,
: 187,
: -135,
: 195,
: -673,
: -496,
: -277,
: 201,
: -800,
: 624,
: 302,
: 1792,
: -1212,
: 798,
: -960,
: 887,
: -695,
: 535,
: -697,
: 753,
: -507,
: 974,
: -822,
: 1811,
: 463,
: 1082,
"": -270,
: 306,
: -673,
: -496,
};
return this;
}
/**
 * Classify a character into one of the segmenter's character classes.
 *
 * `this.chartype_` is an array of [RegExp, label] pairs. The pairs are tried
 * in array order and the label of the first pattern that matches is returned.
 *
 * @param {string} str The character (or short string) to classify.
 * @returns {string} The label of the first matching pattern, or "O" when no
 *   pattern matches.
 */
TinySegmenter.prototype.ctype_ = function (str) {
  var table = this.chartype_;
  // Index loop instead of for...in: for...in also visits enumerable
  // properties that other scripts may have added to Array.prototype, which
  // are not [RegExp, label] pairs and would yield a bogus result.
  for (var idx = 0; idx < table.length; ++idx) {
    if (str.match(table[idx][0])) {
      return table[idx][1];
    }
  }
  return "O";
};
/**
 * Normalise a looked-up weight: any falsy value (for example the `undefined`
 * produced by a missing table entry) becomes 0, every other value is
 * returned unchanged.
 *
 * @param {*} v The looked-up value.
 * @returns {*} `v` when it is truthy, otherwise 0.
 */
TinySegmenter.prototype.ts_ = function (v) {
  return v ? v : 0;
};
/**
 * Split text into segments using the weight tables stored on this instance.
 *
 * The text is padded with three start markers ("B3", "B2", "B1") and three
 * end markers ("E1", "E2", "E3"). For every position after the first
 * character a score is computed as BIAS__ plus the weights found in the
 * U*, B*, T* tables for the surrounding characters (w1..w6), their character
 * classes (c1..c6, from ctype_) and the three previous decisions (p1..p3).
 * A score greater than 0 starts a new segment at that position.
 *
 * Characters are taken per Unicode code point: a UTF-16 surrogate pair is
 * kept together as one unit so that a boundary can never fall inside it.
 *
 * @param {*} input Text to segment. Non-string values are converted with
 *   String(). null, undefined and values loosely equal to "" give [].
 * @returns {string[]} The segments in order; their concatenation is the
 *   converted input text.
 */
TinySegmenter.prototype.segment = function (input) {
  // Loose comparison with "" is deliberate: it keeps returning [] for the
  // other values that compare equal to the empty string (0, false, []).
  if (input == null || input == "") {
    return [];
  }
  var text = String(input);
  var result = [];
  var seg = ["B3", "B2", "B1"];
  var ctype = ["O", "O", "O"];

  // Collect the characters, joining a high surrogate with the low surrogate
  // that follows it so the pair is handled as a single character.
  for (var pos = 0; pos < text.length; ++pos) {
    var ch = text.charAt(pos);
    var code = text.charCodeAt(pos);
    if (code >= 0xd800 && code <= 0xdbff && pos + 1 < text.length) {
      var next = text.charCodeAt(pos + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        ch += text.charAt(pos + 1);
        ++pos;
      }
    }
    seg.push(ch);
    ctype.push(this.ctype_(ch));
  }
  seg.push("E1", "E2", "E3");
  ctype.push("O", "O", "O");

  // The first character always opens the first segment.
  var word = seg[3];
  // Previous three decisions: "U" = not yet known, "B" = boundary, "O" = none.
  var p1 = "U";
  var p2 = "U";
  var p3 = "U";
  for (var i = 4; i < seg.length - 3; ++i) {
    var score = this.BIAS__;
    var w1 = seg[i - 3];
    var w2 = seg[i - 2];
    var w3 = seg[i - 1];
    var w4 = seg[i];
    var w5 = seg[i + 1];
    var w6 = seg[i + 2];
    var c1 = ctype[i - 3];
    var c2 = ctype[i - 2];
    var c3 = ctype[i - 1];
    var c4 = ctype[i];
    var c5 = ctype[i + 1];
    var c6 = ctype[i + 2];

    // Previous-decision features.
    score += this.ts_(this.UP1__[p1]);
    score += this.ts_(this.UP2__[p2]);
    score += this.ts_(this.UP3__[p3]);
    score += this.ts_(this.BP1__[p1 + p2]);
    score += this.ts_(this.BP2__[p2 + p3]);

    // Character unigram, bigram and trigram features.
    score += this.ts_(this.UW1__[w1]);
    score += this.ts_(this.UW2__[w2]);
    score += this.ts_(this.UW3__[w3]);
    score += this.ts_(this.UW4__[w4]);
    score += this.ts_(this.UW5__[w5]);
    score += this.ts_(this.UW6__[w6]);
    score += this.ts_(this.BW1__[w2 + w3]);
    score += this.ts_(this.BW2__[w3 + w4]);
    score += this.ts_(this.BW3__[w4 + w5]);
    score += this.ts_(this.TW1__[w1 + w2 + w3]);
    score += this.ts_(this.TW2__[w2 + w3 + w4]);
    score += this.ts_(this.TW3__[w3 + w4 + w5]);
    score += this.ts_(this.TW4__[w4 + w5 + w6]);

    // Character-class unigram, bigram and trigram features.
    score += this.ts_(this.UC1__[c1]);
    score += this.ts_(this.UC2__[c2]);
    score += this.ts_(this.UC3__[c3]);
    score += this.ts_(this.UC4__[c4]);
    score += this.ts_(this.UC5__[c5]);
    score += this.ts_(this.UC6__[c6]);
    score += this.ts_(this.BC1__[c2 + c3]);
    score += this.ts_(this.BC2__[c3 + c4]);
    score += this.ts_(this.BC3__[c4 + c5]);
    score += this.ts_(this.TC1__[c1 + c2 + c3]);
    score += this.ts_(this.TC2__[c2 + c3 + c4]);
    score += this.ts_(this.TC3__[c3 + c4 + c5]);
    score += this.ts_(this.TC4__[c4 + c5 + c6]);

    // Combined previous-decision / character-class features.
    score += this.ts_(this.UQ1__[p1 + c1]);
    score += this.ts_(this.UQ2__[p2 + c2]);
    score += this.ts_(this.UQ3__[p3 + c3]);
    score += this.ts_(this.BQ1__[p2 + c2 + c3]);
    score += this.ts_(this.BQ2__[p2 + c3 + c4]);
    score += this.ts_(this.BQ3__[p3 + c2 + c3]);
    score += this.ts_(this.BQ4__[p3 + c3 + c4]);
    score += this.ts_(this.TQ1__[p2 + c1 + c2 + c3]);
    score += this.ts_(this.TQ2__[p2 + c2 + c3 + c4]);
    score += this.ts_(this.TQ3__[p3 + c1 + c2 + c3]);
    score += this.ts_(this.TQ4__[p3 + c2 + c3 + c4]);

    var p = "O";
    if (score > 0) {
      // Boundary before seg[i]: close the current segment.
      result.push(word);
      word = "";
      p = "B";
    }
    p1 = p2;
    p2 = p3;
    p3 = p;
    word += seg[i];
  }
  result.push(word);
  return result;
};
lunr.TinySegmenter = TinySegmenter;
};
});