sr-test-clip / vocab.json
jamescalam's picture
Upload 9 files
01c40f5 verified
{
"!": 2,
"!</w>": 277,
"\"": 3,
"\"</w>": 281,
"#": 4,
"#</w>": 288,
"$": 5,
"$</w>": 274,
"%": 6,
"%</w>": 255,
"&": 7,
"&</w>": 258,
"'": 8,
"'</w>": 223,
"'s</w>": 363,
"(": 9,
"(</w>": 289,
")": 10,
")</w>": 279,
"*": 11,
"*</w>": 295,
"+": 12,
"+</w>": 294,
",": 13,
",</w>": 276,
",@</w>": 622,
"-": 14,
"-</w>": 247,
"-@</w>": 353,
".": 15,
".</w>": 236,
".@</w>": 569,
"/": 16,
"/</w>": 272,
"0": 17,
"0</w>": 285,
"1": 18,
"1</w>": 293,
"2": 19,
"2</w>": 286,
"3": 20,
"3</w>": 291,
"4": 21,
"4</w>": 284,
"5": 22,
"5</w>": 275,
"6": 23,
"6</w>": 290,
"7": 24,
"7</w>": 280,
"8": 25,
"8</w>": 240,
"9": 26,
"9</w>": 218,
":": 27,
":</w>": 292,
";": 28,
";</w>": 228,
"<": 29,
"<</w>": 265,
"<|endoftext|>": 1,
"<|startoftext|>": 0,
"=": 30,
"=</w>": 264,
">": 31,
"></w>": 273,
"?": 32,
"?</w>": 296,
"@": 33,
"@,@</w>": 623,
"@-@</w>": 354,
"@.@</w>": 570,
"@</w>": 231,
"[": 34,
"[</w>": 283,
"\\": 35,
"\\</w>": 297,
"]": 36,
"]</w>": 278,
"^": 37,
"^</w>": 282,
"_": 38,
"_</w>": 269,
"`": 39,
"`</w>": 267,
"a": 40,
"a</w>": 184,
"ab": 408,
"able</w>": 675,
"about</w>": 721,
"ac": 326,
"ace</w>": 659,
"ach": 678,
"ach</w>": 736,
"ack": 949,
"ack</w>": 551,
"ad": 407,
"ad</w>": 918,
"af": 455,
"after</w>": 532,
"ag": 397,
"aga": 730,
"against</w>": 928,
"age</w>": 566,
"air": 925,
"al": 318,
"al</w>": 330,
"albu": 813,
"album</w>": 874,
"ale</w>": 923,
"ali": 527,
"all": 762,
"all</w>": 449,
"ally</w>": 476,
"als</w>": 660,
"also</w>": 528,
"although</w>": 957,
"am": 333,
"ame</w>": 450,
"amer": 773,
"americ": 805,
"ames</w>": 906,
"ami": 968,
"an": 301,
"an</w>": 332,
"ance</w>": 689,
"and": 793,
"and</w>": 311,
"ang": 735,
"ani": 854,
"ann": 832,
"ans</w>": 761,
"ant": 1005,
"ant</w>": 663,
"any</w>": 767,
"ap": 421,
"app": 714,
"ar": 305,
"ard</w>": 543,
"are": 802,
"are</w>": 454,
"ari": 726,
"arm": 817,
"ary</w>": 518,
"as": 412,
"as</w>": 321,
"at": 322,
"at</w>": 328,
"ate</w>": 439,
"ated</w>": 441,
"ater</w>": 575,
"ates</w>": 684,
"ating</w>": 778,
"ation</w>": 457,
"ations</w>": 948,
"att": 512,
"au": 448,
"ause</w>": 997,
"b": 41,
"b</w>": 199,
"ba": 687,
"back</w>": 992,
"bar": 821,
"bas": 855,
"batt": 897,
"be": 361,
"be</w>": 467,
"bec": 558,
"became</w>": 935,
"because</w>": 1004,
"bed</w>": 1003,
"been</w>": 562,
"before</w>": 815,
"began</w>": 995,
"being</w>": 844,
"bel": 788,
"ber": 664,
"ber</w>": 483,
"betw": 740,
"between</w>": 742,
"bi": 628,
"bil": 1000,
"bl": 797,
"ble</w>": 709,
"bli": 700,
"bo": 429,
"bor": 880,
"both</w>": 846,
"bri": 588,
"bro": 758,
"bu": 388,
"buil": 814,
"but</w>": 499,
"by</w>": 369,
"c": 42,
"c</w>": 178,
"ca": 394,
"cal": 820,
"called</w>": 1014,
"cam": 932,
"can": 590,
"can</w>": 790,
"cap": 753,
"car": 644,
"cation</w>": 1001,
"ce</w>": 378,
"ced</w>": 899,
"cent": 605,
"century</w>": 964,
"cer": 827,
"ces": 776,
"ces</w>": 651,
"ch": 345,
"ch</w>": 374,
"cha": 969,
"char": 573,
"charac": 915,
"chi": 752,
"chur": 983,
"ci": 451,
"city</w>": 784,
"cla": 717,
"clu": 882,
"co": 524,
"col": 582,
"com": 383,
"comm": 549,
"comp": 557,
"comple": 989,
"con": 364,
"consi": 900,
"cont": 560,
"contin": 958,
"continu": 1009,
"cor": 706,
"coun": 656,
"cra": 1023,
"cre": 655,
"cri": 756,
"cro": 862,
"cu": 809,
"cul": 881,
"cur": 990,
"d": 43,
"d</w>": 167,
"da": 497,
"day</w>": 755,
"de": 352,
"de</w>": 415,
"dec": 604,
"ded</w>": 447,
"del": 988,
"dent</w>": 1002,
"der": 487,
"der</w>": 526,
"des</w>": 822,
"descri": 922,
"desig": 1017,
"devel": 908,
"develop": 956,
"di": 340,
"dif": 933,
"ding</w>": 442,
"direc": 872,
"dis": 972,
"do": 595,
"do</w>": 974,
"don</w>": 963,
"dra": 998,
"dre": 921,
"ds</w>": 422,
"du": 425,
"duc": 587,
"during</w>": 616,
"dy</w>": 727,
"e": 44,
"e</w>": 171,
"ea": 344,
"eam</w>": 1006,
"ear": 445,
"ear</w>": 580,
"early</w>": 904,
"ears</w>": 765,
"east</w>": 1021,
"ec": 338,
"ect</w>": 886,
"ected</w>": 871,
"ed": 803,
"ed</w>": 302,
"een</w>": 693,
"ef": 816,
"ei": 640,
"el": 331,
"el</w>": 565,
"ell</w>": 621,
"ely</w>": 583,
"em": 360,
"ember</w>": 639,
"ement</w>": 811,
"emp": 779,
"en": 308,
"en</w>": 343,
"ence</w>": 563,
"end</w>": 733,
"eng": 633,
"ent": 376,
"ent</w>": 381,
"enti": 829,
"ents</w>": 555,
"ep": 530,
"episo": 931,
"episode</w>": 1016,
"er": 303,
"er</w>": 312,
"ere</w>": 385,
"ern</w>": 691,
"ers</w>": 398,
"es": 435,
"es</w>": 323,
"est": 674,
"est</w>": 836,
"et</w>": 535,
"eu": 896,
"ev": 539,
"ever</w>": 754,
"ex": 417,
"f": 45,
"f</w>": 187,
"fa": 630,
"fac": 893,
"fe": 484,
"fe</w>": 851,
"fer": 610,
"fi": 350,
"fiel": 996,
"fil": 672,
"film</w>": 858,
"fin": 553,
"fir": 470,
"first</w>": 509,
"fo": 954,
"fol": 692,
"follow": 786,
"following</w>": 978,
"for": 396,
"for</w>": 366,
"fore</w>": 782,
"form": 645,
"found</w>": 1015,
"four</w>": 938,
"fre": 1012,
"fro": 401,
"from</w>": 403,
"ft</w>": 707,
"fu": 665,
"g": 46,
"g</w>": 175,
"ga": 631,
"game</w>": 697,
"gan</w>": 835,
"ge": 574,
"ge</w>": 482,
"gen": 625,
"gener": 824,
"ger": 913,
"gh": 400,
"gh</w>": 481,
"ght</w>": 493,
"gi": 516,
"gin": 771,
"go": 494,
"gover": 987,
"gr": 955,
"gra": 611,
"gre": 612,
"gro": 785,
"gs</w>": 975,
"gu": 426,
"h": 47,
"h</w>": 168,
"ha": 367,
"had</w>": 459,
"har": 971,
"has</w>": 556,
"have</w>": 547,
"he</w>": 410,
"hea": 720,
"hel": 750,
"her": 830,
"her</w>": 522,
"hi": 325,
"high": 917,
"him</w>": 739,
"his</w>": 406,
"histor": 850,
"ho": 577,
"hou": 940,
"how": 804,
"however</w>": 867,
"hu": 993,
"i": 48,
"i</w>": 185,
"ic": 409,
"ic</w>": 783,
"ical</w>": 713,
"id</w>": 682,
"ies</w>": 508,
"il": 379,
"im": 465,
"in": 300,
"in</w>": 314,
"inc": 491,
"inclu": 614,
"including</w>": 926,
"indi": 847,
"ine</w>": 490,
"ined</w>": 694,
"inf": 901,
"ing": 453,
"ing</w>": 315,
"ings</w>": 670,
"ins</w>": 680,
"inst</w>": 927,
"inter": 609,
"into</w>": 649,
"inv": 943,
"ir": 428,
"ir</w>": 456,
"is": 395,
"is</w>": 349,
"iso": 890,
"it": 319,
"it</w>": 390,
"ite</w>": 624,
"ited</w>": 666,
"ith</w>": 370,
"its</w>": 480,
"ity</w>": 443,
"j": 49,
"j</w>": 203,
"jo": 585,
"joh": 941,
"ju": 619,
"k": 50,
"k</w>": 183,
"ke": 661,
"ke</w>": 505,
"ked</w>": 653,
"king</w>": 564,
"kno": 705,
"known</w>": 879,
"ks</w>": 559,
"l": 51,
"l</w>": 172,
"la": 348,
"lan": 606,
"land</w>": 561,
"lar": 646,
"lar</w>": 698,
"later</w>": 725,
"ld</w>": 462,
"le": 382,
"le</w>": 357,
"lea": 466,
"led</w>": 520,
"les</w>": 658,
"ley</w>": 1020,
"li": 336,
"like</w>": 936,
"lin": 864,
"line</w>": 841,
"ling</w>": 869,
"lion</w>": 979,
"ll</w>": 876,
"lo": 358,
"loc": 729,
"low": 567,
"low</w>": 887,
"ls</w>": 929,
"lu": 432,
"ly</w>": 342,
"m": 52,
"m</w>": 173,
"ma": 365,
"made</w>": 794,
"man": 444,
"man</w>": 603,
"many</w>": 801,
"mar": 474,
"mat": 823,
"may</w>": 734,
"me": 475,
"me</w>": 967,
"ment</w>": 572,
"mi": 492,
"mil": 615,
"min": 568,
"mis": 861,
"mo": 399,
"mon": 597,
"mor": 1018,
"more</w>": 637,
"most</w>": 652,
"mp": 985,
"ms</w>": 916,
"mu": 519,
"musi": 808,
"music</w>": 961,
"n": 53,
"n</w>": 174,
"na": 517,
"nam": 810,
"national</w>": 849,
"ne": 393,
"ned</w>": 620,
"new</w>": 598,
"ni": 546,
"ning</w>": 702,
"no": 387,
"no</w>": 759,
"nor": 576,
"north</w>": 891,
"not</w>": 515,
"nov": 945,
"ns</w>": 981,
"num": 763,
"number</w>": 924,
"o": 54,
"o</w>": 182,
"ob": 909,
"oc": 511,
"od</w>": 825,
"of": 513,
"of</w>": 309,
"offi": 914,
"og": 738,
"ok</w>": 749,
"ol": 351,
"old</w>": 718,
"olog": 903,
"om": 339,
"ome</w>": 571,
"on": 307,
"on</w>": 313,
"ond</w>": 747,
"one</w>": 452,
"ong</w>": 472,
"only</w>": 683,
"ons</w>": 601,
"oo": 552,
"op": 405,
"oper": 912,
"or": 310,
"or</w>": 334,
"ore</w>": 529,
"ori": 591,
"origin": 960,
"ors</w>": 962,
"other</w>": 540,
"ou": 324,
"ould</w>": 531,
"oun": 411,
"ound</w>": 635,
"our": 548,
"our</w>": 608,
"ous</w>": 578,
"out</w>": 478,
"over": 831,
"over</w>": 669,
"ow": 416,
"own</w>": 617,
"p": 55,
"p</w>": 189,
"pa": 593,
"par": 427,
"part</w>": 892,
"pas": 999,
"pe": 514,
"pen": 859,
"peop": 1013,
"per": 437,
"peri": 895,
"ph": 769,
"phi": 1007,
"pi": 607,
"pl": 488,
"pla": 506,
"play": 632,
"po": 377,
"poin": 856,
"poli": 902,
"por": 473,
"port</w>": 843,
"posi": 905,
"pp": 770,
"pr": 888,
"pre": 424,
"pres": 618,
"pri": 667,
"pro": 386,
"produc": 774,
"ps</w>": 703,
"pu": 485,
"publi": 728,
"q": 56,
"q</w>": 210,
"qu": 431,
"r": 57,
"r</w>": 176,
"ra": 355,
"ran": 523,
"re": 304,
"re</w>": 671,
"rea": 746,
"rec": 436,
"recor": 708,
"red</w>": 764,
"ree</w>": 638,
"relea": 795,
"rema": 980,
"ren": 629,
"res</w>": 613,
"ri": 329,
"rit": 599,
"ro": 317,
"ron": 951,
"rou": 498,
"ru": 419,
"ry</w>": 885,
"s": 58,
"s</w>": 170,
"sa": 510,
"sc": 414,
"sch": 775,
"scri": 745,
"se": 380,
"se</w>": 373,
"sea": 677,
"season</w>": 781,
"sec": 554,
"second</w>": 837,
"sed</w>": 446,
"sel": 699,
"ser": 486,
"series</w>": 826,
"ses</w>": 647,
"set</w>": 1019,
"sever": 877,
"several</w>": 920,
"sh": 359,
"sh</w>": 541,
"she</w>": 594,
"shed</w>": 719,
"shi": 533,
"ship</w>": 748,
"sho": 828,
"si": 327,
"side</w>": 839,
"sig": 627,
"sin": 791,
"sing": 857,
"sing</w>": 679,
"sion</w>": 504,
"sk": 889,
"sm": 796,
"so": 602,
"so</w>": 477,
"soci": 934,
"sol": 977,
"som": 984,
"some</w>": 757,
"son": 800,
"son</w>": 501,
"song</w>": 751,
"sou": 641,
"south</w>": 884,
"sp": 384,
"spec": 581,
"ss</w>": 489,
"ssi": 732,
"st": 316,
"st</w>": 335,
"sta": 690,
"star": 744,
"state</w>": 898,
"ste": 724,
"sted</w>": 853,
"ster</w>": 883,
"stern</w>": 994,
"sti": 643,
"stor": 537,
"str": 589,
"stri": 863,
"stru": 695,
"struc": 772,
"sts</w>": 766,
"stu": 807,
"su": 368,
"sub": 787,
"suc": 947,
"such</w>": 812,
"sul": 930,
"sup": 860,
"sur": 642,
"sy": 673,
"t": 59,
"t</w>": 180,
"ta": 438,
"tain</w>": 1010,
"te": 544,
"te</w>": 550,
"ted</w>": 402,
"tel": 937,
"ten": 648,
"ten</w>": 798,
"ter": 433,
"ter</w>": 420,
"ters</w>": 838,
"th": 298,
"th</w>": 391,
"than</w>": 723,
"that</w>": 362,
"the": 375,
"the</w>": 299,
"their</w>": 507,
"them</w>": 819,
"then</w>": 875,
"ther</w>": 430,
"there</w>": 760,
"these</w>": 848,
"they</w>": 525,
"this</w>": 495,
"tho": 919,
"thou": 636,
"though</w>": 741,
"three</w>": 716,
"throu": 731,
"through</w>": 878,
"ti": 306,
"tic": 536,
"tic</w>": 681,
"ties</w>": 704,
"tim": 469,
"time</w>": 600,
"ting</w>": 542,
"tion": 404,
"tion</w>": 347,
"tional</w>": 634,
"tions</w>": 479,
"tish</w>": 982,
"tiv": 946,
"tive</w>": 596,
"tle</w>": 986,
"tly</w>": 894,
"to": 468,
"to</w>": 320,
"ton</w>": 818,
"tor": 780,
"tor</w>": 950,
"tow": 976,
"tr": 460,
"tra": 592,
"trac": 1022,
"tran": 840,
"tre": 865,
"tri": 685,
"tro": 715,
"try</w>": 1011,
"ts</w>": 413,
"tu": 650,
"tur": 496,
"ture</w>": 842,
"tw": 434,
"two</w>": 545,
"ty</w>": 584,
"u": 60,
"u</w>": 186,
"ul": 461,
"um": 500,
"um</w>": 743,
"un": 346,
"und</w>": 952,
"under</w>": 944,
"uni": 696,
"united</w>": 942,
"up</w>": 657,
"ur": 337,
"ure</w>": 777,
"uring</w>": 586,
"ury</w>": 806,
"us</w>": 534,
"use</w>": 991,
"used</w>": 701,
"v": 61,
"v</w>": 196,
"ve": 910,
"ve</w>": 371,
"ved</w>": 503,
"vel": 688,
"ven</w>": 911,
"ver": 392,
"ver</w>": 440,
"very</w>": 970,
"ves</w>": 737,
"vi": 356,
"vie": 710,
"ving</w>": 686,
"vision</w>": 873,
"vo": 852,
"w": 62,
"w</w>": 194,
"wa": 464,
"war": 768,
"war</w>": 866,
"ward</w>": 965,
"was</w>": 341,
"way</w>": 654,
"we": 502,
"well</w>": 833,
"were</w>": 423,
"west</w>": 959,
"wh": 389,
"when</w>": 626,
"where</w>": 792,
"whi": 418,
"which</w>": 463,
"while</w>": 668,
"who</w>": 579,
"wi": 538,
"wil": 712,
"win": 907,
"with": 799,
"with</w>": 372,
"wn</w>": 676,
"wor": 471,
"work</w>": 966,
"world</w>": 834,
"would</w>": 662,
"writ": 722,
"ws</w>": 953,
"x": 63,
"x</w>": 200,
"y": 64,
"y</w>": 169,
"year</w>": 789,
"years</w>": 845,
"ying</w>": 868,
"yo": 1008,
"yp": 870,
"ys</w>": 711,
"z": 65,
"z</w>": 193,
"zed</w>": 973,
"|": 66,
"|</w>": 268,
"}": 67,
"}</w>": 227,
"~": 68,
"~</w>": 287,
"¡": 69,
"¡</w>": 206,
"¢": 70,
"¢</w>": 221,
"£": 71,
"£</w>": 243,
"¤": 72,
"¤</w>": 192,
"¥": 73,
"¥</w>": 212,
"¦": 74,
"¦</w>": 215,
"§": 75,
"§</w>": 226,
"¨": 76,
"¨</w>": 177,
"©": 77,
"©</w>": 188,
"ª": 78,
"ª</w>": 202,
"«": 79,
"«</w>": 205,
"¬": 80,
"¬</w>": 232,
"®": 81,
"®</w>": 248,
"¯": 82,
"¯</w>": 190,
"°": 83,
"°</w>": 201,
"±": 84,
"±</w>": 179,
"²": 85,
"²</w>": 246,
"³": 86,
"³</w>": 235,
"´": 87,
"´</w>": 257,
"µ": 88,
"µ</w>": 225,
"¶": 89,
"¶</w>": 214,
"·": 90,
"·</w>": 211,
"¸": 91,
"¸</w>": 217,
"¹": 92,
"¹</w>": 207,
"º": 93,
"º</w>": 204,
"»": 94,
"»</w>": 198,
"¼": 95,
"¼</w>": 224,
"½": 96,
"½</w>": 271,
"¾": 97,
"¾</w>": 191,
"¿": 98,
"¿</w>": 249,
"Â": 99,
"Ã": 100,
"Ä": 101,
"Å": 102,
"Æ": 103,
"Ç": 104,
"È": 105,
"É": 106,
"Ê": 107,
"Ë": 108,
"Ì": 109,
"Í": 110,
"Î": 111,
"Ï": 112,
"Ð": 113,
"Ñ": 114,
"Ö": 115,
"×": 116,
"Ø": 117,
"Ù": 118,
"Ü": 119,
"à": 120,
"á": 121,
"â": 122,
"âĢ": 458,
"âĢĵ</w>": 521,
"âĢĶ</w>": 939,
"ã": 123,
"ä": 124,
"å": 125,
"æ": 126,
"ç": 127,
"è": 128,
"é": 129,
"ë": 130,
"ì": 131,
"ï": 132,
"Ģ": 133,
"Ģ</w>": 252,
"ģ": 134,
"ģ</w>": 260,
"Ĥ": 135,
"Ĥ</w>": 195,
"ĥ": 136,
"ĥ</w>": 208,
"Ħ": 137,
"Ħ</w>": 239,
"ħ": 138,
"ħ</w>": 270,
"Ĩ": 139,
"Ĩ</w>": 238,
"ĩ": 140,
"ĩ</w>": 197,
"Ī": 141,
"Ī</w>": 256,
"ī": 142,
"ī</w>": 253,
"Ĭ": 143,
"Ĭ</w>": 209,
"ĭ": 144,
"ĭ</w>": 259,
"Į": 145,
"Į</w>": 219,
"į": 146,
"į</w>": 181,
"İ": 147,
"İ</w>": 261,
"ı": 148,
"ı</w>": 222,
"Ĳ": 149,
"Ĳ</w>": 216,
"ĳ": 150,
"ĳ</w>": 262,
"Ĵ": 151,
"Ĵ</w>": 234,
"ĵ": 152,
"ĵ</w>": 263,
"Ķ": 153,
"Ķ</w>": 229,
"ķ": 154,
"ķ</w>": 242,
"ĸ": 155,
"ĸ</w>": 266,
"Ĺ": 156,
"Ĺ</w>": 230,
"ĺ": 157,
"ĺ</w>": 237,
"Ļ": 158,
"Ļ</w>": 233,
"ļ": 159,
"ļ</w>": 250,
"Ľ": 160,
"Ľ</w>": 254,
"ľ": 161,
"ľ</w>": 251,
"Ŀ": 162,
"Ŀ</w>": 245,
"ŀ": 163,
"ŀ</w>": 244,
"Ł": 164,
"Ł</w>": 220,
"ł": 165,
"ł</w>": 241,
"Ń": 166,
"Ń</w>": 213
}