SkillAgentSearch skills...

Hanzi

HanziJS is a Chinese character and NLP module for Chinese language processing for Node.js

Install / Use

/learn @nieldlr/Hanzi
About this skill

Quality Score

0/100

Supported Platforms

Universal

README

HanziJS

NPM version Build Status Coverage Status codecov

HanziJS is a Chinese character and NLP module for Chinese language processing for Node.js. It is primarily written to help provide a framework for Chinese language learners to explore Chinese.

At present features include:

  • Character decomposition into components
  • Dictionary definition lookup using CC-CEDICT
  • Phonetic Regularity Computation
  • Example Word Calculations

Future features planned:

  • Chinese sentence segmentation
  • Determine regularities and other variables between characters & their components.

The decomposition data currently used was generated by Gavin Grover: http://groovy.codeplex.com/wikipage?title=cjk-decomp

Install

npm install hanzi

How to use

Initiate HanziJS. Required.

//Require
var hanzi = require("hanzi");
//Initiate
hanzi.start();

Functions

hanzi.decompose(character, type of decomposition);

A function that takes a Chinese character and returns an object with decomposition data. Type of decomposition is optional.

Type of decomposition levels:

  • 1 - "Once" (only decomposes character once),
  • 2 - "Radical" (decomposes character into its lowest radical components),
  • 3 - "Graphical" (decomposes into lowest forms, will be mostly strokes and small indivisible units)
var decomposition = hanzi.decompose('爱');
console.log(decomposition);

{ character: '爱',
  components1: [ 'No glyph available', '友' ],
  components2: [ '爫', '冖', '𠂇', '又' ],
  components3: [ '爫', '冖', '𠂇', '㇇', '㇏' ] }

//Example of forced level decomposition

var decomposition = hanzi.decompose('爱', 2);
console.log(decomposition);

{ character: '爱', components: [ '爫', '冖', '𠂇', '又' ] }

hanzi.decomposeMany(character string, type of decomposition);

A function that takes a string of characters and returns one object for all characters.

var decomposition = hanzi.decomposeMany('爱橄黃');
console.log(decomposition);

{ '爱':
   { character: '爱',
     components1: [ 'No glyph available', '友' ],
     components2: [ '爫', '冖', '𠂇', '又' ],
     components3: [ '爫', '冖', '𠂇', '㇇', '㇏' ] },
  '橄':
   { character: '橄',
     components1: [ '木', '敢' ],
     components2: [ '木', 'No glyph available', '耳', '⺙' ],
     components3: [ '一', '丨', '八', '匚', '二', '丨', '二', '丿', '一', '乂' ] },
  '黃':
   { character: '黃',
     components1: [ '廿', 'No glyph available' ],
     components2: [ '黃' ],
     components3: [ '卄', '一', '一', '二', '丨', '凵', '八' ] } }

hanzi.ifComponentExists(character/component);

Check if a component/character exists in the data. Returns boolean value.

console.log(hanzi.ifComponentExists('乂'));

true

console.log(hanzi.ifComponentExists('$'));

false

hanzi.definitionLookup(character/word, script type);

Returns a dictionary entry object. Script type is optional.

Script type parameters:

  • 's' - Simplified
  • 't' - Traditional
console.log(hanzi.definitionLookup('雪'));

[ { traditional: '雪',
    simplified: '雪',
    pinyin: 'Xue3',
    definition: 'surname Xue' },
  { traditional: '雪',
    simplified: '雪',
    pinyin: 'xue3',
    definition: 'snow/snowfall/CL:場|场[chang2]/to have the appearance of snow/to wipe away, off or out/to clean' } ]

hanzi.dictionarySearch(characters, search type);

Searches the dictionary based on input. The search type changes what data is returned. Defaults to null, which returns all occurrences of the character.

Search type parameters:

  • 'only' - this parameter returns only entries made up of the characters specified. This is a means to find all compound words that use the characters specified.
  • null - returns all occurrences of the character.
console.log(hanzi.dictionarySearch('雪'));

[ [ { traditional: '下雪',
      simplified: '下雪',
      pinyin: 'xia4 xue3',
      definition: 'to snow' } ],
  [ { traditional: '似雪',
      simplified: '似雪',
      pinyin: 'si4 xue3',
      definition: 'snowy' } ],
  [ { traditional: '冰天雪地',
      simplified: '冰天雪地',
      pinyin: 'bing1 tian1 xue3 di4',
      definition: 'a world of ice and snow' } ],
  [ { traditional: '冰雪',
      simplified: '冰雪',
      pinyin: 'bing1 xue3',
      definition: 'ice and snow' } ],
  [ { traditional: '冰雪皇后',
      simplified: '冰雪皇后',
      pinyin: 'bing1 xue3 huang2 hou4',
      definition: 'Dairy Queen (brand)' } ],
  [ { traditional: '冰雪聰明',
      simplified: '冰雪聪明',
      pinyin: 'bing1 xue3 cong1 ming5',
      definition: 'exceptionally intelligent (idiom)' } ],
  [ { traditional: '各人自掃門前雪,莫管他家瓦上霜',
      simplified: '各人自扫门前雪,莫管他家瓦上霜',
      pinyin: 'ge4 ren2 zi4 sao3 men2 qian2 xue3 , mo4 guan3 ta1 jia1 wa3 shang4 shuang1',
      definition: 'sweep the snow from your own door step, don\'t worry about the frost on your neighbor\'s roof (idiom)' } ],
  [ { traditional: '哈巴雪山',
      simplified: '哈巴雪山',
      pinyin: 'Ha1 ba1 xue3 shan1',
      definition: 'Mt Haba (Nakhi: golden flower), in Lijiang 麗江|丽江, northwest Yunnan' } ],
  [ { traditional: '單板滑雪',
      simplified: '单板滑雪',
      pinyin: 'dan1 ban3 hua2 xue3',
      definition: 'to snowboard' } ],
  [ { traditional: '報仇雪恥',
      simplified: '报仇雪耻',
      pinyin: 'bao4 chou2 xue3 chi3',
      definition: 'to take revenge and erase humiliation (idiom)' } ],

[....] //Truncated for display purposes

console.log(hanzi.dictionarySearch('心的小孩真', 'only'));

[ [ { traditional: '孩',
      simplified: '孩',
      pinyin: 'hai2',
      definition: 'child' } ],
  [ { traditional: '小',
      simplified: '小',
      pinyin: 'xiao3',
      definition: 'small/tiny/few/young' } ],
  [ { traditional: '小孩',
      simplified: '小孩',
      pinyin: 'xiao3 hai2',
      definition: 'child/CL:個|个[ge4]' } ],
  [ { traditional: '小小',
      simplified: '小小',
      pinyin: 'xiao3 xiao3',
      definition: 'very small/very few/very minor' } ],
  [ { traditional: '小心',
      simplified: '小心',
      pinyin: 'xiao3 xin1',
      definition: 'to be careful/to take care' } ],
  [ { traditional: '小的',
      simplified: '小的',
      pinyin: 'xiao3 de5',
      definition: 'I (when talking to a superior)' } ],
  [ { traditional: '心',
      simplified: '心',
      pinyin: 'xin1',
      definition: 'heart/mind/intention/centre/core/CL:顆|颗[ke1],個|个[ge4]' } ],
  [ { traditional: '的',
      simplified: '的',
      pinyin: 'de5',
      definition: 'of/~\'s (possessive particle)/(used after an attribute)/(used to form a nominal expression)/(used at the end of a declarative sentence for emphasis)' },
    { traditional: '的',
      simplified: '的',
      pinyin: 'di2',
      definition: 'really and truly' },
    { traditional: '的',
      simplified: '的',
      pinyin: 'di4',
      definition: 'aim/clear' } ],
  [ { traditional: '真',
      simplified: '真',
      pinyin: 'zhen1',
      definition: 'really/truly/indeed/real/true/genuine' } ],
  [ { traditional: '真心',
      simplified: '真心',
      pinyin: 'zhen1 xin1',
      definition: 'sincere/heartfelt/CL:片[pian4]' } ],
  [ { traditional: '真真',
      simplified: '真真',
      pinyin: 'zhen1 zhen1',
      definition: 'really/in fact/genuinely/scrupulously' } ] ]

hanzi.getExamples(character);

This function does a dictionarySearch(), then compares that to the Leiden University corpus for vocabulary frequency, then sorts the dictionary entries into three categories in an array: [high frequency, medium frequency and low frequency].

The frequency categories are determined relative to the frequency distribution of the dictionarySearch data compared to the corpus.

console.log(hanzi.getExamples('橄'));

[ [ { traditional: '橄欖',
      simplified: '橄榄',
      pinyin: 'gan3 lan3',
      definition: 'Chinese olive/olive' },
    { traditional: '橄欖油',
      simplified: '橄榄油',
      pinyin: 'gan3 lan3 you2',
      definition: 'olive oil' } ],
  [ { traditional: '橄欖球',
      simplified: '橄榄球',
      pinyin: 'gan3 lan3 qiu2',
      definition: 'football played with oval-shaped ball (rugby, American football, Australian rules etc)' } ],
  [ { traditional: '橄欖枝',
      simplified: '橄榄枝',
      pinyin: 'gan3 lan3 zhi1',
      definition: 'olive branch/symbol of peace' },
    { traditional: '橄欖樹',
      simplified: '橄榄树',
      pinyin: 'gan3 lan3 shu4',
      definition: 'olive tree' },
    { traditional: '橄欖石',
      simplified: '橄榄石',
      pinyin: 'gan3 lan3 shi2',
      definition: 'olivine (rock-forming mineral magnesium-iron silicate (Mg,Fe)2SiO4)/peridot' } ] ]

hanzi.segment(phrase); - NEW in v0.5.0

Returns an array of segments (words and punctuation marks) obtained by splitting the phrase with a longest match lookup.

console.log(hanzi.segment("我們都是陌生人。"));

[ '我們', '都', '是', '陌生人', '。' ]

hanzi.getPinyin(character);

Returns all possible pinyin data for a character.

console.log(hanzi.getPinyin('的'));

[ 'de5', 'di2', 'di4' ]

hanzi.getCharacterFrequency(character);

Returns frequency data for a character based on the Junda corpus. The data is in simplified characters, but I made the function script agnostic. So both traditional and simplified will return the same data.

console.log(hanzi.getCharacterFrequency('热'));

{ number: '530',
  character: '热',
  count: '31190',
  percentage: '76.4970999352',
  pinyin: 're4',
  meaning: 'heat/to heat up/fervent/hot (of weather)/warm up' }

hanzi.getCharacterInFrequencyListByPosition(position); - NEW in v0.7.0

Gets a character based on its position in the frequency list. This only goes up to 9933, based on the Junda frequency list.

console.log(hanzi.getCharac
View on GitHub
GitHub Stars406
CategoryDevelopment
Updated1mo ago
Forks60

Languages

JavaScript

Security Score

95/100

Audited on Mar 2, 2026

No findings