This documentation contains work-in-progress information for future Elastic Stack and Cloud releases. Use the version selector to view supported release docs. It also contains some Elastic Cloud serverless information. Check out our serverless docs for more details.
smartcn_stop token filter
smartcn_stop token filter
The `smartcn_stop` token filter filters out stopwords defined by the `smartcn` analyzer (`_smartcn_`), and any other custom stopwords specified by the user.
This filter only supports the predefined `_smartcn_` stopwords list. If you want to use a different predefined list, then use the `stop` token filter instead.
PUT smartcn_example
{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "smartcn_with_stop": {
            "tokenizer": "smartcn_tokenizer",
            "filter": [
              "porter_stem",
              "my_smartcn_stop"
            ]
          }
        },
        "filter": {
          "my_smartcn_stop": {
            "type": "smartcn_stop",
            "stopwords": [
              "_smartcn_",
              "stack",
              "的"
            ]
          }
        }
      }
    }
  }
}

GET smartcn_example/_analyze
{
  "analyzer": "smartcn_with_stop",
  "text": "哈喽,我们是 Elastic 我们是 Elastic Stack(Elasticsearch、Kibana、Beats 和 Logstash)的开发公司。从股票行情到 Twitter 消息流,从 Apache 日志到 WordPress 博文,我们可以帮助人们体验搜索的强大力量,帮助他们以截然不同的方式探索和分析数据"
}
The above request returns:
{ "tokens": [ { "token": "哈", "start_offset": 0, "end_offset": 1, "type": "word", "position": 0 }, { "token": "喽", "start_offset": 1, "end_offset": 2, "type": "word", "position": 1 }, { "token": "我们", "start_offset": 3, "end_offset": 5, "type": "word", "position": 3 }, { "token": "是", "start_offset": 5, "end_offset": 6, "type": "word", "position": 4 }, { "token": "elast", "start_offset": 7, "end_offset": 14, "type": "word", "position": 5 }, { "token": "我们", "start_offset": 17, "end_offset": 19, "type": "word", "position": 6 }, { "token": "是", "start_offset": 19, "end_offset": 20, "type": "word", "position": 7 }, { "token": "elast", "start_offset": 21, "end_offset": 28, "type": "word", "position": 8 }, { "token": "elasticsearch", "start_offset": 35, "end_offset": 48, "type": "word", "position": 11 }, { "token": "kibana", "start_offset": 49, "end_offset": 55, "type": "word", "position": 13 }, { "token": "beat", "start_offset": 56, "end_offset": 61, "type": "word", "position": 15 }, { "token": "和", "start_offset": 62, "end_offset": 63, "type": "word", "position": 16 }, { "token": "logstash", "start_offset": 64, "end_offset": 72, "type": "word", "position": 17 }, { "token": "开发", "start_offset": 74, "end_offset": 76, "type": "word", "position": 20 }, { "token": "公司", "start_offset": 76, "end_offset": 78, "type": "word", "position": 21 }, { "token": "从", "start_offset": 79, "end_offset": 80, "type": "word", "position": 23 }, { "token": "股票", "start_offset": 80, "end_offset": 82, "type": "word", "position": 24 }, { "token": "行情", "start_offset": 82, "end_offset": 84, "type": "word", "position": 25 }, { "token": "到", "start_offset": 84, "end_offset": 85, "type": "word", "position": 26 }, { "token": "twitter", "start_offset": 86, "end_offset": 93, "type": "word", "position": 27 }, { "token": "消息", "start_offset": 94, "end_offset": 96, "type": "word", "position": 28 }, { "token": "流", "start_offset": 96, "end_offset": 97, "type": "word", "position": 29 }, { "token": "从", 
"start_offset": 98, "end_offset": 99, "type": "word", "position": 31 }, { "token": "apach", "start_offset": 100, "end_offset": 106, "type": "word", "position": 32 }, { "token": "日志", "start_offset": 107, "end_offset": 109, "type": "word", "position": 33 }, { "token": "到", "start_offset": 109, "end_offset": 110, "type": "word", "position": 34 }, { "token": "wordpress", "start_offset": 111, "end_offset": 120, "type": "word", "position": 35 }, { "token": "博", "start_offset": 121, "end_offset": 122, "type": "word", "position": 36 }, { "token": "文", "start_offset": 122, "end_offset": 123, "type": "word", "position": 37 }, { "token": "我们", "start_offset": 124, "end_offset": 126, "type": "word", "position": 39 }, { "token": "可以", "start_offset": 126, "end_offset": 128, "type": "word", "position": 40 }, { "token": "帮助", "start_offset": 128, "end_offset": 130, "type": "word", "position": 41 }, { "token": "人们", "start_offset": 130, "end_offset": 132, "type": "word", "position": 42 }, { "token": "体验", "start_offset": 132, "end_offset": 134, "type": "word", "position": 43 }, { "token": "搜索", "start_offset": 134, "end_offset": 136, "type": "word", "position": 44 }, { "token": "强大", "start_offset": 137, "end_offset": 139, "type": "word", "position": 46 }, { "token": "力量", "start_offset": 139, "end_offset": 141, "type": "word", "position": 47 }, { "token": "帮助", "start_offset": 142, "end_offset": 144, "type": "word", "position": 49 }, { "token": "他们", "start_offset": 144, "end_offset": 146, "type": "word", "position": 50 }, { "token": "以", "start_offset": 146, "end_offset": 147, "type": "word", "position": 51 }, { "token": "截然不同", "start_offset": 147, "end_offset": 151, "type": "word", "position": 52 }, { "token": "方式", "start_offset": 152, "end_offset": 154, "type": "word", "position": 54 }, { "token": "探索", "start_offset": 154, "end_offset": 156, "type": "word", "position": 55 }, { "token": "和", "start_offset": 156, "end_offset": 157, "type": "word", "position": 56 }, { 
"token": "分析", "start_offset": 157, "end_offset": 159, "type": "word", "position": 57 }, { "token": "数据", "start_offset": 159, "end_offset": 161, "type": "word", "position": 58 } ] }