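# Exact values do not need to be tokenized.
# Individual fields can be given their own customized analysis.
# character filter: rules that rewrite the raw string before tokenizing; built-in ones are html_strip, pattern_replace, mapping
# tokenizer: splits the text into tokens; the main ones are whitespace, standard, uax_url_email, pattern, keyword, path_hierarchy
# filter (token filter): can be custom-defined just like char_filter; built-in token filters include lowercase, stop, synonym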
# Strip HTML tags: the html_strip char filter removes the markup before
# the keyword tokenizer emits the remaining text as a single token.
POST _analyze
{
  "char_filter": [
    "html_strip"
  ],
  "tokenizer": "keyword",
  "text": "<div>hello world</div>"
}
# Custom replacement rules: an inline mapping char filter rewrites "-" to "_"
# and the two emoticons to words before the standard tokenizer runs.
POST _analyze
{
  "char_filter": [
    {
      "type": "mapping",
      "mappings": [
        "- => _",
        ":) => happy",
        "(: => sad"
      ]
    }
  ],
  "tokenizer": "standard",
  "text": ":)123213-243243-ddsfsfs-(:"
}
# Regular-expression replacement: pattern_replace matches "http://" followed by
# anything and keeps only the captured remainder ($1); both text values are analyzed.
POST _analyze
{
  "char_filter": [
    {
      "type": "pattern_replace",
      "pattern": "http://(.*)",
      "replacement": "$1"
    }
  ],
  "tokenizer": "standard",
  "text": [
    "http://www.baidu.com",
    "http://sb.sx.com"
  ]
}
# whitespace tokenizer + stop filter.
# The char filter first deletes "?", "!" and "." so punctuation does not stick
# to the whitespace-delimited tokens.
# Token filters run in the listed order. The stop filter is case-sensitive by
# default and its English stop words are lowercase, so "lowercase" must run
# BEFORE "stop"; otherwise the capitalised "The" is not recognised as a stop
# word and leaks into the output as "the".
POST _analyze
{
  "char_filter": [
    {
      "type": "pattern_replace",
      "pattern": "[?!.]",
      "replacement": ""
    }
  ],
  "tokenizer": "whitespace",
  "filter": [
    "lowercase",
    "stop"
  ],
  "text": ["What are you saying? The dog on the tree?"]
}
# Custom analyzer.
# (Re)create my_index with an analyzer assembled from three custom parts:
#   char_filter "emotions"     - mapping filter: "-" -> "_", emoticons -> words
#   tokenizer   "pun"          - pattern tokenizer splitting on punctuation/space
#   filter      "english_stop" - stop filter using the built-in _english_ list
# ignore_unavailable=true keeps the DELETE from failing with a 404 when the
# index does not exist yet, so the script can be re-run from a clean cluster.
DELETE my_index?ignore_unavailable=true
# ":(" is mapped in addition to "(:" because the sample text analyzed against
# this index writes the sad emoticon as ":(" - with only "(:" mapped it would
# pass through unreplaced.
PUT my_index
{
  "settings": {
    "analysis": {
      "char_filter": {
        "emotions": {
          "type": "mapping",
          "mappings": [
            "- => _",
            ":) => happy",
            "(: => sad",
            ":( => sad"
          ]
        }
      },
      "tokenizer": {
        "pun": {
          "type": "pattern",
          "pattern": "[?!.,。 ]"
        }
      },
      "filter": {
        "english_stop": {
          "type": "stop",
          "stopwords": "_english_"
        }
      },
      "analyzer": {
        "my_custom_analyzer": {
          "type": "custom",
          "char_filter": [
            "emotions"
          ],
          "tokenizer": "pun",
          "filter": [
            "lowercase",
            "english_stop"
          ]
        }
      }
    }
  }
}
# Run the sample sentence through my_custom_analyzer as defined on my_index.
POST my_index/_analyze
{
  "text": ":):(I am so happy to use my custom analyzer!!!!.....。。。。",
  "analyzer": "my_custom_analyzer"
}
# (Re)create the "test" index with an analyzer built on a "hanlp_nlp" tokenizer
# and a keep_types filter that keeps only tokens whose type is "n" or "v".
# NOTE(review): "hanlp_nlp" is not a built-in tokenizer - presumably provided by
# a HanLP analysis plugin, and "n"/"v" are presumably its part-of-speech tags
# (noun/verb); confirm against the installed plugin.
# ignore_unavailable=true keeps the DELETE from failing with a 404 when the
# index does not exist yet, so the script can be re-run from a clean cluster.
DELETE test?ignore_unavailable=true
PUT test
{
  "settings": {
    "analysis": {
      "tokenizer": {
        "hanlp_test": {
          "type": "hanlp_nlp",
          "enable_stop_dictionary": true,
          "enable_custom_config": true,
          "enable_place_recognize": true,
          "enable_name_recognize": true,
          "enable_part_of_speech_tagging": true,
          "enable_japanese_name_recognize": true
        }
      },
      "filter": {
        "keep_n_v": {
          "type": "keep_types",
          "types": [
            "n",
            "v"
          ]
        }
      },
      "analyzer": {
        "my_hanlp_analyzer": {
          "tokenizer": "hanlp_test",
          "filter": [
            "keep_n_v"
          ]
        }
      }
    }
  }
}
# Analyze a Chinese passage with the index's "hanlp_test" tokenizer and an
# inline keep_types filter that keeps only tokens of the listed types.
# The Analyze API documents "filter" as an ARRAY of filter names and/or inline
# definitions, so the inline definition is wrapped in a list; a bare object is
# rejected by some Elasticsearch versions.
# NOTE(review): the type names (n, nz, v, vi, vl, a) are presumably HanLP
# part-of-speech tags - confirm against the plugin's tag set.
POST test/_analyze
{
  "tokenizer": "hanlp_test",
  "filter": [
    {
      "type": "keep_types",
      "types": [
        "n",
        "nz",
        "v",
        "vi",
        "vl",
        "a"
      ]
    }
  ],
  "text": "夜里。夜深人静。房间之中,那副画蠢蠢欲动。蔓延在整个房间之中的树枝开始渐渐开出了樱花。几乎在一夜之间,所有的枝头都挂满了樱花。睡梦之中,卡卡西只觉得一股疲倦感渐渐涌了上来。左手腕上的的白色狐狸面具慢慢消散,最后竟是消失不见。“八坂先生”巳弥满眼心疼地看着那道影子,缓缓靠了上去。随即想到了什么,跳上了樱花树。那满是樱花的树上,巳弥伸出了自己的手臂。黑影缓缓地抬起了头。一切好像又回到了从前。"
}














# 网友评论 ("User comments") - appears to be a leftover page heading, not part of the script