我有点怀疑你在刷屏

Elasticsearch term suggester(term 搜索建议):中文 ik 分词字段没有返回建议

Elasticsearch | 作者 tenyears | 发布于2019年07月29日 | 阅读数:1921

我的setting和mapping
 
PUT /address
{
  "settings": {
    "index": {
      "max_result_window": 10000000
    },
    "refresh_interval": "5s",
    "number_of_shards": 1,
    "number_of_replicas": 1,
    "analysis": {
      "filter": {
        "pinyin_full_filter": {
          "type": "pinyin",
          "keep_joined_full_pinyin": true,
          "lowercase": true,
          "keep_original": false,
          "keep_first_letter": false,
          "keep_separate_first_letter": false,
          "keep_none_chinese": false,
          "limit_first_letter_length": 50,
          "keep_full_pinyin": true
        },
        "pinyin_simple_filter": {
          "type": "pinyin",
          "keep_joined_full_pinyin": true,
          "lowercase": true,
          "none_chinese_pinyin_tokenize": false,
          "padding_char": " ",
          "keep_original": true,
          "keep_first_letter": true,
          "keep_separate_first_letter": false,
          "keep_full_pinyin": false
        }
      },
      "analyzer": {
        "pinyinFullIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["asciifolding", "lowercase", "pinyin_full_filter"]
        },
        "ik_pinyin_analyzer": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": ["asciifolding", "lowercase", "pinyin_full_filter", "word_delimiter"]
        },
        "ikIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["asciifolding", "lowercase"]
        },
        "pinyiSimpleIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["pinyin_simple_filter", "lowercase"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "address": {
        "type": "text",
        "analyzer": "ikIndexAnalyzer",
        "fields": {
          "ik": {
            "type": "text",
            "analyzer": "ikIndexAnalyzer"
          },
          "spy": {
            "type": "text",
            "analyzer": "pinyiSimpleIndexAnalyzer"
          },
          "fpy": {
            "type": "text",
            "analyzer": "pinyinFullIndexAnalyzer"
          }
        }
      }
    }
  }
}

我导入了全国省和地级市的数据。
用拼音全拼输入,在 address.spy 字段上做 term 搜索建议:
 
GET /address/_search
{
  "suggest": {
    "mysuggest": {
      "text": "guangdongdd",
      "term": {
        "field": "address.spy"
      }
    }
  }
}
结果如下:拼音字段可以给出建议 guangdong
 
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"suggest" : {
"mysuggest" : [
{
"text" : "guangdongdd",
"offset" : 0,
"length" : 11,
"options" : [
{
"text" : "guangdong",
"score" : 0.7777778,
"freq" : 2
}
]
}
]
}
}

 
使用 ik 分词的字段(address.ik)做 term 搜索建议如下:
 
GET /address/_search
{
  "suggest": {
    "mysuggest": {
      "text": "广东神",
      "term": {
        "field": "address.ik"
      }
    }
  }
}

 
结果如下:使用 ik 分词的字段没有返回任何搜索建议(options 为空)
 
 
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"suggest" : {
"mysuggest" : [
{
"text" : "广东",
"offset" : 0,
"length" : 2,
"options" : [ ]
},
{
"text" : "神",
"offset" : 2,
"length" : 1,
"options" : [ ]
}
]
}
}
求个大佬指点一下
 
已邀请:

tanqian

赞同来自:

中文支持不太好,目前尝试了官网的几个 API,只有 completion suggester 调通了,支持 中文/拼音/拼音首字母 前缀匹配,官网地址:https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters.html#completion-suggester
 
拼音分词器我用的是 medcl 的:https://github.com/medcl/elasticsearch-analysis-pinyin
mapping 定义:
{
  "settings": {
    "analysis": {
      "tokenizer": {
        "my_pinyin": {
          "type": "pinyin",
          "keep_separate_first_letter": true,
          "keep_full_pinyin": true,
          "keep_original": true,
          "limit_first_letter_length": 16,
          "lowercase": true,
          "remove_duplicated_term": true
        }
      },
      "analyzer": {
        "pinyin_analyzer": {
          "tokenizer": "my_pinyin"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "body": {
        "type": "text",
        "analyzer": "ik_max_word",
        "fields": {
          "suggest_text": {
            "type": "completion",
            "analyzer": "standard",
            "preserve_separators": false
          },
          "pinyin": {
            "type": "completion",
            "analyzer": "pinyin_analyzer",
            "preserve_separators": false
          }
        }
      }
    }
  }
}

 
 

 

要回复问题请先登录注册