ES配置IK分词器失败

laoyang360 - 《一本书讲透Elasticsearch》作者，Elastic认证工程师 [死磕Elasticsearch]知识星球地址：http://t.cn/RmwM3N9；微信公众号：铭毅天下; 博客：https://elastic.blog.csdn.net

赞同来自: menzhipeng 、AuroraXuesen

最核心：

# Put a mapping on type "fulltext" of index "index": the "content" field is
# a text field analyzed with ik_max_word at both index time and search time.
curl --request POST \
     --header 'Content-Type:application/json' \
     --data '
{
  "properties": {
    "content": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    }
  }
}' \
     http://localhost:9200/index/fulltext/_mapping

参考下Medcl大的github：https://github.com/medcl/elasticsearch-analysis-ik

rochy - rochy_he

赞同来自: menzhipeng

mapping 里面可以直接设置 ik_max_word 或者 ik_smart 两种模式进行分词；
你可以将你的配置贴出来看一下

rochy - rochy_he

赞同来自: menzhipeng

{
  "order": 0,
  "template": "*",
  "settings": {
    "index": {
      "refresh_interval": "5s",
      "number_of_shards": "3",
      "number_of_replicas": "0",
      "max_result_window": 10000,
      "translog": {
        "durability": "async",
        "sync_interval": "30s",
        "flush_threshold_size": "500mb"
      },
      "merge": {
        "scheduler": {
          "max_thread_count": "1",
          "max_merge_count": "100"
        }
      },
      "unassigned": {
        "node_left": {
          "delayed_timeout": "1m"
        }
      }
    }
  },
  "mappings": {
    "_doc": {
      "_all": {
        "enabled": false
      },
      "dynamic_templates": [
        {
          "id_field": {
            "match": "*_id",
            "mapping": {
              "type": "keyword",
              "store": "true"
            }
          }
        },
        {
          "no_field": {
            "match": "*_no",
            "mapping": {
              "type": "keyword",
              "store": "true"
            }
          }
        },
        {
          "geo_field": {
            "match": "*_geo",
            "mapping": {
              "type": "geo_point",
              "store": "true"
            }
          }
        },
        {
          "ip_field": {
            "match": "*_ip",
            "mapping": {
              "type": "ip",
              "store": "true"
            }
          }
        },
        {
          "len_field": {
            "match": "*_len",
            "mapping": {
              "type": "integer",
              "store": "true"
            }
          }
        },
        {
          "typ_field": {
            "match": "*_typ*",
            "mapping": {
              "type": "keyword",
              "store": "true"
            }
          }
        },
        {
          "sta_field": {
            "match": "*_sta",
            "mapping": {
              "type": "keyword",
              "store": "true"
            }
          }
        },
        {
          "lvl_field": {
            "match": "*_lvl",
            "mapping": {
              "type": "keyword",
              "store": "true"
            }
          }
        },
        {
          "flg_field": {
            "match": "*_flg",
            "mapping": {
              "type": "keyword",
              "store": "true"
            }
          }
        },
        {
          "dtm_field": {
            "match": "*_dtm",
            "mapping": {
              "type": "date",
              "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM||yyyy||yyyy/MM/dd||yyyy/MM||epoch_millis",
              "store": "true"
            }
          }
        },
        {
          "ns_field": {
            "match": "*_ns",
            "mapping": {
              "index": "false",
              "store": "false",
              "doc_values": "false",
              "norms": "false",
              "fielddata": "false"
            }
          }
        },
        {
          "bin_field": {
            "match": "*_bin",
            "mapping": {
              "type": "binary",
              "store": "false",
              "doc_values": "false",
              "norms": "false",
              "fielddata": "false"
            }
          }
        },
        {
          "raw_field": {
            "match": "*_raw",
            "mapping": {
              "type": "binary",
              "store": "false",
              "doc_values": "false",
              "norms": "false",
              "fielddata": "false"
            }
          }
        },
        {
          "std_field": {
            "match": "*_std",
            "mapping": {
              "type": "text",
              "analyzer": "standard",
              "store": "true"
            }
          }
        },
        {
          "url_field": {
            "match": "*_url",
            "mapping": {
              "type": "keyword",
              "store": "true",
              "doc_values": "false",
              "norms": "false",
              "fielddata": "false"
            }
          }
        },
        {
          "file_field": {
            "match": "*_file",
            "mapping": {
              "type": "attachment",
              "fields": {
                "content": {
                  "type": "text",
                  "store": "false"
                },
                "author": {
                  "type": "text",
                  "store": "true"
                },
                "title": {
                  "type": "text",
                  "store": "true"
                },
                "keywords": {
                  "type": "text",
                  "store": "true"
                },
                "content_length": {
                  "store": "true"
                },
                "language": {
                  "store": "true"
                },
                "date": {
                  "type": "date",
                  "store": "true"
                },
                "content_type": {
                  "store": "true"
                }
              }
            }
          }
        },
        {
          "string_field": {
            "match_mapping_type": "string",
            "mapping": {
              "type": "text",
              "analyzer": "ik_max_word",
              "search_analyzer": "ik_max_word",
              "search_quote_analyzer": "ik_max_word",
              "term_vector": "with_positions_offsets",
              "fields": {
                "orginal": {
                  "type": "keyword",
                  "ignore_above": "36"
                }
              }
            }
          }
        }
      ]
    }
  },
  "aliases": {}
}

你可以使用索引模板来进行设置，也可以指定字段

rochy - rochy_he

赞同来自: menzhipeng

问题1：
关于分词的大部分 Mapping 是无法修改的，即便能修改也是没有意义的；
因为修改前的数据并不会因为你的修改而生效，你必须重新覆盖写入才能生效，所以依旧推荐新建索引，然后使用 reindex 同步数据。

问题2：
设置 mapping 的时候需要指定 type，对于 ES6.X 而言，默认的 type 为 _doc

问题3：
搜索的时候可以不需要指定 type，对于 ES6.X 而言，一个索引只能有一个 type，指定与不指定都是一样的。

menzhipeng

请教一下怎么设置？我不确定我的ik插件是否可用，目前中文分词还是用的单个汉字分开的样子

menzhipeng

进行单字段设置分词器，报错了

menzhipeng

字段分词已经设置成功，附带着有几个问题： 1. 在有数据的情况下设置分词不成功，这个是es有限制还是说有其他的办法可以实现 2. mapping 中的 type 是否一定要设置，我通过Java实体的
@Document(indexName = "articleindex",type = "articletype") 来设置type ，这个是否是必须的
3. 查询使用TransportClient 来实现，设置查询时的代码准备是下面这样：
builder = this.transportClient.prepareSearch(index);
this.transportClient.setTypes(type);
现在不确定 setTypes 这个是否是必须要设置的，和问题二有关联

menzhipeng

好，现在我的项目在进行全文检索的测试阶段，如果测试通过进入生产之前，按照你说的应该是在es中重建索引，配置好分词再进行数据的添加工作。
那么如何把旧索引数据移入新索引中，并且不影响新索引结构

rochy - rochy_he

如果两个索引仅仅是 mapping 发生了变化，数据结构相同，那么直接使用 _reindex api 即可
具体你可以参考：https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-reindex.html

menzhipeng

偶尔启动项目会出现报错，这是什么问题导致的呢

10 个回复

发起人

活动推荐

相关问题

问题状态

ES配置IK分词器失败

与内容相关的链接

10 个回复

发起人

活动推荐

相关问题

问题状态