用了Elasticsearch,一口气上5T
快照恢复

快照恢复

Easysearch 跨版本兼容性测试,还原 Elasticsearch 各版本快照数据

Easysearchliaosy 发表了文章 • 0 个评论 • 3380 次浏览 • 2023-06-17 12:50 • 来自相关话题

本文主要测试验证 Elasticsearch 各版本快照在 Easysearch 中进行数据恢复。

准备测试数据

索引

别名

模版

生命周期策略

创建快照

PUT /_snapshot/my_backup
{
  "type": "fs",
  "settings": {
    "location": "/infini/test/es_backup"
  }
}

PUT /_snapshot/my_backup/snapshot_1
{
  "indices": "*",
  "ignore_unavailable": false,
  "include_global_state": false
}

GET /_snapshot/my_backup/snapshot_1
  • ignore_unavailable:如果 indices 列表中的索引不存在,则是否忽略该索引而不是使快照失败。默认值为 false 。
  • include_global_state:是否在快照中包含集群状态(包括索引模版、生命周期配置、持久化配置等)。默认值为 true ,建议设为 false。

恢复快照

POST /_snapshot/my_backup/snapshot_1/_restore
{
  "indices": "*",
  "ignore_unavailable": false,
  "include_global_state": false,
  "include_aliases": true,
  "ignore_index_settings": [
    "index.lifecycle.indexing_complete"
  ]
}
  • ignore_unavailable:如果 indices 列表中的索引不存在,则是否忽略该索引而不是使还原操作失败。默认值为 false 。
  • include_global_state:是否还原群集状态。默认值为 false 。
  • include_aliases:是否恢复别名及其关联索引。默认值为 true 。
  • index.lifecycle.indexing_complete 配置不支持,忽略掉。

数据验证

索引

通过 gateway 进行数据比对

path.data: data
path.logs: log

#show progress bar
#progress_bar.enabled: true

elasticsearch:
  - name: source
    enabled: true
    endpoints:
      - http://192.168.3.185:29200
  - name: target
    enabled: true
    endpoints:
      - https://192.168.3.185:9205
    basic_auth:
      username: admin
      password: admin

pipeline:
  - name: index_diff_service
    auto_start: true
    processor:
      - dag:
          mode: wait_all
          parallel:
            - dump_hash: #dump es1's doc
                sort_document_fields: true
                indices: ".infini_activities-000004"   ##需要比对的索引名
                scroll_time: "10m"
                elasticsearch: "source"
                #              query_string: "_id:c8es70pu46lgfdgmja9g-1646117763293610802-2"
                #              fields: "doc_hash"
                output_queue: "source_docs"
                batch_size: 5000
                slice_size: 1
            #              hash_func: "xxhash64"
            - dump_hash: #dump es2's doc
                indices: ".infini_activities-000004"
                scroll_time: "10m"
                #              fields: "doc_hash"
                #              query_string: "_id:c8es70pu46lgfdgmja9g-1646117763293610802-2"
                batch_size: 5000
                slice_size: 1
                #              hash_func: "xxhash64"
                elasticsearch: "target"
                output_queue: "target_docs"
          end:
            - index_diff:
                diff_queue: "diff_result"
                buffer_size: 10
                text_report: true #如果要存 es,这个开关关闭,开启 pipeline 的 diff_result_ingest 任务
                source_queue: "source_docs"
                target_queue: "target_docs"
#pipeline:
#  - name: diff_result_ingest
#    processor:
#      - json_indexing:
#          index_name: "diff_result"
#          elasticsearch: "source"
#          input_queue: "diff_result"

./gateway-linux-amd64 -config data_check.yml

别名

模版

PUT _template/.infini_activities-rollover
{
  "order": 100000,
  "index_patterns": [
    ".infini_activities*"
  ],
  "settings": {
    "index": {
      "format": "7",
      "lifecycle": {
        "name": "ilm_.infini_metrics-30days-retention",
        "rollover_alias": ".infini_activities"
      },
      "codec": "best_compression",
      "number_of_shards": "1",
      "translog": {
        "durability": "async"
      }
    }
  },
  "mappings": {
    "dynamic_templates": [
      {
        "strings": {
          "mapping": {
            "ignore_above": 256,
            "type": "keyword"
          },
          "match_mapping_type": "string"
        }
      }
    ]
  },
  "aliases": {}
}

PUT _template/.infini
{
  "order": 0,
  "index_patterns": [
    ".infini_*"
  ],
  "settings": {
    "index": {
      "max_result_window": "10000000",
      "mapping": {
        "total_fields": {
          "limit": "20000"
        }
      },
      "analysis": {
        "analyzer": {
          "suggest_text_search": {
            "filter": [
              "word_delimiter"
            ],
            "tokenizer": "classic"
          }
        }
      },
      "number_of_shards": "1"
    }
  },
  "mappings": {
    "dynamic_templates": [
      {
        "strings": {
          "mapping": {
            "ignore_above": 256,
            "type": "keyword"
          },
          "match_mapping_type": "string"
        }
      }
    ]
  },
  "aliases": {}
}

生命周期策略

PUT _ilm/policy/ilm_.infini_metrics-30days-retention
{
  "policy": {
    "phases": {
      "hot": {
        "min_age": "0ms",
        "actions": {
          "rollover": {
            "max_size": "50gb",
            "max_age": "30d"
          },
          "set_priority": {
            "priority": 100
          }
        }
      },
      "delete": {
        "min_age": "30d",
        "actions": {
          "delete": {
          }
        }
      }
    }
  }
}

注:不支持 "delete_searchable_snapshot": true 配置

测试结果

源集群(Elasticsearch) 目标集群(Easysearch) 测试结果
7.10.2 1.0.0 索引文档一致,别名恢复成功
7.10.1 1.0.0 索引文档一致,别名恢复成功
7.10.0 1.0.0 索引文档一致,别名恢复成功
7.9.2 1.0.0 索引文档一致,别名恢复成功
7.9.0 1.0.0 索引文档一致,别名恢复成功
7.8.1 1.0.0 索引文档一致,别名恢复成功
7.5.2 1.0.0 索引文档一致,别名恢复成功
6.8.12 1.0.0 索引文档一致,别名恢复成功
6.5.4 1.0.0 索引文档一致,别名恢复成功

关于 Easysearch

about easysearch

INFINI Easysearch 是一个分布式的近实时搜索与分析引擎,核心引擎基于开源的 Apache Lucene。 Easysearch 衍生自基于开源协议 Apache 2.0 的 Elasticsearch 7.10 版本。 Easysearch 的目标是提供一个轻量级的 Elasticsearch 可替代版本,并继续完善和支持更多的企业级功能。 与 Elasticsearch 相比,Easysearch 更关注在搜索业务场景的优化和继续保持其产品的简洁与易用性。

详情参见:官方文档

Easysearch 跨版本兼容性测试,还原 Elasticsearch 各版本快照数据

Easysearchliaosy 发表了文章 • 0 个评论 • 3380 次浏览 • 2023-06-17 12:50 • 来自相关话题

本文主要测试验证 Elasticsearch 各版本快照在 Easysearch 中进行数据恢复。

准备测试数据

索引

别名

模版

生命周期策略

创建快照

PUT /_snapshot/my_backup
{
  "type": "fs",
  "settings": {
    "location": "/infini/test/es_backup"
  }
}

PUT /_snapshot/my_backup/snapshot_1
{
  "indices": "*",
  "ignore_unavailable": false,
  "include_global_state": false
}

GET /_snapshot/my_backup/snapshot_1
  • ignore_unavailable:如果 indices 列表中的索引不存在,则是否忽略该索引而不是使快照失败。默认值为 false 。
  • include_global_state:是否在快照中包含集群状态(包括索引模版、生命周期配置、持久化配置等)。默认值为 true ,建议设为 false。

恢复快照

POST /_snapshot/my_backup/snapshot_1/_restore
{
  "indices": "*",
  "ignore_unavailable": false,
  "include_global_state": false,
  "include_aliases": true,
  "ignore_index_settings": [
    "index.lifecycle.indexing_complete"
  ]
}
  • ignore_unavailable:如果 indices 列表中的索引不存在,则是否忽略该索引而不是使还原操作失败。默认值为 false 。
  • include_global_state:是否还原群集状态。默认值为 false 。
  • include_aliases:是否恢复别名及其关联索引。默认值为 true 。
  • index.lifecycle.indexing_complete 配置不支持,忽略掉。

数据验证

索引

通过 gateway 进行数据比对

path.data: data
path.logs: log

#show progress bar
#progress_bar.enabled: true

elasticsearch:
  - name: source
    enabled: true
    endpoints:
      - http://192.168.3.185:29200
  - name: target
    enabled: true
    endpoints:
      - https://192.168.3.185:9205
    basic_auth:
      username: admin
      password: admin

pipeline:
  - name: index_diff_service
    auto_start: true
    processor:
      - dag:
          mode: wait_all
          parallel:
            - dump_hash: #dump es1's doc
                sort_document_fields: true
                indices: ".infini_activities-000004"   ##需要比对的索引名
                scroll_time: "10m"
                elasticsearch: "source"
                #              query_string: "_id:c8es70pu46lgfdgmja9g-1646117763293610802-2"
                #              fields: "doc_hash"
                output_queue: "source_docs"
                batch_size: 5000
                slice_size: 1
            #              hash_func: "xxhash64"
            - dump_hash: #dump es2's doc
                indices: ".infini_activities-000004"
                scroll_time: "10m"
                #              fields: "doc_hash"
                #              query_string: "_id:c8es70pu46lgfdgmja9g-1646117763293610802-2"
                batch_size: 5000
                slice_size: 1
                #              hash_func: "xxhash64"
                elasticsearch: "target"
                output_queue: "target_docs"
          end:
            - index_diff:
                diff_queue: "diff_result"
                buffer_size: 10
                text_report: true #如果要存 es,这个开关关闭,开启 pipeline 的 diff_result_ingest 任务
                source_queue: "source_docs"
                target_queue: "target_docs"
#pipeline:
#  - name: diff_result_ingest
#    processor:
#      - json_indexing:
#          index_name: "diff_result"
#          elasticsearch: "source"
#          input_queue: "diff_result"

./gateway-linux-amd64 -config data_check.yml

别名

模版

PUT _template/.infini_activities-rollover
{
  "order": 100000,
  "index_patterns": [
    ".infini_activities*"
  ],
  "settings": {
    "index": {
      "format": "7",
      "lifecycle": {
        "name": "ilm_.infini_metrics-30days-retention",
        "rollover_alias": ".infini_activities"
      },
      "codec": "best_compression",
      "number_of_shards": "1",
      "translog": {
        "durability": "async"
      }
    }
  },
  "mappings": {
    "dynamic_templates": [
      {
        "strings": {
          "mapping": {
            "ignore_above": 256,
            "type": "keyword"
          },
          "match_mapping_type": "string"
        }
      }
    ]
  },
  "aliases": {}
}

PUT _template/.infini
{
  "order": 0,
  "index_patterns": [
    ".infini_*"
  ],
  "settings": {
    "index": {
      "max_result_window": "10000000",
      "mapping": {
        "total_fields": {
          "limit": "20000"
        }
      },
      "analysis": {
        "analyzer": {
          "suggest_text_search": {
            "filter": [
              "word_delimiter"
            ],
            "tokenizer": "classic"
          }
        }
      },
      "number_of_shards": "1"
    }
  },
  "mappings": {
    "dynamic_templates": [
      {
        "strings": {
          "mapping": {
            "ignore_above": 256,
            "type": "keyword"
          },
          "match_mapping_type": "string"
        }
      }
    ]
  },
  "aliases": {}
}

生命周期策略

PUT _ilm/policy/ilm_.infini_metrics-30days-retention
{
  "policy": {
    "phases": {
      "hot": {
        "min_age": "0ms",
        "actions": {
          "rollover": {
            "max_size": "50gb",
            "max_age": "30d"
          },
          "set_priority": {
            "priority": 100
          }
        }
      },
      "delete": {
        "min_age": "30d",
        "actions": {
          "delete": {
          }
        }
      }
    }
  }
}

注:不支持 "delete_searchable_snapshot": true 配置

测试结果

源集群(Elasticsearch) 目标集群(Easysearch) 测试结果
7.10.2 1.0.0 索引文档一致,别名恢复成功
7.10.1 1.0.0 索引文档一致,别名恢复成功
7.10.0 1.0.0 索引文档一致,别名恢复成功
7.9.2 1.0.0 索引文档一致,别名恢复成功
7.9.0 1.0.0 索引文档一致,别名恢复成功
7.8.1 1.0.0 索引文档一致,别名恢复成功
7.5.2 1.0.0 索引文档一致,别名恢复成功
6.8.12 1.0.0 索引文档一致,别名恢复成功
6.5.4 1.0.0 索引文档一致,别名恢复成功

关于 Easysearch

about easysearch

INFINI Easysearch 是一个分布式的近实时搜索与分析引擎,核心引擎基于开源的 Apache Lucene。 Easysearch 衍生自基于开源协议 Apache 2.0 的 Elasticsearch 7.10 版本。 Easysearch 的目标是提供一个轻量级的 Elasticsearch 可替代版本,并继续完善和支持更多的企业级功能。 与 Elasticsearch 相比,Easysearch 更关注在搜索业务场景的优化和继续保持其产品的简洁与易用性。

详情参见:官方文档