能够影响elasticsearch score的方法有很多,官方推荐的是使用内置的painless脚本语言结合function_score来重新定义score。由于本人开发的项目其算法是由java语言开发的,于是决定尝试原生脚本开发。 elasticsearch脚本插件由plugin-descriptor.properties文件以及可运行的jar包组成,plugin-descriptor.properties主要用来定义插件的版本信息、对应的es版本信息等属性。
官方的例子
/**
 * Example plugin that contributes a custom script engine, {@link MyExpertScriptEngine}.
 */
public class ExpertScriptPlugin extends Plugin implements ScriptPlugin {

    @Override
    public ScriptEngineService getScriptEngineService(Settings settings) {
        return new MyExpertScriptEngine();
    }

    /**
     * A sample {@link ScriptEngineService} registered under the type {@code expert_scripts}.
     * Its single script, {@code pure_df}, reads the postings of one term in each segment and
     * returns the frequency reported for the current document.
     */
    // tag::expert_engine
    private static class MyExpertScriptEngine implements ScriptEngineService {

        @Override
        public String getType() {
            return "expert_scripts";
        }

        /**
         * "Compiles" a script by mapping its source text to a factory of {@link SearchScript}s.
         *
         * @param scriptName   name of the script; not consulted
         * @param scriptSource the script source, used as the script identifier
         * @param params       compile-time parameters; not consulted
         * @return a factory that turns the runtime parameters into a {@link SearchScript}
         * @throws IllegalArgumentException if {@code scriptSource} is not {@code pure_df}
         */
        @Override
        public Function<Map<String,Object>,SearchScript> compile(String scriptName, String scriptSource, Map<String, String> params) {
            // the script "source" doubles as the script identifier
            if (!"pure_df".equals(scriptSource)) {
                throw new IllegalArgumentException("Unknown script name " + scriptSource);
            }
            return scriptParams -> {
                // both parameters must be present before either of them is read
                if (!scriptParams.containsKey("field")) {
                    throw new IllegalArgumentException("Missing parameter [field]");
                }
                if (!scriptParams.containsKey("term")) {
                    throw new IllegalArgumentException("Missing parameter [term]");
                }
                final String field = scriptParams.get("field").toString();
                final String term = scriptParams.get("term").toString();

                return new SearchScript() {
                    @Override
                    public LeafSearchScript getLeafSearchScript(LeafReaderContext context) throws IOException {
                        PostingsEnum postings = context.reader().postings(new Term(field, term));
                        if (postings == null) {
                            // this segment has no such field and/or term, so every document scores 0
                            return () -> 0.0d;
                        }
                        return new LeafSearchScript() {
                            /** The document most recently handed to {@link #setDocument(int)}. */
                            int requestedDoc = -1;

                            @Override
                            public void setDocument(int docid) {
                                try {
                                    // advance() is undefined for a target at or before the current position,
                                    // so only ever move forward
                                    if (postings.docID() < docid) {
                                        postings.advance(docid);
                                    }
                                } catch (IOException e) {
                                    throw new UncheckedIOException(e);
                                }
                                requestedDoc = docid;
                            }

                            @Override
                            public double runAsDouble() {
                                if (postings.docID() == requestedDoc) {
                                    try {
                                        return postings.freq();
                                    } catch (IOException e) {
                                        throw new UncheckedIOException(e);
                                    }
                                }
                                // the postings moved past the requested document: the term does not occur in it
                                return 0.0d;
                            }
                        };
                    }

                    @Override
                    public boolean needsScores() {
                        return false;
                    }
                };
            };
        }

        /**
         * Instantiates the search script by applying the factory produced by
         * {@link #compile(String, String, Map)} to the runtime parameters.
         */
        @Override
        @SuppressWarnings("unchecked")
        public SearchScript search(CompiledScript compiledScript, SearchLookup lookup, @Nullable Map<String, Object> params) {
            return ((Function<Map<String,Object>,SearchScript>) compiledScript.compiled()).apply(params);
        }

        /** Executable (non-search) scripts are not supported by this engine. */
        @Override
        public ExecutableScript executable(CompiledScript compiledScript, @Nullable Map<String, Object> params) {
            throw new UnsupportedOperationException();
        }

        @Override
        public boolean isInlineScriptEnabled() {
            return true;
        }

        /** Nothing to release. */
        @Override
        public void close() {}
    }
}
代码解读: 本例位于elasticsearch源码中:https://github.com/elastic/elasticsearch/tree/master/plugins/examples/script-expert-scoring
MyExpertScriptEngine类是其中最重要的类,用于实现脚本参数定义,编译,以及打分机制的实现。其中compile方法返回我们定义好打分逻辑的java function。search方法用于我们在搜索过程中实施定义好的打分逻辑。 怎奈笔者对于函数式编程知道的不多(后续需要补课),其实评分逻辑也可以在search方法中实现,于是有了下面的一段代码。
public class fieldaddScriptPlugin extends Plugin implements ScriptPlugin {
/**
 * Supplies the script engine contributed by this plugin.
 *
 * @param settings the node settings; not consulted
 * @return a new {@code MyExpertScriptEngine}
 */
@Override
public ScriptEngineService getScriptEngineService(Settings settings) {
    final ScriptEngineService engine = new MyExpertScriptEngine();
    return engine;
}
private static class MyExpertScriptEngine implements ScriptEngineService {
/**
 * Returns the type name of this script engine.
 *
 * @return the constant {@code "expert_scripts"}
 */
@Override
public String getType() {
    final String engineType = "expert_scripts";
    return engineType;
}
/**
 * "Compiles" a script. The only accepted source is {@code example_add}, and the
 * compiled form is simply that source string.
 *
 * @param scriptName   name of the script; not consulted
 * @param scriptSource the script source, used as the script identifier
 * @param params       compile-time parameters; not consulted
 * @return {@code scriptSource} itself
 * @throws IllegalArgumentException if {@code scriptSource} is not {@code example_add}
 */
@Override
public Object compile(String scriptName, String scriptSource, Map<String, String> params) {
    if (!"example_add".equals(scriptSource)) {
        throw new IllegalArgumentException("Unknown script name " + scriptSource);
    }
    return scriptSource;
}
/**
 * Builds the search script whose score is the sum of every value of the field named by
 * the {@code fieldname} parameter, plus the optional {@code inc} increment.
 *
 * @param compiledScript the compiled script; not consulted
 * @param lookup         gives per-segment access to document field values
 * @param vars           runtime parameters (the {@code params} section of the DSL); may be {@code null}
 * @return a script that scores each document as {@code sum(field values) + inc}
 * @throws IllegalArgumentException if {@code fieldname} is missing or not a string, or if
 *                                  {@code inc} is present but not a number
 */
@Override
public SearchScript search(CompiledScript compiledScript, SearchLookup lookup, @Nullable Map<String, Object> vars) {
    // Validate the parameters up front so that a bad request is rejected with a clear
    // message instead of a ClassCastException/NullPointerException later on.
    final long inc;
    if (vars == null || !vars.containsKey("inc")) {
        // the increment is optional and defaults to 0
        inc = 0;
    } else {
        final Object rawInc = vars.get("inc");
        if (!(rawInc instanceof Number)) {
            throw new IllegalArgumentException("Parameter [inc] must be a number");
        }
        inc = ((Number) rawInc).longValue();
    }

    if (vars == null || !vars.containsKey("fieldname")) {
        throw new IllegalArgumentException("Missing parameter [fieldname]");
    }
    final Object rawFieldname = vars.get("fieldname");
    if (!(rawFieldname instanceof String)) {
        throw new IllegalArgumentException("Parameter [fieldname] must be a string");
    }
    final String fieldname = (String) rawFieldname;

    return new SearchScript() {
        @Override
        public LeafSearchScript getLeafSearchScript(LeafReaderContext context) throws IOException {
            final LeafSearchLookup leafLookup = lookup.getLeafSearchLookup(context);
            return new LeafSearchScript() {
                @Override
                public void setDocument(int doc) {
                    if (leafLookup != null) {
                        leafLookup.setDocument(doc);
                    }
                }

                @Override
                public double runAsDouble() {
                    // add up every value the current document holds for the field
                    long sum = 0;
                    for (Object value : (List<?>) leafLookup.doc().get(fieldname)) {
                        sum += ((Number) value).longValue();
                    }
                    return sum + inc;
                }
            };
        }

        @Override
        public boolean needsScores() {
            return false;
        }
    };
}
这段代码的逻辑是把给定的字段(字段类型long)的每个元素相加后再加上给定的增量参数,最后形成score分值。为了实现上述逻辑,需要实现参数获取、根据给定的字段名获取内容列表这两个关键步骤。下面结合代码说说这两个步骤是如何实现的。
search方法中Map<String, Object> vars参数对应DSL中"params"参数,用于接收实际给定的运行时参数。SearchLookup lookup参数由系统传入,通过lookup.getLeafSearchLookup(context)获取LeafSearchLookup,再通过该对象可以获取给定字段的值。
对于elasticsearch 2.x以前的版本可以通过NativeScriptFactory实现原生脚本。
/**
 * Plugin that registers one native script, {@code my_script}, through a
 * {@link NativeScriptFactory}.
 */
public class MyNativeScriptPlugin extends Plugin implements ScriptPlugin {
    // The logger is bound to this class so log lines are attributed to the right plugin.
    private static final Logger LOGGER = LogManager.getLogger(MyNativeScriptPlugin.class);

    public MyNativeScriptPlugin() {
        super();
        LOGGER.warn("This is MyNativeScriptPlugin");
    }

    @Override
    public List<NativeScriptFactory> getNativeScripts() {
        return Collections.singletonList(new MyNativeScriptFactory());
    }

    /**
     * Factory for the {@code my_script} native script. The script returns the length of
     * the string form of the document's {@code last} source field plus the optional
     * {@code add} parameter.
     */
    public static class MyNativeScriptFactory implements NativeScriptFactory {

        /**
         * Creates a script instance for one request.
         *
         * @param params runtime parameters; may be {@code null}. The optional {@code add}
         *               entry is a number added to the result (default 0).
         * @return a script computing {@code length(last) + add}
         * @throws IllegalArgumentException if {@code add} is present but not a number
         */
        @Override
        public ExecutableScript newScript(@Nullable Map<String, Object> params) {
            // Resolve the parameter once per script rather than once per document.
            // params is nullable, and the value may be any boxed numeric type, not only Integer.
            final Object rawAdd = params == null ? null : params.get("add");
            if (rawAdd != null && !(rawAdd instanceof Number)) {
                throw new IllegalArgumentException("Parameter [add] must be a number");
            }
            final int add = rawAdd == null ? 0 : ((Number) rawAdd).intValue();

            return new AbstractDoubleSearchScript() {
                @Override
                public double runAsDouble() {
                    // a document without a "last" value counts as length 0 instead of
                    // throwing a NullPointerException
                    final Object last = source().get("last");
                    final int length = last == null ? 0 : last.toString().length();
                    return length + add;
                }
            };
        }

        @Override
        public boolean needsScores() {
            return false;
        }

        @Override
        public String getName() {
            return "my_script";
        }
    }
}
工程组织 elasticsearch工程使用gradle进行依赖管理和生命周期管理,为此es项目自己也开发了esplugin的gradle插件,但不兼容gradle4.2以上的版本。参考github中的成熟插件,使用maven组织工程。
主要涉及两个文件:pom.xml 和 plugin.xml。工程利用maven-assembly-plugin打包jar。
本例github地址:https://github.com/jiashiwen/elasticsearchpluginsample 欢迎点赞或拍砖
本文地址:http://elasticsearch.cn/article/450