Lucene5.5入门第八篇——使用QueryParser实现高级查询

Lucene | 作者 kl | 发布于2016年06月24日 | | 阅读数：11060
前言



为了解决复杂的查询业务，Lucene给我们提供了一个查询语义分析器，一套完整的语法规则，能够满足大部分的查询需求，而不用关心底层是使用什么Query实现类，就好比写sql一样。Lucene推荐我们使用QueryParser，而不是各种Query的实现类。但是，QueryParser不能满足所有的查询需求，比如多文档域联合查询。有时候还是需要使用到Query的相关实现类。好了，下面我们就来看看QueryParser能够解析什么语法，解决什么问题，以及多文档域的查询





直接上代码



每个语法都可以多测试一遍，看看结果，能够加深你的理解，因为这边测试的实在是多，测试结果我就不贴了；       



ps:各个查询语义可以交叉使用的，下面代码有部分也用到了，但是这边因为是写的例子，为了能更好的区分每个语义的作用，所以没有做太多的尝试



/**

 * @author kl by 2016/3/20

 * @boke www.kailing.pub

 */

public class QueryTest {

    //索引目录

    String indexDir="E:\\LuceneIndex";

    //测试数据目录

    String dataDir="E:\\LuceneTestData";

    /**

     * Lucence5.5返回IndexWriter实例

     * @param directory

     * @return

     */

    public IndexWriter getIndexWriter(Directory directory){

        Analyzer analyzer=new StandardAnalyzer();

        IndexWriterConfig writerConfig=new IndexWriterConfig(analyzer);

        IndexWriter writer=null;

        try {

            writer =new IndexWriter(directory,writerConfig);

        }catch (Exception e){

            e.printStackTrace();

        }

        return writer;

    }

    public Directory getDirctory(String indexDir){

        Directory directory=null;

        try {

            directory= FSDirectory.open(Paths.get(indexDir));

        }catch (IOException e){

            e.printStackTrace();

        }

        return directory;

    }

    @Test

    public void TestIndexer()throws Exception{

        File[] files= new File(dataDir).listFiles();

        IndexWriter writer=getIndexWriter(getDirctory(indexDir));

        for(File file:files){

            Document doc=new Document();

            doc.add(new TextField("filePath",file.getCanonicalPath(), Field.Store.YES));

            doc.add(new TextField("context",new  FileReader(file)));

            writer.addDocument(doc);

        }

        System.out.println("总共添加了"+writer.numDocs()+"个文档");

        writer.close();

    }

    @Test

    public void testSearcher()throws  Exception{

        IndexReader reader= DirectoryReader.open(getDirctory(indexDir));

        IndexSearcher searcher=new IndexSearcher(reader);

        QueryParser queryParser=new QueryParser("context",new StandardAnalyzer());

       Query queryw=queryParser.parse("Licensor");//完整匹配分词查询

        /**

         * 通配符 ？，*的使用

         */

         Query queryy=queryParser.parse("Lice?sor");//使用？匹配单个字符查询

         Query queryx=queryParser.parse("L*r");//使用*匹配多个字符查询

        /**

         * 布尔运算AND, OR，NOT,+,-的使用,注意：一定要是大写的AND和OR,NOT

         */

        Query queryo=queryParser.parse("Licensor OR ce*");//使用OR联合多关键字查询,也可用空格代替OR

        Query queryoo=queryParser.parse(" Licensor ce*");//这个和使用OR一样的效果

        Query queryjia=queryParser.parse("+Licensor Wildcard");//+代表必须的条件，搜索文档必须包含Licensor 可能有Wildcard

        Query querya=queryParser.parse("Licensor AND ce* AND Licenso?");//使用AND取多个关键字的并集查询

        Query queryNot=queryParser.parse("'Lincensor Apache' NOT 'Apache Licensor'");//搜索Lincensor Apache而不是Apache Licensor

        Query queryjian=queryParser.parse("'Lincensor Apache' - 'Apache Licensor'");//"-"同NOT的效果一样



        /**

         * 使用正则表达式查询

         */

        Query queryRegular=queryParser.parse("/[Lab]icensor/");//这个匹配Lincensor，aicensor，bicensor分词

        Query queryRegularr=queryParser.parse("/[Lab]icenso[a-z]/");//根据需要可以更灵活的使用

        /**

         * 使用~模糊匹配查询

         * 这个要和*号的用法区分下，*号完整通配多个字符查询，而~不是简单的通配，这个模糊匹配和Lucene的评分有关

         */

        Query queryFuzzy=queryParser.parse("icensor~");//可以查到Licensor关键字，而queryParser.parse("icensor*")查不到

        Query queryFuzzyparam=queryParser.parse("Licens~1");//~后面可加0-2的整数来制定模糊匹配度，默认不加为1

        Query queryFuzzyParam=queryParser.parse("Licens cens ~0");//~还可以模糊匹配差异化N字符数的多个关键字

        /**

         * 范围查询,多用于数字和时间的查询

         */

        Query queryRange =queryParser.parse("{abc TO Licens}");//{}abc与Licenszhi间的文件，不包含

        Query queryRangex =queryParser.parse("[abc TO Licens]");//{}abc与Licenszhi间的文件,包含本身

        /**

         * 关键字加权处理查询

         */

        //默认为1，可加权可降权，可通过加权处理给匹配的结果排序

        Query queryBoosting  =queryParser.parse("Licensor Wildcard^4 ");



        /**

         * Grouping组合查询

         */

        Query queryGrouping  =queryParser.parse("(+Licensor  +Wildcard) AND easier");//可使用（）组合多个条件查询



         //ps: 查询部分字符需要转义处理，如（+ - && || ! ( ) { } [ ] ^ " ~ * ? : \ /）



        /**

         * 使用MultiFieldQueryParser进行多个文档域查询

         */

        Map boost=new HashMap();

        boost.put("filePath",1.5F);//设置文档域的权值

        boost.put("context",2F);

        QueryParser multiField=new MultiFieldQueryParser(new String[]{"filePath","context"},new StandardAnalyzer(),boost);

        Query queryq=multiField.parse("lucenetestdata");



        TopDocs topDocs= searcher.search(queryq,10);

        System.out.println("查询结果共有"+topDocs.totalHits+"条");

        for(ScoreDoc scoreDoc:topDocs.scoreDocs){

            Document document=searcher.doc(scoreDoc.doc);

            System.out.println(document.get("filePath")+"--评分："+scoreDoc.score);

        }

    }



} 

ps:代码中有大量注释，有些不一定理解到位了，深入了解 请参考官方说明：



https://lucene.apache.org/core ... rches
原文地址：http://www.kailing.pub/article/index/arcid/79.html
[尊重社区原创，转载请保留或注明出处]
本文地址：http://elasticsearch.cn/article/91
es lucene
0
0 个评论

要回复文章请先登录或注册
Lucene5.5入门第八篇——使用QueryParser实现高级查询

0 个评论

发起人

活动推荐