<dependency>
    <groupId>org.apache.tika</groupId>
    <artifactId>tika-core</artifactId>
    <version>1.12</version>
</dependency>
<dependency>
    <groupId>org.apache.tika</groupId>
    <artifactId>tika-parsers</artifactId>
    <version>1.12</version>
</dependency>
<dependency>
    <groupId>com.bbossgroups.plugins</groupId>
    <artifactId>bboss-elasticsearch-rest-jdbc</artifactId>
    <version>5.5.7</version>
</dependency>
<dependency>
    <groupId>com.hankcs</groupId>
    <artifactId>hanlp</artifactId>
    <version>portable-1.7.1</version>
</dependency>
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.5</version>
</dependency>
Note: when combining these with Spring, pay attention to the version numbers; versions that are too new cause jar conflicts. tika-parsers already depends on poi.jar, so do not add poi.jar to the project separately or it will conflict.
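With tika-core and tika-parsers on the classpath, the document body can be extracted to plain text before it is indexed. A minimal sketch (the sample file path is hypothetical):

import org.apache.tika.Tika;

import java.io.File;

public class TikaTextExtractor {
    public static void main(String[] args) throws Exception {
        // the Tika facade auto-detects the file type (Word, PDF, HTML, ...) via tika-parsers
        Tika tika = new Tika();
        // hypothetical sample file; in the project this would be the uploaded document
        String contentBody = tika.parseToString(new File("h:/doc/sample.docx"));
        System.out.println(contentBody);
    }
}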
Complete project: elasticsearch-common
pom.xml contents:
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.hd</groupId>
    <artifactId>elasticsearch-common</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>war</packaging>
    <name>elasticsearch-common Maven Webapp</name>
    <url>http://www.example.com</url>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.7</maven.compiler.source>
        <maven.compiler.target>1.7</maven.compiler.target>
        <mysql.version>5.1.40</mysql.version>
        <druid.version>1.0.29</druid.version>
        <spring.version>4.2.3.RELEASE</spring.version>
        <servlet.version>3.0.1</servlet.version>
        <jackson.version>2.8.8</jackson.version>
        <commons-io.version>2.5</commons-io.version>
        <log4j2.version>2.8.2</log4j2.version>
        <hibernate.version>5.3.5.Final</hibernate.version>
        <hibernate-validator.version>4.3.11.Final</hibernate-validator.version>
        <shiro.version>1.3.2</shiro.version>
        <ehcache.version>2.6.11</ehcache.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>javax.el</groupId>
            <artifactId>javax.el-api</artifactId>
            <version>3.0.0</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.glassfish</groupId>
            <artifactId>javax.el</artifactId>
            <version>3.0.0</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>javax.servlet</groupId>
            <artifactId>javax.servlet-api</artifactId>
            <version>${servlet.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>javax.servlet</groupId>
            <artifactId>jsp-api</artifactId>
            <version>2.0</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>javax.servlet</groupId>
            <artifactId>jstl</artifactId>
            <version>1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>${log4j2.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-jcl</artifactId>
            <version>${log4j2.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
            <version>${log4j2.version}</version>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-core</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-context</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-beans</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-expression</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-jdbc</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-orm</artifactId>
            <version>${spring.version}</version>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-tx</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-aop</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-webmvc</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-test</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-aspects</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-context-support</artifactId>
        </dependency>
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>${commons-io.version}</version>
        </dependency>
        <dependency>
            <groupId>commons-fileupload</groupId>
            <artifactId>commons-fileupload</artifactId>
            <version>1.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.hibernate</groupId>
            <artifactId>hibernate-core</artifactId>
            <version>${hibernate.version}</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.version}</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid</artifactId>
            <version>${druid.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.54</version>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.tika</groupId>
            <artifactId>tika-core</artifactId>
            <version>1.12</version>
        </dependency>
        <dependency>
            <groupId>org.apache.tika</groupId>
            <artifactId>tika-parsers</artifactId>
            <version>1.12</version>
        </dependency>
        <dependency>
            <groupId>com.bbossgroups.plugins</groupId>
            <artifactId>bboss-elasticsearch-rest-jdbc</artifactId>
            <version>5.5.7</version>
        </dependency>
        <dependency>
            <groupId>com.hankcs</groupId>
            <artifactId>hanlp</artifactId>
            <version>portable-1.7.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.shiro</groupId>
            <artifactId>shiro-spring</artifactId>
            <version>${shiro.version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-api</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.hibernate</groupId>
            <artifactId>hibernate-validator</artifactId>
            <version>${hibernate-validator.version}</version>
        </dependency>
        <dependency>
            <groupId>net.sf.ehcache</groupId>
            <artifactId>ehcache-core</artifactId>
            <version>${ehcache.version}</version>
        </dependency>
        <dependency>
            <groupId>com.googlecode.ehcache-spring-annotations</groupId>
            <artifactId>ehcache-spring-annotations</artifactId>
            <version>1.2.0</version>
        </dependency>
    </dependencies>
    <build>
        <finalName>elasticsearch-common</finalName>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.5.1</version>
                <configuration>
                    <source>${maven.compiler.source}</source>
                    <target>${maven.compiler.target}</target>
                    <encoding>${project.build.sourceEncoding}</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.4.2</version>
                <configuration>
                    <skipTests>true</skipTests>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.eclipse.jetty</groupId>
                <artifactId>jetty-maven-plugin</artifactId>
                <version>9.3.10.v20160621</version>
                <configuration>
                    <stopPort>9967</stopPort>
                    <stopKey>stop</stopKey>
                    <scanIntervalSeconds>0</scanIntervalSeconds>
                    <httpConnector>
                        <port>8878</port>
                    </httpConnector>
                    <webApp>
                        <contextPath>/</contextPath>
                    </webApp>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.tomcat.maven</groupId>
                <artifactId>tomcat7-maven-plugin</artifactId>
                <version>2.2</version>
                <configuration>
                    <port>8878</port>
                    <path>/</path>
                    <uriEncoding>UTF-8</uriEncoding>
                    <server>tomcat7</server>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <repositories>
        <repository>
            <id>aliyun</id>
            <name>aliyun</name>
            <url>http://maven.aliyun.com/nexus/content/groups/public</url>
        </repository>
    </repositories>

    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>org.springframework</groupId>
                <artifactId>spring-framework-bom</artifactId>
                <version>${spring.version}</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>
</project>
elasticsearch.properties contents:
#elasticUser=elastic
#elasticPassword=hzhh123
elasticsearch.rest.hostNames=127.0.0.1:9200
#elasticsearch.rest.hostNames=192.168.200.82:9200,192.168.200.83:9200,192.168.200.85:9200
elasticsearch.dateFormat=yyyy.MM.dd
elasticsearch.timeZone=Asia/Shanghai
elasticsearch.ttl=2d
# showTemplate debug switch: prints the rendered DSL script to the console; false = off, true = on (log4j level must be at least INFO)
elasticsearch.showTemplate=true
#elasticsearch.discoverHost=true
http.timeoutConnection = 400000
http.timeoutSocket = 400000
http.connectionRequestTimeout=400000
http.retryTime = 1
http.maxLineLength = -1
http.maxHeaderCount = 200
http.maxTotal = 400
http.defaultMaxPerRoute = 200
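A minimal smoke test of this configuration, assuming elasticsearch.properties is on the classpath: ElasticSearchHelper builds a thread-safe singleton REST client from it, and existIndice checks for the "search" index used later in this article.

import org.frameworkset.elasticsearch.ElasticSearchHelper;
import org.frameworkset.elasticsearch.client.ClientInterface;

public class EsSmokeTest {
    public static void main(String[] args) {
        // the default REST client is configured from elasticsearch.properties (hosts, timeouts, pool sizes)
        ClientInterface clientUtil = ElasticSearchHelper.getRestClientUtil();
        System.out.println("index 'search' exists: " + clientUtil.existIndice("search"));
    }
}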
elasticsearch.xml
    class="org.frameworkset.elasticsearch.ElasticSearch"
    init-method="configure"
    destroy-method="stop"
    f:elasticsearchPropes="attr:elasticsearchPropes"/>
httpclient.xml
    f:timeoutConnection = "${http.timeoutConnection}"
    f:timeoutSocket = "${http.timeoutSocket}"
    f:connectionRequestTimeout="${http.connectionRequestTimeout}"
    f:retryTime = "${http.retryTime}"
    f:maxLineLength = "${http.maxLineLength}"
    f:maxHeaderCount = "${http.maxHeaderCount}"
    f:maxTotal = "${http.maxTotal}"
    f:defaultMaxPerRoute = "${http.defaultMaxPerRoute}"
    class="org.frameworkset.spi.remote.http.ClientConfiguration">
search.xml
"settings": {
"number_of_shards": 6,
"index.refresh_interval": "5s"
},
"mappings": {
"document": {
"properties": {
"title": {
"type": "text",
"analyzer": "ik_max_word"
},
"contentbody": {
"type": "text",
"analyzer": "ik_max_word"
},
"fileid": {
"type": "text"
},
"description": {
"type": "text",
"analyzer": "ik_max_word"
},
"tags": {
"type": "text"
},
"typeid": {
"type": "text"
},
"classicid": {
"type": "text"
},
"url": {
"type": "text"
},
"agentstarttime": {
"type": "date"
## ,"format":"yyyy-MM-dd HH:mm:ss.SSS||yyyy-MM-dd'T'HH:mm:ss.SSS||yyyy-MM-dd HH:mm:ss||epoch_millis"
},
"name": {
"type": "keyword"
}
}
}
}
}]]>
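A minimal sketch of creating the index from the mapping above with the ElasticSearchClentUtil shown later in this article. "search" is the index name, and "createSearchIndice" is a hypothetical template name, since the <property name="..."> wrappers around the DSL were lost in this listing.

import com.hd.util.ElasticSearchClentUtil;

public class CreateIndexDemo {
    public static void main(String[] args) throws Exception {
        // esmapper/search.xml is the classpath location of the DSL template file above
        ElasticSearchClentUtil esUtil = new ElasticSearchClentUtil("esmapper/search.xml");
        // drops the index if it already exists, then creates it with the mapping template
        System.out.println(esUtil.createIndex("search", "createSearchIndice"));
    }
}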
"query": {
"bool": {
"filter": [
{ ## terms query: find the documents matching multiple application names
"terms": {
"applicationname.keyword": [#[applicationname1],#[applicationname2]]
}
},
{ ## range query: return records within the given time range; accepts long (epoch millisecond) values
"range": {
"agentstarttime": {
"gte": #[starttime], ## statistics start time
"lt": #[endtime] ## statistics end time
}
}
}
]
}
},
## return at most 1000 records
"size":1000
}]]>
"query": {
"bool": {
"filter": [
{
"term": {
"classicid": #[classicid]
}
}],
"must": [
{
"multi_match": {
"query": #[keywords],
"fields": ["contentbody","title","description"]
}
}
]
}
},
## paging offset
"from":#[from] ,
## return at most "size" records
"size":#[size],
"highlight": {
"pre_tags": [
""
],
"post_tags": [
""
],
"fields": {
"*": {}
},
"fragment_size": 2147483647
}
}]]>
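A minimal sketch of running the classicid + keywords query above directly through the bboss searchList API. "searchByClassicId" is a hypothetical template name, and Doc is a stand-in result bean; the real project would map every field of the document mapping.

import org.frameworkset.elasticsearch.ElasticSearchHelper;
import org.frameworkset.elasticsearch.client.ClientInterface;
import org.frameworkset.elasticsearch.entity.ESBaseData;
import org.frameworkset.elasticsearch.entity.ESDatas;

import java.util.HashMap;
import java.util.Map;

public class KeywordSearchDemo {
    // minimal stand-in bean; extends ESBaseData so highlight fragments can be read
    public static class Doc extends ESBaseData {
        private String title;
        private String contentbody;
        public String getTitle() { return title; }
        public void setTitle(String title) { this.title = title; }
        public String getContentbody() { return contentbody; }
        public void setContentbody(String contentbody) { this.contentbody = contentbody; }
    }

    public static void main(String[] args) {
        ClientInterface clientUtil = ElasticSearchHelper.getConfigRestClientUtil("esmapper/search.xml");
        Map<String, Object> params = new HashMap<>();
        params.put("classicid", "1");   // bound to #[classicid]
        params.put("keywords", "算法");  // bound to #[keywords]
        params.put("from", 0);
        params.put("size", 10);
        // "search/_search" = index name plus the _search action
        ESDatas<Doc> esDatas = clientUtil.searchList("search/_search", "searchByClassicId", params, Doc.class);
        System.out.println("total hits: " + esDatas.getTotalSize());
    }
}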
"query": {
"bool": {
"filter": [
{
"term": {
"classicid": #[classicid]
}
}]
}
},
## paging offset
"from":#[from] ,
## return at most "size" records
"size":#[size],
"highlight": {
"pre_tags": [
""
],
"post_tags": [
""
],
"fields": {
"*": {}
},
"fragment_size": 2147483647
}
}]]>
"query": {
"bool": {
"filter": [
{
"term": {
"typeid": #[typeid]
}
}],
"must": [
{
"multi_match": {
"query": #[keywords],
"fields": ["contentbody","title","description"]
}
}
]
}
},
## paging offset
"from":#[from] ,
## return at most "size" records
"size":#[size],
"highlight": {
"pre_tags": [
""
],
"post_tags": [
""
],
"fields": {
"*": {}
},
"fragment_size": 2147483647
}
}]]>
"query": {
"bool": {
"filter": [
{
"term": {
"typeid": #[typeid]
}
}]
}
},
## paging offset
"from":#[from] ,
## return at most "size" records
"size":#[size],
"highlight": {
"pre_tags": [
""
],
"post_tags": [
""
],
"fields": {
"*": {}
},
"fragment_size": 2147483647
}
}]]>
"query": {
"bool": {
"filter": [
{ ## terms query: find the documents matching multiple application names
"terms": {
"applicationname.keyword":[
#if($applicationnames && $applicationnames.size() > 0)
#foreach($applicationname in $applicationnames)
#if($velocityCount > 0),#end "$applicationname"
#end
#end
]
}
},
{ ## range query: return records within the given time range; accepts long (epoch millisecond) values
"range": {
"agentstarttime": {
"gte": #[starttime], ## statistics start time
"lt": #[endtime] ## statistics end time
}
}
}
]
}
},
## return at most 1000 records
"size":1000
}]]>
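The #foreach block above is driven by a java.util.List bound to the $applicationnames variable, while #[starttime] and #[endtime] take epoch-millisecond longs. A minimal sketch, assuming the template is named "searchByApplicationNames" and using Map as a stand-in result type:

import org.frameworkset.elasticsearch.ElasticSearchHelper;
import org.frameworkset.elasticsearch.client.ClientInterface;
import org.frameworkset.elasticsearch.entity.ESDatas;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

public class TermsQueryDemo {
    public static void main(String[] args) {
        ClientInterface clientUtil = ElasticSearchHelper.getConfigRestClientUtil("esmapper/search.xml");
        Map<String, Object> params = new HashMap<>();
        // the list drives the #foreach loop that renders the terms array
        params.put("applicationnames", Arrays.asList("app1", "app2"));
        params.put("starttime", 1553443200000L); // bound to #[starttime]
        params.put("endtime", 1554048000000L);   // bound to #[endtime]
        ESDatas<Map> esDatas = clientUtil.searchList("search/_search", "searchByApplicationNames", params, Map.class);
        System.out.println("total hits: " + esDatas.getTotalSize());
    }
}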
hanlp.properties
# Root directory for all relative paths in this file: root + relative path = full path (relative paths are supported, see https://github.com/hankcs/hanlp/pull/254)
# Windows users: always use / as the path separator
root=h:/doc/java/hzhh123
#root=/home/data/software/devsoft/java/hanlp
# The line above is the only part that must be changed; uncomment and edit the entries below as needed.
# Core dictionary path
CoreDictionaryPath=data/dictionary/CoreNatureDictionary.txt
# Bigram dictionary path
BiGramDictionaryPath=data/dictionary/CoreNatureDictionary.ngram.txt
# Custom dictionary paths, separated by ;. A leading space means the file is in the same directory; "filename POS" means that dictionary's default part of speech. Priority decreases from left to right.
# All dictionaries use UTF-8 encoding; each line is one word in the form [word] [POS A] [freq of A] [POS B] [freq of B] ... Omitting the POS means the dictionary's default POS is used.
CustomDictionaryPath=data/dictionary/custom/CustomDictionary.txt; 现代汉语补充词库.txt; 全国地名大全.txt ns; 人名词典.txt; 机构名词典.txt; 上海地名.txt ns;data/dictionary/person/nrf.txt nrf;
# Stop-word dictionary path
CoreStopWordDictionaryPath=data/dictionary/stopwords.txt
# Synonym dictionary path
CoreSynonymDictionaryDictionaryPath=data/dictionary/synonym/CoreSynonym.txt
# Person-name dictionary path
PersonDictionaryPath=data/dictionary/person/nr.txt
# Person-name transition matrix path
PersonDictionaryTrPath=data/dictionary/person/nr.tr.txt
# Traditional/Simplified Chinese dictionary root
tcDictionaryRoot=data/dictionary/tc
# HMM segmentation model
HMMSegmentModelPath=data/model/segment/HMMSegmentModel.bin
# Whether segmentation results show the part of speech
ShowTermNature=true
# IO adapter: implement com.hankcs.hanlp.corpus.io.IIOAdapter to run HanLP on other platforms (Hadoop, Redis, ...)
# The default IO adapter below is based on the ordinary file system.
#IOAdapter=com.hankcs.hanlp.corpus.io.FileIOAdapter
# Perceptron lexical analyzer
PerceptronCWSModelPath=data/model/perceptron/pku199801/cws.bin
PerceptronPOSModelPath=data/model/perceptron/pku199801/pos.bin
PerceptronNERModelPath=data/model/perceptron/pku199801/ner.bin
# CRF lexical analyzer
CRFCWSModelPath=data/model/crf/pku199801/cws.txt
CRFPOSModelPath=data/model/crf/pku199801/pos.txt
CRFNERModelPath=data/model/crf/pku199801/ner.txt
# For more options, see https://github.com/hankcs/hanlp/blob/master/src/main/java/com/hankcs/hanlp/HanLP.java#L59 and add them here as needed
Note: see https://github.com/hankcs/hanlp, download the data.zip file and unzip it into h:/doc/java/hzhh123.
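After unpacking data.zip and pointing root at it, a quick sanity check that HanLP can find its dictionaries (a minimal sketch; any short Chinese text works):

import com.hankcs.hanlp.HanLP;

public class HanLPSmokeTest {
    public static void main(String[] args) {
        // standard segmentation; fails fast if the dictionaries under root cannot be loaded
        System.out.println(HanLP.segment("商品和服务"));
        // pinyin conversion is another quick check that the data files are readable
        System.out.println(HanLP.convertToPinyinList("中文分词"));
    }
}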
HanLPUtil.java
package com.hd.util;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.document.sentence.Sentence;
import com.hankcs.hanlp.corpus.document.sentence.word.IWord;
import com.hankcs.hanlp.model.crf.CRFLexicalAnalyzer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * hzhh123
 * 2019/3/25 14:05
 *
 * @description natural language processing: Chinese word segmentation, part-of-speech tagging,
 *              named entity recognition, dependency parsing, new word discovery, key phrase
 *              extraction, automatic summarization, text classification/clustering, pinyin and
 *              traditional/simplified conversion
 * @link https://github.com/hankcs/hanlp
 */
public class HanLPUtil {
    /**
     * @param content
     * @return
     * @description extract a summary (top 3 sentences)
     */
    public static List<String> summary(String content) {
        List<String> summary = HanLP.extractSummary(content, 3);
        return summary;
    }

    /**
     * @param content
     * @return
     * @description extract key phrases (top 5)
     */
    public static List<String> phrase(String content) {
        return HanLP.extractPhrase(content, 5);
    }

    /**
     * @param document
     * @return
     * @throws IOException
     * @description find the words with the relevant parts of speech and collect them into one list
     */
    public static List<String> findWordsAndCollectByLabel(List<String> document) throws IOException {
        /* analyze parts of speech and pick out the useful ones */
        CRFLexicalAnalyzer analyzer = new CRFLexicalAnalyzer();
        Sentence analyzeWords = analyzer.analyze(String.valueOf(document));
        List<IWord> wordsByLabel1 = analyzeWords.findWordsByLabel("n");
        List<IWord> wordsByLabel2 = analyzeWords.findWordsByLabel("ns");
        List<IWord> wordsByLabel3 = analyzeWords.findWordsByLabel("t");
        List<IWord> wordsByLabel4 = analyzeWords.findWordsByLabel("j");
        List<IWord> wordsByLabel5 = analyzeWords.findWordsByLabel("vn");
        List<IWord> wordsByLabel6 = analyzeWords.findWordsByLabel("nr");
        List<IWord> wordsByLabel7 = analyzeWords.findWordsByLabel("nt");
        List<IWord> wordsByLabel8 = analyzeWords.findWordsByLabel("nz");
        wordsByLabel1.addAll(wordsByLabel2);
        wordsByLabel1.addAll(wordsByLabel3);
        wordsByLabel1.addAll(wordsByLabel4);
        wordsByLabel1.addAll(wordsByLabel5);
        wordsByLabel1.addAll(wordsByLabel6);
        wordsByLabel1.addAll(wordsByLabel7);
        wordsByLabel1.addAll(wordsByLabel8);
        List<String> words = new ArrayList<>();
        for (IWord word : wordsByLabel1) {
            words.add(word.getValue());
        }
        return words;
    }

    public static void main(String[] args) {
        String document = "算法可大致分为基本算法、数据结构的算法、数论算法、计算几何的算法、图的算法、动态规划以及数值分析、加密算法、排序算法、检索算法、随机化算法、并行算法、厄米变形模型、随机森林算法。\n"
                + "算法可以宽泛的分为三类,\n"
                + "一,有限的确定性算法,这类算法在有限的一段时间内终止。他们可能要花很长时间来执行指定的任务,但仍将在一定的时间内终止。这类算法得出的结果常取决于输入值。\n"
                + "二,有限的非确定算法,这类算法在有限的时间内终止。然而,对于一个(或一些)给定的数值,算法的结果并不是唯一的或确定的。\n"
                + "三,无限的算法,是那些由于没有定义终止定义条件,或定义的条件无法由输入的数据满足而不终止运行的算法。通常,无限算法的产生是由于未能确定的定义终止条件。";
        List<String> sentenceList = phrase(document);
        // List<String> sentenceList = summary(document);
        System.out.println(sentenceList);
    }
}
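A small usage sketch: the key phrases and noun-like words extracted by HanLPUtil could be used to fill the tags field of the document mapping shown earlier (the sample text is arbitrary):

import com.hd.util.HanLPUtil;

import java.util.Arrays;
import java.util.List;

public class TagExtractionDemo {
    public static void main(String[] args) throws Exception {
        String text = "Elasticsearch是一个基于Lucene的分布式全文检索引擎。";
        // key phrases, e.g. candidates for the "tags" field
        List<String> phrases = HanLPUtil.phrase(text);
        // noun-like words aggregated by part-of-speech label (requires the CRF model files)
        List<String> nouns = HanLPUtil.findWordsAndCollectByLabel(Arrays.asList(text));
        System.out.println(phrases);
        System.out.println(nouns);
    }
}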
ElasticSearchResponseEntity.java
package com.hd.util;

import java.util.List;

/**
 * hzhh123
 * 2019/3/22 11:51
 * @description wrapper for the results of a paged Elasticsearch query
 */
public class ElasticSearchResponseEntity<T> {
    private int from = 0;
    private int size = 10;
    private long total;
    private List<T> records;

    public ElasticSearchResponseEntity(int from, int size) {
        this.from = from;
        this.size = size;
    }

    public int getFrom() {
        return from;
    }

    public void setFrom(int from) {
        this.from = from;
    }

    public int getSize() {
        return size;
    }

    public void setSize(int size) {
        this.size = size;
    }

    public long getTotal() {
        return total;
    }

    public void setTotal(long total) {
        this.total = total;
    }

    public List<T> getRecords() {
        return records;
    }

    public void setRecords(List<T> records) {
        this.records = records;
    }
}
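A short usage sketch of the wrapper with hypothetical values, showing how a page of results would be handed to the web layer:

import com.hd.util.ElasticSearchResponseEntity;

import java.util.Arrays;

public class ResponseEntityDemo {
    public static void main(String[] args) {
        // from = 0, size = 10 mirror the defaults used by the search templates above
        ElasticSearchResponseEntity<String> page = new ElasticSearchResponseEntity<>(0, 10);
        page.setTotal(2);
        page.setRecords(Arrays.asList("doc-1", "doc-2"));
        System.out.println(page.getTotal() + " records, page size " + page.getSize());
    }
}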
ElasticSearchClentUtil.java
package com.hd.util;

import org.frameworkset.elasticsearch.ElasticSearchException;
import org.frameworkset.elasticsearch.ElasticSearchHelper;
import org.frameworkset.elasticsearch.client.ClientInterface;
import org.frameworkset.elasticsearch.entity.ESBaseData;
import org.frameworkset.elasticsearch.entity.ESDatas;

import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
 * hzhh123
 *
 * Elasticsearch create/read/update/delete operations
 * @link https://gitee.com/bboss/bboss-elastic
 *
 */
public class ElasticSearchClentUtil {
    private String mapPath;

    public ElasticSearchClentUtil(String mapPath) {
        this.mapPath = mapPath;
    }

    /**
     * @param indexName    index name
     * @param indexMapping mapping (table structure) template name
     * @return
     * @description create an index
     */
    public String createIndex(String indexName, String indexMapping) throws Exception {
        // load the DSL configuration file; the client is a thread-safe singleton
        ClientInterface clientUtil = ElasticSearchHelper.getConfigRestClientUtil(mapPath);
        // check whether the index already exists
        boolean exist = clientUtil.existIndice(indexName);
        if (exist) {
            // drop the index before creating the mapping
            clientUtil.dropIndice(indexName);
        }
        // create the mapping
        return clientUtil.createIndiceMapping(indexName, indexMapping);
    }

    /**
     * @description drop an index
     * @param indexName
     * @return
     */
    public String dropIndex(String indexName) {
        // load the DSL configuration file; the client is a thread-safe singleton
        ClientInterface clientUtil = ElasticSearchHelper.getConfigRestClientUtil(mapPath);
        return clientUtil.dropIndice(indexName);
    }

    /**
     * @param indexName index name
     * @param indexType index type
     * @param id        document id
     * @return
     * @description delete a document
     */
    public String deleteDocment(String indexName, String indexType, String id) throws ElasticSearchException {
        // load the DSL configuration file; the client is a thread-safe singleton
        ClientInterface clientUtil = ElasticSearchHelper.getConfigRestClientUtil(mapPath);
        return clientUtil.deleteDocument(indexName, indexType, id);
    }

    /**
     * @param indexName index name
     * @param indexType index type
     * @param bean
     * @return
     * @description add a document
     */
    public <T> String addDocument(String indexName, String indexType, T bean) {
        // client for creating/updating/getting/deleting documents; thread-safe singleton
        ClientInterface clientUtil = ElasticSearchHelper.getConfigRestClientUtil(mapPath);
        return clientUtil.addDocument(indexName, indexType, bean);
    }

    /**
     *
     * @param path         index name plus the _search action
     * @param templateName DSL statement defined in esmapper/search.xml
     * @param queryFiled   query parameter name
     * @param keywords     query parameter value
     * @param from         first record of the page, default 0
     * @param size         page size, default 10
     * @return
     */
    public <T extends ESBaseData> ElasticSearchResponseEntity<T> searchDocumentByKeywords(String path, String templateName, String queryFiled, String keywords,
                                                                                          String from, String size, Class<T> beanClass) {
        // load the DSL configuration file; the client is a thread-safe singleton
        ClientInterface clientUtil = ElasticSearchHelper.getConfigRestClientUtil(mapPath);
        Map<String, Object> params = new HashMap<>();
        params.put(queryFiled, keywords);
        // paging parameters
        params.put("from", from);
        params.put("size", size);
        ElasticSearchResponseEntity<T> responseEntity = new ElasticSearchResponseEntity<>(Integer.parseInt(from), Integer.parseInt(size));
        // run the query: path is the index plus the _search action
        ESDatas<T> esDatas = // esDatas holds the matched records, at most 1000 as specified by "size" in the DSL
                clientUtil.searchList(path,  // index name plus the _search action
                        templateName,        // DSL statement defined in esmapper/search.xml
                        params,              // template variables
                        beanClass);          // type the returned documents are mapped to
        // result list, at most 1000 records
        List<T> documentList = esDatas.getDatas();
        System.out.println(documentList == null);
        // total number of hits
        long totalSize = esDatas.getTotalSize();
        responseEntity.setTotal(totalSize);
        for (int i = 0; documentList != null && i < documentList.size(); i++) { // iterate over the results
            T doc = documentList.get(i);
            // highlighted fragments of every field that matched the query
            Map<String, List<Object>> highlights = doc.getHighlight();
            Iterator<Map.Entry<String, List<Object>>> entries = highlights.entrySet().iterator();
            while (entries.hasNext()) {
                Map.Entry<String, List<Object>> entry = entries.next();
                String fieldName = entry.getKey();
                System.out.print(fieldName + ":");
                List