 
    
        
            
                
                
                    
                        
                            初探Lucene
                        
                    
                    
                                学习地址:
 https://segmentfault.com/a/1190000003101607
 http://yijun1171.github.io/2014/12/06/Lucene%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0/
 http://www.cnblogs.com/forfuture1978/archive/2009/12/14/1623594.html
 
 添加依赖(Maven,在 pom.xml 中加入):
 
      -             <dependency>
 
-                 <groupId>org.apache.lucene</groupId>
 
-                 <artifactId>lucene-core</artifactId>
 
-                 <version>4.3.1</version>
 
-             </dependency>
 
-             <dependency>
 
-                 <groupId>org.apache.lucene</groupId>
 
-                 <artifactId>lucene-queryparser</artifactId>
 
-                 <version>4.3.1</version>
 
-             </dependency>
 
-             <dependency>
 
-                 <groupId>org.apache.lucene</groupId>
 
-                 <artifactId>lucene-queries</artifactId>
 
-                 <version>4.3.1</version>
 
-             </dependency>
 
-             <dependency>
 
-                 <groupId>org.apache.lucene</groupId>
 
-                 <artifactId>lucene-highlighter</artifactId>
 
-                 <version>4.3.1</version>
 
-             </dependency>
 
-             <dependency>
 
-                 <groupId>org.apache.lucene</groupId>
 
-                 <artifactId>lucene-analyzers-smartcn</artifactId>
 
-                 <version>4.3.1</version>
 
-             </dependency>
 
-             <dependency>
 
-                 <groupId>org.apache.lucene</groupId>
 
-                 <artifactId>lucene-analyzers-common</artifactId>
 
-                 <version>4.3.1</version>
 
-             </dependency>   
  索引基本使用
 1.创建索引和搜索
   -    import java.io.File;    
-    import java.io.IOException;    
-        
-    import org.apache.lucene.analysis.Analyzer;    
-    import org.apache.lucene.analysis.standard.StandardAnalyzer;    
-    import org.apache.lucene.document.Document;    
-    import org.apache.lucene.document.Field.Store;    
-    import org.apache.lucene.document.IntField;    
-    import org.apache.lucene.document.StringField;    
-    import org.apache.lucene.document.TextField;    
-    import org.apache.lucene.index.DirectoryReader;    
-    import org.apache.lucene.index.IndexWriter;    
-    import org.apache.lucene.index.IndexWriterConfig;    
-    import org.apache.lucene.index.IndexWriterConfig.OpenMode;    
-    import org.apache.lucene.queryparser.classic.ParseException;    
-    import org.apache.lucene.queryparser.classic.QueryParser;    
-    import org.apache.lucene.search.IndexSearcher;    
-    import org.apache.lucene.search.Query;    
-    import org.apache.lucene.search.TopDocs;    
-    import org.apache.lucene.store.Directory;    
-    import org.apache.lucene.store.FSDirectory;    
-    import org.apache.lucene.util.Version;    
-        
-    public class Index {    
-        public static void main(String[] args) {    
-            Index index = new Index();    
-            index.createIndex();    
-            index.search();    
-        }    
-        
-        public void createIndex() {    
-        
-            // 创建一个分词器(指定Lucene版本)    
-            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);    
-            // IndexWriter配置信息(指定Lucene版本和分词器)    
-            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);    
-            // 设置索引的打开方式    
-            indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);    
-            // 创建Directory对象和IndexWriter对象    
-            Directory directory = null;    
-            IndexWriter indexWriter = null;    
-            try {    
-                directory = FSDirectory.open(new File("Lucene_index/test"));    
-        
-                // 检查Directory对象是否处于锁定状态(如果锁定则进行解锁)    
-                if (IndexWriter.isLocked(directory)) {    
-                    IndexWriter.unlock(directory);    
-                }    
-        
-                indexWriter = new IndexWriter(directory, indexWriterConfig);    
-            } catch (IOException e) {    
-                e.printStackTrace();    
-            }    
-        
-            // 创建测试文档并为其添加域    
-            Document doc1 = new Document();    
-            doc1.add(new StringField("id", "abcde", Store.YES)); // 添加一个id域,域值为abcde    
-            doc1.add(new TextField("content", "使用Lucene实现全文检索", Store.YES)); // 文本域    
-            doc1.add(new IntField("num", 1, Store.YES)); // 添加数值域    
-        
-            // 将文档写入索引    
-            try {    
-                indexWriter.addDocument(doc1);    
-            } catch (IOException e) {    
-                e.printStackTrace();    
-            }    
-        
-            Document doc2 = new Document();    
-            doc2.add(new StringField("id", "yes", Store.YES));    
-            doc2.add(new TextField("content", "Docker容器技术简介", Store.YES));    
-            doc2.add(new IntField("num", 2, Store.YES));    
-            try {    
-                indexWriter.addDocument(doc2);    
-            } catch (IOException e) {    
-                e.printStackTrace();    
-            }    
-        
-            // 将IndexWriter提交    
-            try {    
-                indexWriter.commit();    
-            } catch (IOException e) {    
-                e.printStackTrace();    
-            } finally {    
-                try {    
-                    indexWriter.close();    
-                    directory.close();    
-                } catch (IOException e) {    
-                    e.printStackTrace();    
-                }    
-            }    
-        }    
-        
-        public void search() {    
-            Directory directory = null;    
-            DirectoryReader dReader = null;    
-            try {    
-                directory = FSDirectory.open(new File("Lucene_index/test")); // 索引文件    
-                dReader = DirectoryReader.open(directory); // 读取索引文件    
-                IndexSearcher searcher = new IndexSearcher(dReader); // 创建IndexSearcher对象    
-        
-                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); // 指定分词技术(标准分词-与创建索引时使用的分词技术一致)    
-        
-                // 创建查询字符串(指定搜索域和采用的分词技术)    
-                QueryParser parser = new QueryParser(Version.LUCENE_43, "content", analyzer);    
-                Query query = parser.parse("Docker"); // 创建Query对象(指定搜索词)    
-        
-                // 检索索引(指定前10条)    
-                TopDocs topDocs = searcher.search(query, 10);    
-                if (topDocs != null) {    
-                    System.out.println("符合条件的文档总数为:" + topDocs.totalHits);    
-                    for (int i = 0; i < topDocs.scoreDocs.length; i++) {    
-                        Document doc = searcher.doc(topDocs.scoreDocs[i].doc);    
-                        System.out.println(    
-                                "id = " + doc.get("id") + ",content = " + doc.get("content") + ",num = " + doc.get("num"));    
-                    }    
-                }    
-            } catch (IOException e) {    
-                e.printStackTrace();    
-            } catch (ParseException e) {    
-                e.printStackTrace();    
-            } finally {    
-                try {    
-                    dReader.close();    
-                    directory.close();    
-                } catch (IOException e) {    
-                    e.printStackTrace();    
-                }    
-            }    
-        }    
-    }    
 2.分词器对比(注意:示例中使用的 IKAnalyzer 是第三方中文分词器,不包含在上面列出的 Lucene 依赖中,需要另外引入 IK Analyzer 的 jar 包)
   -    import java.io.IOException;    
-    import java.io.StringReader;    
-        
-    import org.apache.lucene.analysis.Analyzer;    
-    import org.apache.lucene.analysis.TokenStream;    
-    import org.apache.lucene.analysis.cjk.CJKAnalyzer;    
-    import org.apache.lucene.analysis.core.KeywordAnalyzer;    
-    import org.apache.lucene.analysis.core.SimpleAnalyzer;    
-    import org.apache.lucene.analysis.core.StopAnalyzer;    
-    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;    
-    import org.apache.lucene.analysis.standard.StandardAnalyzer;    
-    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;    
-    import org.apache.lucene.util.Version;    
-    import org.wltea.analyzer.lucene.IKAnalyzer;    
-        
-    public class AnalyzerTest {    
-        public static void main(String[] args) {    
-            AnalyzerTest test=new AnalyzerTest();    
-            test.testAnalyzer();    
-        }    
-        
-        public void testAnalyzer() {    
-        
-            final String str = "今天的生活是因为你三年前的选择,而今天的选择,将决定你三年后的生活。";    
-            Analyzer analyzer = null;    
-        
-            analyzer = new StandardAnalyzer(Version.LUCENE_43); // 标准分词    
-            print(analyzer, str);    
-            analyzer = new IKAnalyzer(); // 第三方中文分词    
-            print(analyzer, str);    
-            analyzer = new WhitespaceAnalyzer(Version.LUCENE_43); // 空格分词    
-            print(analyzer, str);    
-            analyzer = new SimpleAnalyzer(Version.LUCENE_43); // 简单分词    
-            print(analyzer, str);    
-            analyzer = new CJKAnalyzer(Version.LUCENE_43); // 二分法分词    
-            print(analyzer, str);    
-            analyzer = new KeywordAnalyzer(); // 关键字分词    
-            print(analyzer, str);    
-            analyzer = new StopAnalyzer(Version.LUCENE_43); // 被忽略词分词器    
-            print(analyzer, str);    
-        
-        }    
-        
-        /**   
-         * 该方法用于打印分词器及其分词结果   
-         *    
-         * @param analyzer   
-         *            分词器   
-         * @param str   
-         *            需要分词的字符串   
-         */    
-        public void print(Analyzer analyzer, String str) {    
-        
-            StringReader stringReader = new StringReader(str);    
-            try {    
-                TokenStream tokenStream = analyzer.tokenStream("", stringReader); // 分词    
-                tokenStream.reset();    
-        
-                CharTermAttribute term = tokenStream.getAttribute(CharTermAttribute.class); // 获取分词结果的CharTermAttribute    
-                System.out.println("分词技术:" + analyzer.getClass());    
-                while (tokenStream.incrementToken()) {    
-                    System.out.print(term.toString() + "|");    
-                }    
-                System.out.println();    
-            } catch (IOException e) {    
-                e.printStackTrace();    
-            }    
-        }    
-    }    
 结果:
 分词技术:class org.apache.lucene.analysis.standard.StandardAnalyzer
 今|天|的|生|活|是|因|为|你|三|年|前|的|选|择|而|今|天|的|选|择|将|决|定|你|三|年|后|的|生|活|
 分词技术:class org.wltea.analyzer.lucene.IKAnalyzer
 今天|的|生活|是因为|因为|你|三年|三|年前|年|前|的|选择|而今|今天|的|选择|将|决定|你|三年|三|年后|年|后|的|生活|
 分词技术:class org.apache.lucene.analysis.core.WhitespaceAnalyzer
 今天的生活是因为你三年前的选择,而今天的选择,将决定你三年后的生活。|
 分词技术:class org.apache.lucene.analysis.core.SimpleAnalyzer
 今天的生活是因为你三年前的选择|而今天的选择|将决定你三年后的生活|
 分词技术:class org.apache.lucene.analysis.cjk.CJKAnalyzer
 今天|天的|的生|生活|活是|是因|因为|为你|你三|三年|年前|前的|的选|选择|而今|今天|天的|的选|选择|将决|决定|定你|你三|三年|年后|后的|的生|生活|
 分词技术:class org.apache.lucene.analysis.core.KeywordAnalyzer
 今天的生活是因为你三年前的选择,而今天的选择,将决定你三年后的生活。|
 分词技术:class org.apache.lucene.analysis.core.StopAnalyzer
 今天的生活是因为你三年前的选择|而今天的选择|将决定你三年后的生活|
   
 
 
            
                     
                    正文到此结束