PanGu4Lucene 是 Lucene.Net + PanGu(盘古分词)的应用示例

创建索引

// Open a writer on indexDir with the PanGu analyzer; the final "true" argument
// requests a newly created index (the insert snippet's commented-out opener passes "false").
IndexWriter writer = new IndexWriter(indexDir, new PanGuAnalyzer(), true);
try
{
    writer.Optimize();
}
finally
{
    // Close the writer even when Optimize() throws, so it is never left open.
    writer.Close();
}

插入数据

/// <summary>
/// Wraps one news item in a Lucene document and adds it through <c>writer</c>.
/// </summary>
/// <param name="indexDir">Index location; not read by this method.</param>
/// <param name="url">Stored in the "url" field; stored only, not indexed.</param>
/// <param name="title">Stored and tokenized in the "title" field.</param>
/// <param name="time">Stored as a "yyyyMMdd" string in the untokenized "time" field.</param>
/// <param name="content">Stored and tokenized in the "contents" field.</param>
/// <returns>The value of <c>writer.DocCount()</c> after the document has been added.</returns>
public static int IndexString(String indexDir, string url, string title, DateTime time, string content)
{
    // "writer" is not declared here: it has to be an IndexWriter opened elsewhere
    // before this method is called.
    Document newsDoc = new Document();

    newsDoc.Add(new Field("url", url, Field.Store.YES, Field.Index.NO));
    newsDoc.Add(new Field("title", title, Field.Store.YES, Field.Index.TOKENIZED));

    string day = time.ToString("yyyyMMdd");
    newsDoc.Add(new Field("time", day, Field.Store.YES, Field.Index.UN_TOKENIZED));

    newsDoc.Add(new Field("contents", content, Field.Store.YES, Field.Index.TOKENIZED));

    writer.AddDocument(newsDoc);

    // The writer is neither optimized nor closed here; that is left to the caller.
    return writer.DocCount();
}

 

 

对要搜索的词分词


/// <summary>
/// Segments <paramref name="keywords"/> with the given tokenizer and joins the words into a
/// space-separated query string in which every word carries a boost of 3 raised to its rank,
/// written as <c>word^N.0</c>.
/// </summary>
/// <param name="keywords">Raw search text to segment.</param>
/// <param name="ktTokenizer">Tokenizer whose <c>SegmentToWordInfos</c> produces the words.</param>
/// <returns>The boosted terms separated by single spaces, trimmed at both ends.</returns>
public static string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
{
    StringBuilder boosted = new StringBuilder();
    ICollection<WordInfo> segments = ktTokenizer.SegmentToWordInfos(keywords);

    foreach (WordInfo segment in segments)
    {
        // Null entries are skipped.
        if (segment != null)
        {
            int boost = (int)Math.Pow(3, segment.Rank);
            boosted.AppendFormat("{0}^{1}.0 ", segment.Word, boost);
        }
    }

    // Every term is appended with a trailing space; Trim() drops the last one.
    return boosted.ToString().Trim();
}

 

搜索


    public static List<TNews> Search(String indexDir, String q, int pageLen, int pageNo, out int recCount)
    {
        string keywords = q;

        IndexSearcher search = new IndexSearcher(indexDir);
        q = GetKeyWordsSplitBySpace(q, new PanGuTokenizer());
        QueryParser queryParser = new QueryParser("contents", new PanGuAnalyzer(true));
        Query query = queryParser.Parse(q);

        QueryParser titleQueryParser = new QueryParser("title", new PanGuAnalyzer(true));
        Query titleQuery = titleQueryParser.Parse(q);

        BooleanQuery bq = new BooleanQuery();
        bq.Add(query, BooleanClause.Occur.SHOULD);
        bq.Add(titleQuery, BooleanClause.Occur.SHOULD);

        Hits hits = search.Search(bq);

        List<TNews> result = new List<TNews>();

        recCount = hits.Length();
        int i = (pageNo - 1) * pageLen;

        while (i < recCount && result.Count < pageLen)
        {
            TNews news = null;

            try
            {
                news = new TNews();
                news.Title = hits.Doc(i).Get("title");
                news.Content = hits.Doc(i).Get("contents");
                news.Url = hits.Doc(i).Get("url");
                String strTime = hits.Doc(i).Get("time");
                news.Time = DateTime.ParseExact(strTime, "yyyyMMdd", null);

                PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter =
                    new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");

                PanGu.HighLight.Highlighter highlighter =
                    new PanGu.HighLight.Highlighter(simpleHTMLFormatter,
                    new Segment());
                highlighter.FragmentSize = 50;

                news.Abstract = highlighter.GetBestFragment(keywords, news.Content);
                news.TitleHighLighter = highlighter.GetBestFragment(keywords, news.Title);
                if (string.IsNullOrEmpty(news.TitleHighLighter))
                {
                    news.TitleHighLighter = news.Title;
                }

                //// 高亮显示设置
                ////TermQuery tQuery = new TermQuery(new Term("contents", q));

                //SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
                //Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
                ////关键内容显示大小设置
                //highlighter.SetTextFragmenter(new SimpleFragmenter(50));
                ////取出高亮显示内容
                //Lucene.Net.Analysis.KTDictSeg.KTDictSegAnalyzer analyzer = new Lucene.Net.Analysis.KTDictSeg.KTDictSegAnalyzer();
                //TokenStream tokenStream = analyzer.TokenStream("contents", new StringReader(news.Content));
                //news.Abstract = highlighter.GetBestFragment(tokenStream, news.Content);

            }
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
            }
            finally
            {
                result.Add(news);
                i++;
            }
        }

        search.Close();
        return result;
    }
}

 

效果

image

 

PanGu4Lucene 示例安装说明


1. 下载 News.xml,下载地址:
http://pangusegment.codeplex.com/Release/ProjectReleases.aspx?ReleaseId=31482
2. 进入 Bin 目录,运行 PanGu.Lucene.ImportTool.exe,点击“创建索引”按钮,然后导入 News.xml
3. 运行网站

Last edited Dec 29, 2010 at 2:43 AM by eaglet2006, version 1

Comments

No comments yet.