diff --git a/TextLocator/Core/AppConst.cs b/TextLocator/Core/AppConst.cs index ec5136be34f85733555fcc0614186dcdf5ece18e..f6bb3c41157d1bbfb604379754a1e1c7766cef63 100644 --- a/TextLocator/Core/AppConst.cs +++ b/TextLocator/Core/AppConst.cs @@ -83,10 +83,11 @@ namespace TextLocator.Core public static readonly string APP_INDEX_DIR = Path.Combine(APP_DIR, "Index"); /// /// 分词器 - /// new Lucene.Net.Analysis.Cn.ChineseAnalyzer(); + /// new Lucene.Net.Analysis.Cn.ChineseAnalyzer(); // 中文分词器 /// new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);// 用standardAnalyzer分词器 + /// new Lucene.Net.Analysis.PanGuAnalyzer(); // PanGu分词器 /// - public static readonly Analyzer INDEX_ANALYZER = new JiebaAnalyzer(); //new Lucene.Net.Analysis.PanGuAnalyzer(); + public static readonly Analyzer INDEX_ANALYZER = new JiebaAnalyzer(); // Jieba分词器 /// /// 分割器 /// @@ -95,11 +96,11 @@ namespace TextLocator.Core /// /// 匹配Lucene.NET内置关键词 /// - public static readonly Regex REGEX_BUILT_IN_SYMBOL = new Regex("AND|OR|NOT|\\&\\&|\\|\\||\"|\\~|\\:"); + public static readonly Regex REGEX_BUILT_IN_SYMBOL = new Regex("AND|OR|NOT|\\&\\&|\\|\\||\"|\\~"); // \\: /// /// 匹配支持的通配符 /// - public static readonly Regex REGEX_JUDGMENT = new Regex(@"\.|\\w|\\W|\\s|\\S|\\d|\\D|\\b|\\B|\\f|\\n|\\r|\\t|\\v|\^|\$|\*|\?|\+|\-|\{|\}|\[|\]|\(|\)|\\|\||\!"); + //public static readonly Regex REGEX_JUDGMENT = new Regex(@"\.|\\w|\\W|\\s|\\S|\\d|\\D|\\b|\\B|\\f|\\n|\\r|\\t|\\v|\^|\$|\*|\?|\+|\-|\{|\}|\[|\]|\(|\)|\\|\||\!"); /// /// 匹配空白和换行 /// @@ -121,6 +122,10 @@ namespace TextLocator.Core /// public static readonly Regex REGEX_CONTENT_PAGE = new Regex(@"----\d+----"); + /// + /// 正则搜索前缀 + /// + public const string REGEX_SEARCH_PREFIX = "re:"; /// /// 索引写入器 /// diff --git a/TextLocator/Index/IndexCore.cs b/TextLocator/Index/IndexCore.cs index a14ccb153e41757da02f1140ef82f7b7eb13c3b1..42e70b24f997566dc534c190c22f9484c7ca9814 100644 --- a/TextLocator/Index/IndexCore.cs +++ b/TextLocator/Index/IndexCore.cs @@ -651,26 +651,29 @@ namespace TextLocator.Index { // 1、---- 关键词 string keyword = param.Keywords[i]; - text += keyword + ","; + text += keyword.Replace(AppConst.REGEX_SEARCH_PREFIX, "") + ","; // 2、---- 搜索域 bool hasFileName = param.SearchRegion == SearchRegion.文件名和内容 || param.SearchRegion == SearchRegion.仅文件名; bool hasContent = param.SearchRegion == SearchRegion.文件名和内容 || param.SearchRegion == SearchRegion.仅文件内容; // 3.1、---- 关键词正则 或 标记为正则 - if (AppConst.REGEX_JUDGMENT.IsMatch(keyword)) + //if (AppConst.REGEX_JUDGMENT.IsMatch(keyword)) + if (keyword.StartsWith(AppConst.REGEX_SEARCH_PREFIX)) { keywordType = "正则"; + + string reg = keyword.Replace(AppConst.REGEX_SEARCH_PREFIX, ""); // 文件名搜索 if (hasFileName) { - RegexQuery query = new RegexQuery(new Lucene.Net.Index.Term("FileName", keyword)); + RegexQuery query = new RegexQuery(new Lucene.Net.Index.Term("FileName", reg)); boolQuery.Add(query, Lucene.Net.Search.Occur.SHOULD); } // 文件内容搜索 if (hasContent) { - RegexQuery query = new RegexQuery(new Lucene.Net.Index.Term("Content", keyword)); + RegexQuery query = new RegexQuery(new Lucene.Net.Index.Term("Content", reg)); boolQuery.Add(query, Lucene.Net.Search.Occur.SHOULD); } } @@ -678,7 +681,7 @@ namespace TextLocator.Index else { // 关键词再次分词(用于短语查询),UI选中精确搜索时,文本框输入内容不分词,业务处理中查询需要按照短语分词查询 - string[] phrases = AppConst.INDEX_SEGMENTER.CutForSearch(keyword).ToArray(); + string[] phrases = IndexCore.GetKeywords(keyword).ToArray();// AppConst.INDEX_SEGMENTER.CutForSearch(keyword).ToArray(); // 【内部函数】域组合查询内部函数 void FieldCombineQuery(string fieldName) @@ -827,6 +830,15 @@ namespace TextLocator.Index SearchRegion = param.SearchRegion }; + /*if ("正则".Equals(keywordType)) + { + string keyword = param.Keywords[0]; + + param.Keywords = new List(); + param.Keywords.Add(keyword.Replace(AppConst.REGEX_SEARCH_PREFIX, "")); + fileInfo.Keywords = param.Keywords; + }*/ + // 词频统计(所有关键词匹配次数) // fileInfo.MatchCount = GetMatchCount(fileInfo); @@ -920,9 +932,11 @@ namespace TextLocator.Index { if (string.IsNullOrEmpty(keyword)) continue; // 关键词是正则表达式 - if (AppConst.REGEX_JUDGMENT.IsMatch(keyword)) + if (keyword.StartsWith(AppConst.REGEX_SEARCH_PREFIX)) + //if (AppConst.REGEX_JUDGMENT.IsMatch(keyword)) { - Regex regex = new Regex(keyword, RegexOptions.IgnoreCase); + string reg = keyword.Replace(AppConst.REGEX_SEARCH_PREFIX, ""); + Regex regex = new Regex(reg, RegexOptions.IgnoreCase); Match matches = regex.Match(content); if (matches.Success) { @@ -1142,5 +1156,31 @@ namespace TextLocator.Index return finishCount * 1.00F / totalCount * 1.00F * 100.00F; } #endregion + + /// + /// 文本分词 + /// + /// + /// + public static List GetKeywords(string q) + { + /*// 标准分词器分词 + List keyworkds = new List(); + Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30); + using (Lucene.Net.Analysis.TokenStream tokenStream = analyzer.TokenStream(null, new StringReader(q))) + { + Lucene.Net.Analysis.Tokenattributes.ITermAttribute termAttribute = null; + bool hasNext = tokenStream.IncrementToken(); + while (hasNext) + { + termAttribute = tokenStream.GetAttribute(); + keyworkds.Add(termAttribute.Term); + hasNext = tokenStream.IncrementToken(); + } + } + return keyworkds;*/ + // Jieba分词器分词 + return AppConst.INDEX_SEGMENTER.CutForSearch(q).ToList(); + } } } diff --git a/TextLocator/MainWindow.xaml.cs b/TextLocator/MainWindow.xaml.cs index d30dc540033d603459f05d869832d878bb8e858e..ce8ac3a8f9724f1b8f3b2e21fc55f75c35932ae7 100644 --- a/TextLocator/MainWindow.xaml.cs +++ b/TextLocator/MainWindow.xaml.cs @@ -1435,7 +1435,8 @@ namespace TextLocator keywords.Add(searchText); } // 正则表达式 - else if (AppConst.REGEX_JUDGMENT.IsMatch(searchText)) + //else if (AppConst.REGEX_JUDGMENT.IsMatch(searchText)) + else if (searchText.StartsWith(AppConst.REGEX_SEARCH_PREFIX)) { keywords.Add(searchText); } @@ -1443,7 +1444,7 @@ namespace TextLocator else { // 分词列表 - List segmentList = AppConst.INDEX_SEGMENTER.CutForSearch(searchText).ToList(); + List segmentList = IndexCore.GetKeywords(searchText);//AppConst.INDEX_SEGMENTER.CutForSearch(searchText).ToList(); // 合并关键列表 keywords = keywords.Union(segmentList).ToList(); } diff --git a/TextLocator/Properties/AssemblyInfo.cs b/TextLocator/Properties/AssemblyInfo.cs index 677010cbc06dd52f9c4bd7834f1c7c54183e4e13..59b6bfda471c14d07262d7c35073c0830a82a0e0 100644 --- a/TextLocator/Properties/AssemblyInfo.cs +++ b/TextLocator/Properties/AssemblyInfo.cs @@ -50,9 +50,9 @@ using System.Windows; //通过使用 "*",如下所示: // [assembly: AssemblyVersion("1.0.*")] // 大版本,强制更新最小版本 -[assembly: AssemblyVersion("2.1.28.8")] +[assembly: AssemblyVersion("2.1.30.0")] // 小版本,选择更新版本 -[assembly: AssemblyFileVersion("2.1.28.8")] +[assembly: AssemblyFileVersion("2.1.30.0")] // Version minVersion = System.Reflection.Assembly.GetExecutingAssembly().GetName().Version; // Version version = new Version(FileVersionInfo.GetVersionInfo(System.Windows.Forms.Application.ExecutablePath).ProductVersion); diff --git a/TextLocator/Service/WordFileService.cs b/TextLocator/Service/WordFileService.cs index 3089562e6cef4f29481626a6fa52285592faaffb..0f70219709bb19fc32cea1a37fbea120eb79afbe 100644 --- a/TextLocator/Service/WordFileService.cs +++ b/TextLocator/Service/WordFileService.cs @@ -175,7 +175,7 @@ namespace TextLocator.Service foreach (XmlNode textNode in textNodes) { - builder.Append(textNode.InnerText); + builder.AppendLine(textNode.InnerText); } builder.AppendLine(); } diff --git a/TextLocator/Util/FileContentUtil.cs b/TextLocator/Util/FileContentUtil.cs index f9ff71f92ca143b42e7de8134792945cbdc427c9..990b73743c6a6616e04d49bd9b4212cfdee7f2c7 100644 --- a/TextLocator/Util/FileContentUtil.cs +++ b/TextLocator/Util/FileContentUtil.cs @@ -82,7 +82,12 @@ namespace TextLocator.Util // 拿出Run的Text string text = position.GetTextInRun(LogicalDirection.Forward); // 关键词匹配查找 - Regex regex = new Regex(keyword, RegexOptions.IgnoreCase); + string reg = keyword; + if (keyword.StartsWith(AppConst.REGEX_SEARCH_PREFIX)) + { + reg = keyword.Replace(AppConst.REGEX_SEARCH_PREFIX, ""); + } + Regex regex = new Regex(reg, RegexOptions.IgnoreCase); Match matches = regex.Match(text); if (matches.Success) { @@ -161,8 +166,13 @@ namespace TextLocator.Util // 遍历关键词列表 foreach (string keyword in keywords) { + string reg = keyword; + if (keyword.StartsWith(AppConst.REGEX_SEARCH_PREFIX)) + { + reg = keyword.Replace(AppConst.REGEX_SEARCH_PREFIX, ""); + } // 定义关键词正则 - Regex regex = new Regex(keyword, RegexOptions.IgnoreCase); + Regex regex = new Regex(reg, RegexOptions.IgnoreCase); // 匹配集合 MatchCollection collection = regex.Matches(content); // 遍历命中列表 diff --git a/images/Keywords3.png b/images/Keywords3.png index 0ca42473b086a976f9904d850a4faabe7e1def8d..54c58395772f669753173f46f700f98c3498d612 100644 Binary files a/images/Keywords3.png and b/images/Keywords3.png differ