diff --git a/TextLocator/Core/AppConst.cs b/TextLocator/Core/AppConst.cs
index ec5136be34f85733555fcc0614186dcdf5ece18e..f6bb3c41157d1bbfb604379754a1e1c7766cef63 100644
--- a/TextLocator/Core/AppConst.cs
+++ b/TextLocator/Core/AppConst.cs
@@ -83,10 +83,11 @@ namespace TextLocator.Core
public static readonly string APP_INDEX_DIR = Path.Combine(APP_DIR, "Index");
///
/// 分词器
- /// new Lucene.Net.Analysis.Cn.ChineseAnalyzer();
+ /// new Lucene.Net.Analysis.Cn.ChineseAnalyzer(); // 中文分词器
/// new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);// 用standardAnalyzer分词器
+ /// new Lucene.Net.Analysis.PanGuAnalyzer(); // PanGu分词器
///
- public static readonly Analyzer INDEX_ANALYZER = new JiebaAnalyzer(); //new Lucene.Net.Analysis.PanGuAnalyzer();
+ public static readonly Analyzer INDEX_ANALYZER = new JiebaAnalyzer(); // Jieba分词器
///
/// 分割器
///
@@ -95,11 +96,11 @@ namespace TextLocator.Core
///
/// 匹配Lucene.NET内置关键词
///
- public static readonly Regex REGEX_BUILT_IN_SYMBOL = new Regex("AND|OR|NOT|\\&\\&|\\|\\||\"|\\~|\\:");
+ public static readonly Regex REGEX_BUILT_IN_SYMBOL = new Regex("AND|OR|NOT|\\&\\&|\\|\\||\"|\\~"); // \\:
///
/// 匹配支持的通配符
///
- public static readonly Regex REGEX_JUDGMENT = new Regex(@"\.|\\w|\\W|\\s|\\S|\\d|\\D|\\b|\\B|\\f|\\n|\\r|\\t|\\v|\^|\$|\*|\?|\+|\-|\{|\}|\[|\]|\(|\)|\\|\||\!");
+ //public static readonly Regex REGEX_JUDGMENT = new Regex(@"\.|\\w|\\W|\\s|\\S|\\d|\\D|\\b|\\B|\\f|\\n|\\r|\\t|\\v|\^|\$|\*|\?|\+|\-|\{|\}|\[|\]|\(|\)|\\|\||\!");
///
/// 匹配空白和换行
///
@@ -121,6 +122,10 @@ namespace TextLocator.Core
///
public static readonly Regex REGEX_CONTENT_PAGE = new Regex(@"----\d+----");
+ ///
+ /// 正则搜索前缀
+ ///
+ public const string REGEX_SEARCH_PREFIX = "re:";
///
/// 索引写入器
///
diff --git a/TextLocator/Index/IndexCore.cs b/TextLocator/Index/IndexCore.cs
index a14ccb153e41757da02f1140ef82f7b7eb13c3b1..42e70b24f997566dc534c190c22f9484c7ca9814 100644
--- a/TextLocator/Index/IndexCore.cs
+++ b/TextLocator/Index/IndexCore.cs
@@ -651,26 +651,29 @@ namespace TextLocator.Index
{
// 1、---- 关键词
string keyword = param.Keywords[i];
- text += keyword + ",";
+ text += keyword.Replace(AppConst.REGEX_SEARCH_PREFIX, "") + ",";
// 2、---- 搜索域
bool hasFileName = param.SearchRegion == SearchRegion.文件名和内容 || param.SearchRegion == SearchRegion.仅文件名;
bool hasContent = param.SearchRegion == SearchRegion.文件名和内容 || param.SearchRegion == SearchRegion.仅文件内容;
// 3.1、---- 关键词正则 或 标记为正则
- if (AppConst.REGEX_JUDGMENT.IsMatch(keyword))
+ //if (AppConst.REGEX_JUDGMENT.IsMatch(keyword))
+ if (keyword.StartsWith(AppConst.REGEX_SEARCH_PREFIX))
{
keywordType = "正则";
+
+ string reg = keyword.Replace(AppConst.REGEX_SEARCH_PREFIX, "");
// 文件名搜索
if (hasFileName)
{
- RegexQuery query = new RegexQuery(new Lucene.Net.Index.Term("FileName", keyword));
+ RegexQuery query = new RegexQuery(new Lucene.Net.Index.Term("FileName", reg));
boolQuery.Add(query, Lucene.Net.Search.Occur.SHOULD);
}
// 文件内容搜索
if (hasContent)
{
- RegexQuery query = new RegexQuery(new Lucene.Net.Index.Term("Content", keyword));
+ RegexQuery query = new RegexQuery(new Lucene.Net.Index.Term("Content", reg));
boolQuery.Add(query, Lucene.Net.Search.Occur.SHOULD);
}
}
@@ -678,7 +681,7 @@ namespace TextLocator.Index
else
{
// 关键词再次分词(用于短语查询),UI选中精确搜索时,文本框输入内容不分词,业务处理中查询需要按照短语分词查询
- string[] phrases = AppConst.INDEX_SEGMENTER.CutForSearch(keyword).ToArray();
+ string[] phrases = IndexCore.GetKeywords(keyword).ToArray();// AppConst.INDEX_SEGMENTER.CutForSearch(keyword).ToArray();
// 【内部函数】域组合查询内部函数
void FieldCombineQuery(string fieldName)
@@ -827,6 +830,15 @@ namespace TextLocator.Index
SearchRegion = param.SearchRegion
};
+ /*if ("正则".Equals(keywordType))
+ {
+ string keyword = param.Keywords[0];
+
+ param.Keywords = new List();
+ param.Keywords.Add(keyword.Replace(AppConst.REGEX_SEARCH_PREFIX, ""));
+ fileInfo.Keywords = param.Keywords;
+ }*/
+
// 词频统计(所有关键词匹配次数)
// fileInfo.MatchCount = GetMatchCount(fileInfo);
@@ -920,9 +932,11 @@ namespace TextLocator.Index
{
if (string.IsNullOrEmpty(keyword)) continue;
// 关键词是正则表达式
- if (AppConst.REGEX_JUDGMENT.IsMatch(keyword))
+ if (keyword.StartsWith(AppConst.REGEX_SEARCH_PREFIX))
+ //if (AppConst.REGEX_JUDGMENT.IsMatch(keyword))
{
- Regex regex = new Regex(keyword, RegexOptions.IgnoreCase);
+ string reg = keyword.Replace(AppConst.REGEX_SEARCH_PREFIX, "");
+ Regex regex = new Regex(reg, RegexOptions.IgnoreCase);
Match matches = regex.Match(content);
if (matches.Success)
{
@@ -1142,5 +1156,31 @@ namespace TextLocator.Index
return finishCount * 1.00F / totalCount * 1.00F * 100.00F;
}
#endregion
+
+ /// <summary>
+ /// Splits text into keywords by passing it to AppConst.INDEX_SEGMENTER.CutForSearch.
+ /// </summary>
+ /// <param name="q">Text to segment.</param>
+ /// <returns>The segments produced by CutForSearch, materialized as a list.</returns>
+ public static List GetKeywords(string q)
+ {
+ /*// Disabled alternative: tokenize with the standard analyzer
+ List keyworkds = new List();
+ Lucene.Net.Analysis.Analyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
+ using (Lucene.Net.Analysis.TokenStream tokenStream = analyzer.TokenStream(null, new StringReader(q)))
+ {
+ Lucene.Net.Analysis.Tokenattributes.ITermAttribute termAttribute = null;
+ bool hasNext = tokenStream.IncrementToken();
+ while (hasNext)
+ {
+ termAttribute = tokenStream.GetAttribute();
+ keyworkds.Add(termAttribute.Term);
+ hasNext = tokenStream.IncrementToken();
+ }
+ }
+ return keyworkds;*/
+ // Tokenize with the Jieba segmenter
+ return AppConst.INDEX_SEGMENTER.CutForSearch(q).ToList();
+ }
}
}
diff --git a/TextLocator/MainWindow.xaml.cs b/TextLocator/MainWindow.xaml.cs
index d30dc540033d603459f05d869832d878bb8e858e..ce8ac3a8f9724f1b8f3b2e21fc55f75c35932ae7 100644
--- a/TextLocator/MainWindow.xaml.cs
+++ b/TextLocator/MainWindow.xaml.cs
@@ -1435,7 +1435,8 @@ namespace TextLocator
keywords.Add(searchText);
}
// 正则表达式
- else if (AppConst.REGEX_JUDGMENT.IsMatch(searchText))
+ //else if (AppConst.REGEX_JUDGMENT.IsMatch(searchText))
+ else if (searchText.StartsWith(AppConst.REGEX_SEARCH_PREFIX))
{
keywords.Add(searchText);
}
@@ -1443,7 +1444,7 @@ namespace TextLocator
else
{
// 分词列表
- List segmentList = AppConst.INDEX_SEGMENTER.CutForSearch(searchText).ToList();
+ List segmentList = IndexCore.GetKeywords(searchText);//AppConst.INDEX_SEGMENTER.CutForSearch(searchText).ToList();
// 合并关键列表
keywords = keywords.Union(segmentList).ToList();
}
diff --git a/TextLocator/Properties/AssemblyInfo.cs b/TextLocator/Properties/AssemblyInfo.cs
index 677010cbc06dd52f9c4bd7834f1c7c54183e4e13..59b6bfda471c14d07262d7c35073c0830a82a0e0 100644
--- a/TextLocator/Properties/AssemblyInfo.cs
+++ b/TextLocator/Properties/AssemblyInfo.cs
@@ -50,9 +50,9 @@ using System.Windows;
//通过使用 "*",如下所示:
// [assembly: AssemblyVersion("1.0.*")]
// 大版本,强制更新最小版本
-[assembly: AssemblyVersion("2.1.28.8")]
+[assembly: AssemblyVersion("2.1.30.0")]
// 小版本,选择更新版本
-[assembly: AssemblyFileVersion("2.1.28.8")]
+[assembly: AssemblyFileVersion("2.1.30.0")]
// Version minVersion = System.Reflection.Assembly.GetExecutingAssembly().GetName().Version;
// Version version = new Version(FileVersionInfo.GetVersionInfo(System.Windows.Forms.Application.ExecutablePath).ProductVersion);
diff --git a/TextLocator/Service/WordFileService.cs b/TextLocator/Service/WordFileService.cs
index 3089562e6cef4f29481626a6fa52285592faaffb..0f70219709bb19fc32cea1a37fbea120eb79afbe 100644
--- a/TextLocator/Service/WordFileService.cs
+++ b/TextLocator/Service/WordFileService.cs
@@ -175,7 +175,7 @@ namespace TextLocator.Service
foreach (XmlNode textNode in textNodes)
{
- builder.Append(textNode.InnerText);
+ builder.AppendLine(textNode.InnerText);
}
builder.AppendLine();
}
diff --git a/TextLocator/Util/FileContentUtil.cs b/TextLocator/Util/FileContentUtil.cs
index f9ff71f92ca143b42e7de8134792945cbdc427c9..990b73743c6a6616e04d49bd9b4212cfdee7f2c7 100644
--- a/TextLocator/Util/FileContentUtil.cs
+++ b/TextLocator/Util/FileContentUtil.cs
@@ -82,7 +82,12 @@ namespace TextLocator.Util
// 拿出Run的Text
string text = position.GetTextInRun(LogicalDirection.Forward);
// 关键词匹配查找
- Regex regex = new Regex(keyword, RegexOptions.IgnoreCase);
+ string reg = keyword;
+ if (keyword.StartsWith(AppConst.REGEX_SEARCH_PREFIX))
+ {
+ reg = keyword.Replace(AppConst.REGEX_SEARCH_PREFIX, "");
+ }
+ Regex regex = new Regex(reg, RegexOptions.IgnoreCase);
Match matches = regex.Match(text);
if (matches.Success)
{
@@ -161,8 +166,13 @@ namespace TextLocator.Util
// 遍历关键词列表
foreach (string keyword in keywords)
{
+ string reg = keyword;
+ if (keyword.StartsWith(AppConst.REGEX_SEARCH_PREFIX))
+ {
+ reg = keyword.Replace(AppConst.REGEX_SEARCH_PREFIX, "");
+ }
// 定义关键词正则
- Regex regex = new Regex(keyword, RegexOptions.IgnoreCase);
+ Regex regex = new Regex(reg, RegexOptions.IgnoreCase);
// 匹配集合
MatchCollection collection = regex.Matches(content);
// 遍历命中列表
diff --git a/images/Keywords3.png b/images/Keywords3.png
index 0ca42473b086a976f9904d850a4faabe7e1def8d..54c58395772f669753173f46f700f98c3498d612 100644
Binary files a/images/Keywords3.png and b/images/Keywords3.png differ