From e1ccd1c3e828568d2532f1ce72cddd2717ee3e63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=87=A7=E8=87=A7?= <2556450572@qq.com> Date: Tue, 20 Dec 2022 11:19:36 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E9=AB=98=E4=BA=AEbug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/business/highlight/HighlightHelper.java | 10 ++++++---- .../business/highlight/HighlightHelperTest.java | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/stream-core/src/main/java/io/github/vampireachao/stream/core/business/highlight/HighlightHelper.java b/stream-core/src/main/java/io/github/vampireachao/stream/core/business/highlight/HighlightHelper.java index 07b20a26..614194cf 100644 --- a/stream-core/src/main/java/io/github/vampireachao/stream/core/business/highlight/HighlightHelper.java +++ b/stream-core/src/main/java/io/github/vampireachao/stream/core/business/highlight/HighlightHelper.java @@ -50,10 +50,12 @@ public class HighlightHelper { FoundWord last = Objects.requireNonNull(linkedList.pollLast()); // 判断上次和这次的长度,如果上次长度大于这次长度,说明这次词语是上次的一部分 lastIdx -= last.getWord().length(); - if (last.getWord().length() > fondWord.getWord().length()) { - // 直接使用上次的词汇以及重置历史下标 - word = last.getWord(); - } + // 获取要从哪里开始保留 + int index = fondWord.getWord().indexOf(last.getWord().charAt(last.getWord().length() - 1)); + // 这里没有判断是否找到是因为只要进入当前的if语句那么必定有重复串可以找到 + String suffix = fondWord.getWord().substring(index+1); + word = last.getWord() + suffix; + } else { // 否则根据历史下标到当前词汇下标进行查找额外部分 String partOne = text.substring(lastIdx, fondWord.getIndex()); diff --git a/stream-core/src/test/java/io/github/vampireachao/stream/core/business/highlight/HighlightHelperTest.java b/stream-core/src/test/java/io/github/vampireachao/stream/core/business/highlight/HighlightHelperTest.java index e41f9d00..f5a11f15 100644 --- a/stream-core/src/test/java/io/github/vampireachao/stream/core/business/highlight/HighlightHelperTest.java +++ b/stream-core/src/test/java/io/github/vampireachao/stream/core/business/highlight/HighlightHelperTest.java @@ -1,5 +1,6 @@ package io.github.vampireachao.stream.core.business.highlight; +import cn.hutool.dfa.WordTree; import io.github.vampireachao.stream.core.lambda.function.SerUnOp; import io.github.vampireachao.stream.core.stream.Steam; import org.junit.jupiter.api.Assertions; @@ -31,4 +32,18 @@ class HighlightHelperTest { Assertions.assertEquals("", HighlightHelper.highlight("", new ArrayList<>(), SerUnOp.identity())); } + + @Test + void containsStringTest() { + WordTree tree = new WordTree(); + tree.addWord("大土豆"); + tree.addWord("土豆呀"); + tree.addWord("刚出锅"); + String text = "我有一颗大土豆呀,刚出锅的"; + List foundWords = tree.matchAllWords(text, -1, true, true); + final List foundWordList = Steam.of(foundWords).map(w -> new io.github.vampireachao.stream.core.business.highlight.FoundWord(w.getWord(), w.getStartIndex())).toList(); + String result = HighlightHelper.highlight(text, foundWordList, s -> "<" + s + ">"); + Assertions.assertEquals("我有一颗<大土豆呀>,<刚出锅>的", result); + } + } -- Gitee From 0e747723e7b4d486824b9a84852321809251462f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=87=A7=E8=87=A7?= <2556450572@qq.com> Date: Tue, 20 Dec 2022 11:20:23 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E9=AB=98=E4=BA=AEbug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../business/highlight/HighlightHelperTest.java | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/stream-core/src/test/java/io/github/vampireachao/stream/core/business/highlight/HighlightHelperTest.java b/stream-core/src/test/java/io/github/vampireachao/stream/core/business/highlight/HighlightHelperTest.java index f5a11f15..abb88f68 100644 --- a/stream-core/src/test/java/io/github/vampireachao/stream/core/business/highlight/HighlightHelperTest.java +++ b/stream-core/src/test/java/io/github/vampireachao/stream/core/business/highlight/HighlightHelperTest.java @@ -32,18 +32,4 @@ class HighlightHelperTest { Assertions.assertEquals("", HighlightHelper.highlight("", new ArrayList<>(), SerUnOp.identity())); } - - @Test - void containsStringTest() { - WordTree tree = new WordTree(); - tree.addWord("大土豆"); - tree.addWord("土豆呀"); - tree.addWord("刚出锅"); - String text = "我有一颗大土豆呀,刚出锅的"; - List foundWords = tree.matchAllWords(text, -1, true, true); - final List foundWordList = Steam.of(foundWords).map(w -> new io.github.vampireachao.stream.core.business.highlight.FoundWord(w.getWord(), w.getStartIndex())).toList(); - String result = HighlightHelper.highlight(text, foundWordList, s -> "<" + s + ">"); - Assertions.assertEquals("我有一颗<大土豆呀>,<刚出锅>的", result); - } - } -- Gitee From 5722b3b79f2a6bb4bfbdb907b55ad833be8cc26c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=87=A7=E8=87=A7?= <2556450572@qq.com> Date: Tue, 20 Dec 2022 11:21:03 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E9=AB=98=E4=BA=AEbug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../stream/core/business/highlight/HighlightHelperTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/stream-core/src/test/java/io/github/vampireachao/stream/core/business/highlight/HighlightHelperTest.java b/stream-core/src/test/java/io/github/vampireachao/stream/core/business/highlight/HighlightHelperTest.java index abb88f68..e41f9d00 100644 --- a/stream-core/src/test/java/io/github/vampireachao/stream/core/business/highlight/HighlightHelperTest.java +++ b/stream-core/src/test/java/io/github/vampireachao/stream/core/business/highlight/HighlightHelperTest.java @@ -1,6 +1,5 @@ package io.github.vampireachao.stream.core.business.highlight; -import cn.hutool.dfa.WordTree; import io.github.vampireachao.stream.core.lambda.function.SerUnOp; import io.github.vampireachao.stream.core.stream.Steam; import org.junit.jupiter.api.Assertions; -- Gitee