删除重复的短语
Remove duplicate phrases
如何删除重复的短语?我想把 2 个字符串拼接成一个名称,但其中不能出现重复的短语。
条件:
- 不要删除同一个字符串中的重复项。我的意思是:
String s1 = "duplicate duplicate duplicate"
String s2 = "xyz"
afterRemovedDuplicates = "duplicate duplicate duplicate xyz"
- 仅当短语一个接一个出现时才删除短语
String s1 = "duplicate phrases"
String s2 = "duplicate phrases abcd"
afterRemovedDuplicates = "duplicate phrases abcd"
但也可以是词组的一部分(仅当词组接连出现时)
String s1 = "duplicate phrases"
String s2 = "phrases abcd"
afterRemovedDuplicates = "duplicate phrases abcd"
所以如果我们有
String s1 = "duplicate phrases x"
String s2 = "duplicate phrases abcd"
afterRemovedDuplicates = "duplicate phrases x duplicate phrases abcd"
删除重复项不会执行任何操作,因为“x”在短语之间。
从索引 0 开始,依次检查 s1 中从该索引到末尾的全部内容是否与 s2 的开头匹配,直到找到匹配项或到达 s1 的结尾。然后,在拼接两个字符串时,可以丢弃 s1 中从该索引开始的所有内容,因为 s2 本身就以这部分内容开头。如果字符串很长,此解决方案会很慢。
试试这个。
/**
 * Joins two phrases with single spaces, dropping the longest run of whole words at the
 * start of {@code s2} that repeats the words at the end of {@code s1}.
 *
 * <p>Only whole-word overlaps across the boundary between the two inputs are removed;
 * repeated words inside either input are kept, and partial-word matches
 * ({@code "ph"} vs {@code "phrases"}) do not count. Words are separated by any run of
 * whitespace, and the result never has leading or trailing spaces.
 *
 * @param s1 the leading phrase; must not be {@code null}
 * @param s2 the trailing phrase; must not be {@code null}
 * @return the words of {@code s1} followed by the non-overlapping words of {@code s2}
 * @throws NullPointerException if either argument is {@code null}
 */
static String removeDuplicatePhrase(String s1, String s2) {
    s1 = s1.trim();
    s2 = s2.trim();
    // "".split(...) yields [""], which would inject a stray separator below, so a blank
    // side simply contributes nothing and the other side is returned normalized.
    if (s1.isEmpty()) {
        return String.join(" ", s2.split("\\s+"));
    }
    if (s2.isEmpty()) {
        return String.join(" ", s1.split("\\s+"));
    }
    // "\\s+" is the regex for a whitespace run; a single-backslash "\s" is a string
    // escape (a plain space on Java 15+, a compile error before that).
    List<String> list1 = List.of(s1.split("\\s+"));
    List<String> list2 = List.of(s2.split("\\s+"));
    int size1 = list1.size();
    int size2 = list2.size();
    // Try the longest possible overlap first and shrink until the tail of list1
    // equals the head of list2; overlap ends at 0 when nothing matches.
    int overlap = Math.min(size1, size2);
    for (; overlap > 0; --overlap) {
        if (list1.subList(size1 - overlap, size1).equals(list2.subList(0, overlap))) {
            break;
        }
    }
    String head = String.join(" ", list1);
    // When s2 is entirely covered by the overlap there is nothing to append, so do
    // not emit the separator either.
    if (overlap == size2) {
        return head;
    }
    return head + " " + String.join(" ", list2.subList(overlap, size2));
}
/**
 * Prints one demo case to standard output: both inputs, the merged result, and a
 * trailing blank line.
 *
 * @param s1 the leading phrase passed to {@code removeDuplicatePhrase}
 * @param s2 the trailing phrase passed to {@code removeDuplicatePhrase}
 */
static void test(String s1, String s2) {
    final java.io.PrintStream out = System.out;
    out.println("s1 = " + s1);
    out.println("s2 = " + s2);
    // Merge only after echoing the inputs so they are visible even if the merge throws.
    final String merged = removeDuplicatePhrase(s1, s2);
    out.println("result = " + merged);
    out.println();
}
/**
 * Runs the demo cases in a fixed order, printing each through {@code test}.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    // Each row is one {s1, s2} pair.
    final String[][] cases = {
        {"duplicate duplicate duplicate", "xyz"},
        {"duplicate phrases", "duplicate phrases abcd"},
        {"duplicate phrases", "phrases abcd"},
        {"duplicate phrases x", "duplicate phrases abcd"},
        {"duplicate phrases", "duplicate phrases"},
        {"duplicate ph", "duplicate phrases"},
        {"duplicate phrases", "phrases"},
        {"duplicate phrases", "rases"},
    };
    for (String[] pair : cases) {
        test(pair[0], pair[1]);
    }
}
输出:
s1 = duplicate duplicate duplicate
s2 = xyz
result = duplicate duplicate duplicate xyz
s1 = duplicate phrases
s2 = duplicate phrases abcd
result = duplicate phrases abcd
s1 = duplicate phrases
s2 = phrases abcd
result = duplicate phrases abcd
s1 = duplicate phrases x
s2 = duplicate phrases abcd
result = duplicate phrases x duplicate phrases abcd
s1 = duplicate phrases
s2 = duplicate phrases
result = duplicate phrases
s1 = duplicate ph
s2 = duplicate phrases
result = duplicate ph duplicate phrases
s1 = duplicate phrases
s2 = phrases
result = duplicate phrases
s1 = duplicate phrases
s2 = rases
result = duplicate phrases rases
如何删除重复的短语?我想把 2 个字符串拼接成一个名称,但其中不能出现重复的短语。
条件:
- 不要删除同一个字符串中的重复项。我的意思是:
String s1 = "duplicate duplicate duplicate"
String s2 = "xyz"
afterRemovedDuplicates = "duplicate duplicate duplicate xyz"
- 仅当短语一个接一个出现时才删除短语
String s1 = "duplicate phrases"
String s2 = "duplicate phrases abcd"
afterRemovedDuplicates = "duplicate phrases abcd"
但也可以是词组的一部分(仅当词组接连出现时)
String s1 = "duplicate phrases"
String s2 = "phrases abcd"
afterRemovedDuplicates = "duplicate phrases abcd"
所以如果我们有
String s1 = "duplicate phrases x"
String s2 = "duplicate phrases abcd"
afterRemovedDuplicates = "duplicate phrases x duplicate phrases abcd"
删除重复项不会执行任何操作,因为“x”在短语之间。
从索引 0 开始,依次检查 s1 中从该索引到末尾的全部内容是否与 s2 的开头匹配,直到找到匹配项或到达 s1 的结尾。然后,在拼接两个字符串时,可以丢弃 s1 中从该索引开始的所有内容,因为 s2 本身就以这部分内容开头。如果字符串很长,此解决方案会很慢。
试试这个。
/**
 * Joins two phrases with single spaces, dropping the longest run of whole words at the
 * start of {@code s2} that repeats the words at the end of {@code s1}.
 *
 * <p>Only whole-word overlaps across the boundary between the two inputs are removed;
 * repeated words inside either input are kept, and partial-word matches
 * ({@code "ph"} vs {@code "phrases"}) do not count. Words are separated by any run of
 * whitespace, and the result never has leading or trailing spaces.
 *
 * @param s1 the leading phrase; must not be {@code null}
 * @param s2 the trailing phrase; must not be {@code null}
 * @return the words of {@code s1} followed by the non-overlapping words of {@code s2}
 * @throws NullPointerException if either argument is {@code null}
 */
static String removeDuplicatePhrase(String s1, String s2) {
    s1 = s1.trim();
    s2 = s2.trim();
    // "".split(...) yields [""], which would inject a stray separator below, so a blank
    // side simply contributes nothing and the other side is returned normalized.
    if (s1.isEmpty()) {
        return String.join(" ", s2.split("\\s+"));
    }
    if (s2.isEmpty()) {
        return String.join(" ", s1.split("\\s+"));
    }
    // "\\s+" is the regex for a whitespace run; a single-backslash "\s" is a string
    // escape (a plain space on Java 15+, a compile error before that).
    List<String> list1 = List.of(s1.split("\\s+"));
    List<String> list2 = List.of(s2.split("\\s+"));
    int size1 = list1.size();
    int size2 = list2.size();
    // Try the longest possible overlap first and shrink until the tail of list1
    // equals the head of list2; overlap ends at 0 when nothing matches.
    int overlap = Math.min(size1, size2);
    for (; overlap > 0; --overlap) {
        if (list1.subList(size1 - overlap, size1).equals(list2.subList(0, overlap))) {
            break;
        }
    }
    String head = String.join(" ", list1);
    // When s2 is entirely covered by the overlap there is nothing to append, so do
    // not emit the separator either.
    if (overlap == size2) {
        return head;
    }
    return head + " " + String.join(" ", list2.subList(overlap, size2));
}
/**
 * Prints one demo case to standard output: both inputs, the merged result, and a
 * trailing blank line.
 *
 * @param s1 the leading phrase passed to {@code removeDuplicatePhrase}
 * @param s2 the trailing phrase passed to {@code removeDuplicatePhrase}
 */
static void test(String s1, String s2) {
    final java.io.PrintStream out = System.out;
    out.println("s1 = " + s1);
    out.println("s2 = " + s2);
    // Merge only after echoing the inputs so they are visible even if the merge throws.
    final String merged = removeDuplicatePhrase(s1, s2);
    out.println("result = " + merged);
    out.println();
}
/**
 * Runs the demo cases in a fixed order, printing each through {@code test}.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    // Each row is one {s1, s2} pair.
    final String[][] cases = {
        {"duplicate duplicate duplicate", "xyz"},
        {"duplicate phrases", "duplicate phrases abcd"},
        {"duplicate phrases", "phrases abcd"},
        {"duplicate phrases x", "duplicate phrases abcd"},
        {"duplicate phrases", "duplicate phrases"},
        {"duplicate ph", "duplicate phrases"},
        {"duplicate phrases", "phrases"},
        {"duplicate phrases", "rases"},
    };
    for (String[] pair : cases) {
        test(pair[0], pair[1]);
    }
}
输出:
s1 = duplicate duplicate duplicate
s2 = xyz
result = duplicate duplicate duplicate xyz
s1 = duplicate phrases
s2 = duplicate phrases abcd
result = duplicate phrases abcd
s1 = duplicate phrases
s2 = phrases abcd
result = duplicate phrases abcd
s1 = duplicate phrases x
s2 = duplicate phrases abcd
result = duplicate phrases x duplicate phrases abcd
s1 = duplicate phrases
s2 = duplicate phrases
result = duplicate phrases
s1 = duplicate ph
s2 = duplicate phrases
result = duplicate ph duplicate phrases
s1 = duplicate phrases
s2 = phrases
result = duplicate phrases
s1 = duplicate phrases
s2 = rases
result = duplicate phrases rases