如何获取 Java 中两个字符串之间所有差异的 starting/ending 索引?
How do I get starting/ending indexes of all differences between two strings in Java?
在 Java 中,我希望获取两个字符串之间差异的所有开始索引和结束索引的列表。我知道如何获得两个字符串之间第一个差异的起始索引,但我不太清楚如何完成这个问题。
我在 StringUtils 中找到代码:indexOfDifference(String, String),它获取两个字符串之间第一个差异的起始索引,但我没有找到获取第一个差异的结束索引的方法,也没有我看到了一种获取两个字符串之间所有差异的所有其余 starting/ending 索引的方法。
例如,如果我有这两个字符串:
原始结构:"Hello World"
revisedStr : "Help the World23"
我想要 orig 和 revised strs 之间的所有差异范围。
任何指导都会很有帮助。
这是我目前的代码:
import difflib.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;
public class TestDiffUtils {
public TestDiffUtils() {
}
// Helper method to read the files to compare into memory, convert them to a list of Strings which can be used by the DiffUtils library for comparison
private static List fileToLines(String filename) {
List lines = new LinkedList();
String line;
try {
URL path = TestDiffUtils.class.getResource(filename);
File f = new File(path.getFile());
BufferedReader in = new BufferedReader(new FileReader(f));
while ((line = in.readLine()) != null) {
lines.add(line);
}
} catch (IOException e) {
e.printStackTrace();
}
return lines;
}
private static void printUnifiedDiffs(List<String> diffs){
for(String diff : diffs){
System.out.println(diff);
}
}
/**
* Compares two Strings, and returns the index at which the
* Strings begin to differ.
*
* For example,
* <code>indexOfDifference("i am a machine", "i am a robot") -> 7</code>
*
* <pre>
* StringUtils.indexOfDifference(null, null) = -1
* StringUtils.indexOfDifference("", "") = -1
* StringUtils.indexOfDifference("", "abc") = 0
* StringUtils.indexOfDifference("abc", "") = 0
* StringUtils.indexOfDifference("abc", "abc") = -1
* StringUtils.indexOfDifference("ab", "abxyz") = 2
* StringUtils.indexOfDifference("abcde", "abxyz") = 2
* StringUtils.indexOfDifference("abcde", "xyz") = 0
* </pre>
*
* @param str1 the first String, may be null
* @param str2 the second String, may be null
* @return the index where str2 and str1 begin to differ; -1 if they are equal
* @since 2.0
*/
public static int startingIndexOfDifference(String str1, String str2) {
if (str1 == str2) {
return -1;
}
if (str1 == null || str2 == null) {
return 0;
}
int i;
for (i = 0; i < str1.length() && i < str2.length(); ++i) {
if (str1.charAt(i) != str2.charAt(i)) {
break;
}
}
if (i < str2.length() || i < str1.length()) {
return i;
}
return -1;
}
private static void doBasicLineByLineDiff(Boolean doLargeFileTest) {
String origFileName;
String revisedFileName;
if( doLargeFileTest )
{
origFileName = "test_large_file.xml";
revisedFileName = "test_large_file_revised.xml";
}else{
origFileName = "originalFile.txt";
revisedFileName = "revisedFile.txt";
}
List<String> originalLines = fileToLines(origFileName);
List<String> revisedLines = fileToLines(revisedFileName);
Patch patch = DiffUtils.diff(originalLines, revisedLines);
List<String> diffs = DiffUtils.generateUnifiedDiff(origFileName, revisedFileName, originalLines, patch, 0); // 0 = don't show any lines of context around different lines
List<Delta> deltas = patch.getDeltas();
for(Delta delta : deltas){
int diffLine = delta.getOriginal().getPosition()+1;
System.out.println("[" + diffLine + " : (" + startingIndexOfDifference((String) delta.getOriginal().getLines().get(0), (String) delta.getRevised().getLines().get(0)) + ",<todo-diffEndIndexHere>)]");
}
// printUnifiedDiffs(diffs);
}
public static void main(String[] args) {
doBasicLineByLineDiff(false);
}
}
DiffUtils.diff()
接受 List<?>
,你用行 (List<String>
) 调用它来查找行差异。
您可以重复使用它来查找两行之间的字符差异,即 List<Character>
。
它已经具有识别差异结束的所有复杂性,并且重复地再次开始共性。不要尝试自己实现,当你已经有一个可以做到这一点的库时。
在 Java 中,我希望获取两个字符串之间差异的所有开始索引和结束索引的列表。我知道如何获得两个字符串之间第一个差异的起始索引,但我不太清楚如何完成这个问题。
我在 StringUtils 中找到代码:indexOfDifference(String, String),它获取两个字符串之间第一个差异的起始索引,但我没有找到获取第一个差异的结束索引的方法,也没有我看到了一种获取两个字符串之间所有差异的所有其余 starting/ending 索引的方法。
例如,如果我有这两个字符串: 原始结构:"Hello World" revisedStr : "Help the World23"
我想要 orig 和 revised strs 之间的所有差异范围。
任何指导都会很有帮助。
这是我目前的代码:
import difflib.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;
public class TestDiffUtils {
public TestDiffUtils() {
}
// Helper method to read the files to compare into memory, convert them to a list of Strings which can be used by the DiffUtils library for comparison
private static List fileToLines(String filename) {
List lines = new LinkedList();
String line;
try {
URL path = TestDiffUtils.class.getResource(filename);
File f = new File(path.getFile());
BufferedReader in = new BufferedReader(new FileReader(f));
while ((line = in.readLine()) != null) {
lines.add(line);
}
} catch (IOException e) {
e.printStackTrace();
}
return lines;
}
private static void printUnifiedDiffs(List<String> diffs){
for(String diff : diffs){
System.out.println(diff);
}
}
/**
* Compares two Strings, and returns the index at which the
* Strings begin to differ.
*
* For example,
* <code>indexOfDifference("i am a machine", "i am a robot") -> 7</code>
*
* <pre>
* StringUtils.indexOfDifference(null, null) = -1
* StringUtils.indexOfDifference("", "") = -1
* StringUtils.indexOfDifference("", "abc") = 0
* StringUtils.indexOfDifference("abc", "") = 0
* StringUtils.indexOfDifference("abc", "abc") = -1
* StringUtils.indexOfDifference("ab", "abxyz") = 2
* StringUtils.indexOfDifference("abcde", "abxyz") = 2
* StringUtils.indexOfDifference("abcde", "xyz") = 0
* </pre>
*
* @param str1 the first String, may be null
* @param str2 the second String, may be null
* @return the index where str2 and str1 begin to differ; -1 if they are equal
* @since 2.0
*/
public static int startingIndexOfDifference(String str1, String str2) {
if (str1 == str2) {
return -1;
}
if (str1 == null || str2 == null) {
return 0;
}
int i;
for (i = 0; i < str1.length() && i < str2.length(); ++i) {
if (str1.charAt(i) != str2.charAt(i)) {
break;
}
}
if (i < str2.length() || i < str1.length()) {
return i;
}
return -1;
}
private static void doBasicLineByLineDiff(Boolean doLargeFileTest) {
String origFileName;
String revisedFileName;
if( doLargeFileTest )
{
origFileName = "test_large_file.xml";
revisedFileName = "test_large_file_revised.xml";
}else{
origFileName = "originalFile.txt";
revisedFileName = "revisedFile.txt";
}
List<String> originalLines = fileToLines(origFileName);
List<String> revisedLines = fileToLines(revisedFileName);
Patch patch = DiffUtils.diff(originalLines, revisedLines);
List<String> diffs = DiffUtils.generateUnifiedDiff(origFileName, revisedFileName, originalLines, patch, 0); // 0 = don't show any lines of context around different lines
List<Delta> deltas = patch.getDeltas();
for(Delta delta : deltas){
int diffLine = delta.getOriginal().getPosition()+1;
System.out.println("[" + diffLine + " : (" + startingIndexOfDifference((String) delta.getOriginal().getLines().get(0), (String) delta.getRevised().getLines().get(0)) + ",<todo-diffEndIndexHere>)]");
}
// printUnifiedDiffs(diffs);
}
public static void main(String[] args) {
doBasicLineByLineDiff(false);
}
}
DiffUtils.diff()
接受 List<?>
,你用行 (List<String>
) 调用它来查找行差异。
您可以重复使用它来查找两行之间的字符差异,即 List<Character>
。
它已经具有识别差异结束的所有复杂性,并且重复地再次开始共性。不要尝试自己实现,当你已经有一个可以做到这一点的库时。