用回溯拆分字符串
split strings with backtracking
我正在尝试编写一个代码,将无空格的字符串拆分成有意义的词,但是当我给出像“arealways”这样的句子时,它 returns ['a', 'real', 'ways'] 而我想要的是 ['are', 'always'] 并且我的字典包含所有这些词。我怎样才能编写一个不断回溯直到找到最佳匹配的代码?
returns'a','real','ways'的代码:
splitter.java:
public class splitter {
HashMap<String, String> map = new HashMap<>();
Trie dict;
public splitter(Trie t) {
dict = t;
}
public String split(String test) {
if (dict.contains(test)) {
return (test);
} else if (map.containsKey(test)) {
return (map.get(test));
} else {
for (int i = 0; i < test.length(); i++) {
String pre = test.substring(0, i);
if (dict.contains(pre)) {
String end = test.substring(i);
String fixedEnd = split(end);
if(fixedEnd != null){
map.put(test, pre + " " + fixedEnd);
return pre + " " + fixedEnd;
}else {
}
}
}
}
map.put(test,null);
return null;
}
}
Trie.java:
public class Trie {
public static class TrieNode {
private HashMap<Character, TrieNode> charMap = new HashMap<>();
public char c;
public boolean endOWord;
public void insert(String s){
}
public boolean contains(String s){
return true;
}
}
public TrieNode root;
public Trie() {
root = new TrieNode();
}
public void insert(String s){
TrieNode p = root;
for(char c : s.toCharArray()) {
if(! p.charMap.containsKey(c)) {
TrieNode node = new TrieNode();
node.c = c;
p.charMap.put(c, node);
}
p = p.charMap.get(c);
}
p.endOWord = true;
}
public boolean contains(String s){
TrieNode p = root;
for(char c : s.toCharArray()) {
if(!p.charMap.containsKey(c)) {
return false;
}
p = p.charMap.get(c);
}
return p.endOWord;
}
public void insertDictionary(String filename) throws FileNotFoundException{
File file = new File(filename);
Scanner sc = new Scanner(file);
while(sc.hasNextLine())
insert(sc.nextLine());
}
public void insertDictionary(File file) throws FileNotFoundException{
Scanner sc = new Scanner(file);
while(sc.hasNextLine())
insert(sc.nextLine());
}
}
WordSplitter class:
public class WordSplitter {
public static void main(String[] args) throws FileNotFoundException {
String test = "arealways";
String myFile = "/Users/abc/Desktop/dictionary.txt";
Trie dict = new Trie();
dict.insertDictionary(myFile);
splitter sp = new splitter(dict);
test = sp.split(test);
if(test != null)
System.out.println(test);
else
System.out.println("No Splitting Found.");
}
}
使用 OP 的 split
方法和在 The Trie Data Structure in Java Baeldung 的文章中找到的 Trie
的实现,我能够得到以下结果:
realways=real ways
arealways=a real ways
但是,如果我从字典中删除单词“real”或“a”,我会得到以下结果:
realways=null
arealways=are always
这是我用来获得这些结果的完整代码:
public class Splitter {
private static Map<String, String> map = new HashMap<>();
private Trie dict;
public Splitter(Trie t) {
dict = t;
}
/**
* @param args
*/
public static void main(String[] args) {
List<String> words = List.of("a", "always", "are", "area", "r", "way", "ways"); // The order of these words does not seem to impact the final result
String test = "arealways";
Trie t = new Trie();
for (String word : words) {
t.insert(word);
}
System.out.println(t);
Splitter splitter = new Splitter(t);
splitter.split(test);
map.entrySet().forEach(System.out::println);
}
public String split(String test) {
if (dict.find(test)) {
return (test);
} else if (map.containsKey(test)) {
return (map.get(test));
} else {
for (int i = 0; i < test.length(); i++) {
String pre = test.substring(0, i);
if (dict.find(pre)) {
String end = test.substring(i);
String fixedEnd = split(end);
if (fixedEnd != null) {
map.put(test, pre + " " + fixedEnd);
return pre + " " + fixedEnd;
} else {
}
}
}
}
map.put(test, null);
return null;
}
public static class Trie {
private TrieNode root = new TrieNode();
public boolean find(String word) {
TrieNode current = root;
for (int i = 0; i < word.length(); i++) {
char ch = word.charAt(i);
TrieNode node = current.getChildren().get(ch);
if (node == null) {
return false;
}
current = node;
}
return current.isEndOfWord();
}
public void insert(String word) {
TrieNode current = root;
for (char l : word.toCharArray()) {
current = current.getChildren().computeIfAbsent(l, c -> new TrieNode());
}
current.setEndOfWord(true);
}
@Override
public String toString() {
return toString(root);
}
/**
* @param root2
* @return
*/
private String toString(TrieNode node) {
return node.toString();
}
public static class TrieNode {
private Map<Character, TrieNode> children = new HashMap<>() ;
private String contents;
private boolean endOfWord;
public Map<Character, TrieNode> getChildren() {
return children;
}
public void setEndOfWord(boolean endOfWord) {
this.endOfWord = endOfWord;
}
public boolean isEndOfWord() {
return endOfWord;
}
@Override
public String toString() {
StringBuilder sbuff = new StringBuilder();
if (isLeaf()) {
return sbuff.toString();
}
children.entrySet().forEach(entry -> {
sbuff.append(entry.getKey() + "\n");
});
sbuff.append(" ");
return children.toString();
}
private boolean isLeaf() {
return children.isEmpty();
}
}
public void delete(String word) {
delete(root, word, 0);
}
private boolean delete(TrieNode current, String word, int index) {
if (index == word.length()) {
if (!current.isEndOfWord()) {
return false;
}
current.setEndOfWord(false);
return current.getChildren().isEmpty();
}
char ch = word.charAt(index);
TrieNode node = current.getChildren().get(ch);
if (node == null) {
return false;
}
boolean shouldDeleteCurrentNode = delete(node, word, index + 1) && !node.isEndOfWord();
if (shouldDeleteCurrentNode) {
current.getChildren().remove(ch);
return current.getChildren().isEmpty();
}
return false;
}
}
}
我通过在 Trie
和 TrieNode
中添加 toString()
方法改进了原始代码。现在,当我打印出 Trie
对象“t”时,我得到以下结果:
{a={r={e={a=}}, l={w={a={y={s=}}}}}, w={a={y={s=}}}}
我的结论是 OP 的 TrieNode
实现不正确。 Trie
的构建方式,给定输入的字符串值,OP 描述的行为似乎是正确的。
我正在尝试编写一个代码,将无空格的字符串拆分成有意义的词,但是当我给出像“arealways”这样的句子时,它 returns ['a', 'real', 'ways'] 而我想要的是 ['are', 'always'] 并且我的字典包含所有这些词。我怎样才能编写一个不断回溯直到找到最佳匹配的代码?
returns'a','real','ways'的代码:
splitter.java:
public class splitter {
HashMap<String, String> map = new HashMap<>();
Trie dict;
public splitter(Trie t) {
dict = t;
}
public String split(String test) {
if (dict.contains(test)) {
return (test);
} else if (map.containsKey(test)) {
return (map.get(test));
} else {
for (int i = 0; i < test.length(); i++) {
String pre = test.substring(0, i);
if (dict.contains(pre)) {
String end = test.substring(i);
String fixedEnd = split(end);
if(fixedEnd != null){
map.put(test, pre + " " + fixedEnd);
return pre + " " + fixedEnd;
}else {
}
}
}
}
map.put(test,null);
return null;
}
}
Trie.java:
public class Trie {
public static class TrieNode {
private HashMap<Character, TrieNode> charMap = new HashMap<>();
public char c;
public boolean endOWord;
public void insert(String s){
}
public boolean contains(String s){
return true;
}
}
public TrieNode root;
public Trie() {
root = new TrieNode();
}
public void insert(String s){
TrieNode p = root;
for(char c : s.toCharArray()) {
if(! p.charMap.containsKey(c)) {
TrieNode node = new TrieNode();
node.c = c;
p.charMap.put(c, node);
}
p = p.charMap.get(c);
}
p.endOWord = true;
}
public boolean contains(String s){
TrieNode p = root;
for(char c : s.toCharArray()) {
if(!p.charMap.containsKey(c)) {
return false;
}
p = p.charMap.get(c);
}
return p.endOWord;
}
public void insertDictionary(String filename) throws FileNotFoundException{
File file = new File(filename);
Scanner sc = new Scanner(file);
while(sc.hasNextLine())
insert(sc.nextLine());
}
public void insertDictionary(File file) throws FileNotFoundException{
Scanner sc = new Scanner(file);
while(sc.hasNextLine())
insert(sc.nextLine());
}
}
WordSplitter class:
public class WordSplitter {
public static void main(String[] args) throws FileNotFoundException {
String test = "arealways";
String myFile = "/Users/abc/Desktop/dictionary.txt";
Trie dict = new Trie();
dict.insertDictionary(myFile);
splitter sp = new splitter(dict);
test = sp.split(test);
if(test != null)
System.out.println(test);
else
System.out.println("No Splitting Found.");
}
}
使用 OP 的 split
方法和在 The Trie Data Structure in Java Baeldung 的文章中找到的 Trie
的实现,我能够得到以下结果:
realways=real ways
arealways=a real ways
但是,如果我从字典中删除单词“real”或“a”,我会得到以下结果:
realways=null
arealways=are always
这是我用来获得这些结果的完整代码:
public class Splitter {
private static Map<String, String> map = new HashMap<>();
private Trie dict;
public Splitter(Trie t) {
dict = t;
}
/**
* @param args
*/
public static void main(String[] args) {
List<String> words = List.of("a", "always", "are", "area", "r", "way", "ways"); // The order of these words does not seem to impact the final result
String test = "arealways";
Trie t = new Trie();
for (String word : words) {
t.insert(word);
}
System.out.println(t);
Splitter splitter = new Splitter(t);
splitter.split(test);
map.entrySet().forEach(System.out::println);
}
public String split(String test) {
if (dict.find(test)) {
return (test);
} else if (map.containsKey(test)) {
return (map.get(test));
} else {
for (int i = 0; i < test.length(); i++) {
String pre = test.substring(0, i);
if (dict.find(pre)) {
String end = test.substring(i);
String fixedEnd = split(end);
if (fixedEnd != null) {
map.put(test, pre + " " + fixedEnd);
return pre + " " + fixedEnd;
} else {
}
}
}
}
map.put(test, null);
return null;
}
public static class Trie {
private TrieNode root = new TrieNode();
public boolean find(String word) {
TrieNode current = root;
for (int i = 0; i < word.length(); i++) {
char ch = word.charAt(i);
TrieNode node = current.getChildren().get(ch);
if (node == null) {
return false;
}
current = node;
}
return current.isEndOfWord();
}
public void insert(String word) {
TrieNode current = root;
for (char l : word.toCharArray()) {
current = current.getChildren().computeIfAbsent(l, c -> new TrieNode());
}
current.setEndOfWord(true);
}
@Override
public String toString() {
return toString(root);
}
/**
* @param root2
* @return
*/
private String toString(TrieNode node) {
return node.toString();
}
public static class TrieNode {
private Map<Character, TrieNode> children = new HashMap<>() ;
private String contents;
private boolean endOfWord;
public Map<Character, TrieNode> getChildren() {
return children;
}
public void setEndOfWord(boolean endOfWord) {
this.endOfWord = endOfWord;
}
public boolean isEndOfWord() {
return endOfWord;
}
@Override
public String toString() {
StringBuilder sbuff = new StringBuilder();
if (isLeaf()) {
return sbuff.toString();
}
children.entrySet().forEach(entry -> {
sbuff.append(entry.getKey() + "\n");
});
sbuff.append(" ");
return children.toString();
}
private boolean isLeaf() {
return children.isEmpty();
}
}
public void delete(String word) {
delete(root, word, 0);
}
private boolean delete(TrieNode current, String word, int index) {
if (index == word.length()) {
if (!current.isEndOfWord()) {
return false;
}
current.setEndOfWord(false);
return current.getChildren().isEmpty();
}
char ch = word.charAt(index);
TrieNode node = current.getChildren().get(ch);
if (node == null) {
return false;
}
boolean shouldDeleteCurrentNode = delete(node, word, index + 1) && !node.isEndOfWord();
if (shouldDeleteCurrentNode) {
current.getChildren().remove(ch);
return current.getChildren().isEmpty();
}
return false;
}
}
}
我通过在 Trie
和 TrieNode
中添加 toString()
方法改进了原始代码。现在,当我打印出 Trie
对象“t”时,我得到以下结果:
{a={r={e={a=}}, l={w={a={y={s=}}}}}, w={a={y={s=}}}}
我的结论是 OP 的 TrieNode
实现不正确。 Trie
的构建方式,给定输入的字符串值,OP 描述的行为似乎是正确的。