Java: Phone 号码过滤和分配给 "Elite"
Java: Phone Number filter and allocation to "Elite"
任务是从大量号码(总共 200k)中分配精英和高级号码。 Elite 表示数字非常漂亮且昂贵,Premium 表示更漂亮。
我的解决方案有效,但速度很慢。处理 200k 个数字大约需要 40 分钟!问题是我必须使用掩码生成数以千计的正则表达式模式,然后通过数以千计的模式处理数以千计的数字!
这些模式形如 patternX、patternXY、patternAB、patternABC、patternXAB、patternXABC、patternXYZ、patternXYAB、patternXYAB,例如:
// Masks for the XYZ family. X, Y and Z are placeholders that are replaced by digits
// before the string is compiled as a regex; alternatives are separated by '|'.
// The backslash is doubled because "\d" is not a valid escape in a Java string literal.
super.patternXYZ = "^\\d+XXYYZZ$|^\\d+ZZXYXY$|^\\d+YXXYYZZ..$";
// Masks for the XYAB family. A and B are replaced by two consecutive digits (B = A + 1),
// X and Y by arbitrary digits.
super.patternXYAB = "^\\d+ABXXYY$|^\\d+ABXYXY$";
其中所有字母都代表数字掩码:XXYY 匹配 4488 或 9933 (X<>=Y),而 AABB 匹配连续数字序列,如 3344 或 7788 (A+1=B)
通过以下方式进行匹配:
/**
 * Walks over {@code numbers} and adds every number accepted by one of the
 * pattern-family checks to {@code result}.
 *
 * @return the {@code result} set after all numbers have been examined
 */
@Override
public Set<String> performCollect() {
for (String number : numbers) {
// The checks form an else-if chain, so evaluation stops at the first family that matches.
if (isPatternXMatches(number)) {
result.add(number);
} else if (isPatternXYMatches(number)) {
result.add(number);
}
// NOTE(review): the remaining family checks are elided in this snippet.
...
}
return result;
}
其中每次匹配时都会重新生成正则表达式模式并执行匹配:
/** Mask string the cached XYZ regexes were built from; {@code null} until first use. */
private String compiledXYZSource;

/** One compiled regex per (X, Y, Z) digit assignment, in X-major, Z-minor order. */
private Pattern[] compiledXYZ;

/**
 * Tests whether {@code number} matches the {@code patternXYZ} mask for at least one
 * assignment of the digits 0-9 to the placeholders X, Y and Z.
 *
 * <p>The 1000 concrete regexes are compiled once per mask string and reused for every
 * number, instead of being recompiled on each call. The cache is not synchronized.
 *
 * @param number the number to test
 * @return {@code true} if any concrete pattern finds a match in {@code number}
 */
protected boolean isPatternXYZMatches(String number) {
    for (Pattern pattern : compiledXYZPatterns()) {
        if (pattern.matcher(number).find()) {
            return true;
        }
    }
    return false;
}

/** Returns the compiled XYZ regexes, rebuilding them when {@code patternXYZ} has changed. */
private Pattern[] compiledXYZPatterns() {
    if (!patternXYZ.equals(compiledXYZSource)) {
        Pattern[] compiled = new Pattern[10 * 10 * 10];
        int next = 0;
        for (int x = 0; x < 10; x++) {
            for (int y = 0; y < 10; y++) {
                for (int z = 0; z < 10; z++) {
                    compiled[next++] = Pattern.compile(patternXYZ
                            .replace("X", String.valueOf(x))
                            .replace("Y", String.valueOf(y))
                            .replace("Z", String.valueOf(z)));
                }
            }
        }
        // Publish the array before the key so a matching key always has its patterns.
        compiledXYZ = compiled;
        compiledXYZSource = patternXYZ;
    }
    return compiledXYZ;
}
/** Mask string the cached XYAB regexes were built from; {@code null} until first use. */
private String compiledXYABSource;

/** One compiled regex per (X, Y, A/B) digit assignment, in the original loop order. */
private Pattern[] compiledXYAB;

/**
 * Tests whether {@code number} matches the {@code patternXYAB} mask for at least one
 * assignment of digits to its placeholders: X and Y range over 0-9, while A and B are
 * consecutive digits (A from 0 to 8, B = A + 1).
 *
 * <p>The 900 concrete regexes are compiled once per mask string and reused for every
 * number, instead of being recompiled on each call. The cache is not synchronized.
 *
 * @param number the number to test
 * @return {@code true} if any concrete pattern finds a match in {@code number}
 */
protected boolean isPatternXYABMatches(String number) {
    for (Pattern pattern : compiledXYABPatterns()) {
        if (pattern.matcher(number).find()) {
            return true;
        }
    }
    return false;
}

/** Returns the compiled XYAB regexes, rebuilding them when {@code patternXYAB} has changed. */
private Pattern[] compiledXYABPatterns() {
    if (!patternXYAB.equals(compiledXYABSource)) {
        Pattern[] compiled = new Pattern[10 * 10 * 9];
        int next = 0;
        for (int x = 0; x < 10; x++) {
            for (int y = 0; y < 10; y++) {
                for (int a = 0, b = 1; b < 10; a++, b++) {
                    // Same substitution order as before: A, B, X, Y.
                    compiled[next++] = Pattern.compile(patternXYAB
                            .replace("A", String.valueOf(a))
                            .replace("B", String.valueOf(b))
                            .replace("X", String.valueOf(x))
                            .replace("Y", String.valueOf(y)));
                }
            }
        }
        // Publish the array before the key so a matching key always has its patterns.
        compiledXYAB = compiled;
        compiledXYABSource = patternXYAB;
    }
    return compiledXYAB;
}
问题:有谁知道或可以提出更好更快的解决方案吗?
我用自定义匹配器替换了正则表达式,现在处理 200k 个数字只需要 5 秒而不是 40 分钟!
/**
 * Filters {@code numbers} through {@code isNumberMatches} and stores the accepted ones
 * in {@code result}.
 *
 * @return the {@code result} set after every number has been examined
 */
public Set<String> performCollect() {
    for (String candidate : numbers) {
        if (!isNumberMatches(candidate)) {
            continue;
        }
        result.add(candidate);
    }
    return result;
}
/**
 * Checks {@code number} against each entry of {@code patterns}, stopping at the first
 * one that matches.
 *
 * @param number the number to test
 * @return {@code true} if at least one pattern matches, {@code false} otherwise
 */
protected boolean isNumberMatches(String number) {
    // A single matcher is built per number and reused for all patterns.
    final NumberMatcher matcher = new NumberMatcher(number, offset);
    boolean matched = false;
    for (NumberPattern candidate : patterns) {
        if (matcher.processMatch(candidate)) {
            matched = true;
            break;
        }
    }
    return matched;
}
...
/**
 * A digit mask, stored as a character array, paired with a weight.
 *
 * <p>{@code NumberMatcher.processMatch} skips a pattern whose weight is lower than the
 * number of distinct characters it counted in the number.
 */
public class NumberPattern {

    private Integer weight;
    private char[] maskChars;

    /**
     * Creates a pattern from a mask string.
     *
     * @param mask   the mask text; it is split into individual characters
     * @param weight the weight associated with this mask
     */
    public NumberPattern(String mask, Integer weight) {
        this.weight = weight;
        this.maskChars = mask.toCharArray();
    }

    /** Returns the weight associated with this mask. */
    public Integer getWeight() {
        return this.weight;
    }

    /** Replaces the weight associated with this mask. */
    public void setWeight(Integer weight) {
        this.weight = weight;
    }

    /** Returns the mask characters; the internal array itself is returned, not a copy. */
    public char[] getMaskChars() {
        return this.maskChars;
    }

    /** Replaces the mask characters; the given array is stored as-is, not copied. */
    public void setMaskChars(char[] maskChars) {
        this.maskChars = maskChars;
    }
}
...
/**
 * Matches one number against {@link NumberPattern} masks without using regular expressions.
 *
 * <p>A mask is aligned with the <em>end</em> of the number and compared character by
 * character from right to left; number characters to the left of the mask are not checked.
 * Mask characters are interpreted as follows:
 * <ul>
 *   <li>{@code X}, {@code Y}, {@code Z} - each binds to the first character met at such a
 *       position and must equal that character at every other position of the same letter.
 *       Different letters are not required to bind to different characters.</li>
 *   <li>{@code A}, {@code B}, {@code C}, {@code D} - a run of consecutive character codes:
 *       B = A + 1, C = B + 1, D = C + 1. The first of these letters met fixes the others.</li>
 *   <li>{@code 0} - the number must contain the character '0' at that position.</li>
 *   <li>{@code .} and every other character - the position is not checked.</li>
 * </ul>
 */
public class NumberMatcher {

    /** Marker for a placeholder that has not been bound yet; never equal to a char value. */
    private static final int UNSET = -1;

    private final char[] numberChars;

    /** Number of distinct characters in the number from {@code offset} onwards. */
    private final int uniqueChars;

    /**
     * Prepares a number for matching.
     *
     * @param number the number to match
     * @param offset index from which distinct characters are counted; the characters
     *               before it do not contribute to the count
     * @throws StringIndexOutOfBoundsException if {@code offset} is negative or larger
     *         than the length of {@code number}
     */
    public NumberMatcher(String number, int offset) {
        numberChars = number.toCharArray();
        final char[] tail = number.substring(offset).toCharArray();
        int distinct = 0;
        for (int i = 0; i < tail.length; i++) {
            boolean seenBefore = false;
            for (int j = 0; j < i && !seenBefore; j++) {
                seenBefore = tail[j] == tail[i];
            }
            if (!seenBefore) {
                distinct++;
            }
        }
        uniqueChars = distinct;
    }

    /**
     * Tests the number against one mask.
     *
     * @param pattern the mask and its weight
     * @return {@code false} if the pattern's weight is lower than the distinct-character
     *         count, if the mask is longer than the number, or if any checked position
     *         differs; {@code true} otherwise
     */
    public boolean processMatch(NumberPattern pattern) {
        // Cheap rejection first: the number has more distinct characters than the weight allows.
        if (pattern.getWeight() < uniqueChars) {
            return false;
        }
        final char[] patternChars = pattern.getMaskChars();
        // A mask longer than the number cannot be aligned with its end; without this
        // guard the backwards walk would read numberChars[-1].
        if (patternChars.length > numberChars.length) {
            return false;
        }

        int boundX = UNSET;
        int boundY = UNSET;
        int boundZ = UNSET;
        // Character bound to 'A'; B, C and D are derived from it as runStart + 1, + 2, + 3.
        int runStart = UNSET;

        int numberIndex = numberChars.length;
        for (int patternIndex = patternChars.length - 1; patternIndex >= 0; patternIndex--) {
            numberIndex--;
            final char numberChar = numberChars[numberIndex];
            final char patternChar = patternChars[patternIndex];
            switch (patternChar) {
                case 'A':
                case 'B':
                case 'C':
                case 'D': {
                    final int step = patternChar - 'A';
                    if (runStart == UNSET) {
                        // The casts keep 16-bit char arithmetic, so wrap-around is unchanged.
                        runStart = (char) (numberChar - step);
                    } else if (numberChar != (char) (runStart + step)) {
                        return false;
                    }
                    break;
                }
                case 'X':
                    if (boundX == UNSET) {
                        boundX = numberChar;
                    } else if (boundX != numberChar) {
                        return false;
                    }
                    break;
                case 'Y':
                    if (boundY == UNSET) {
                        boundY = numberChar;
                    } else if (boundY != numberChar) {
                        return false;
                    }
                    break;
                case 'Z':
                    if (boundZ == UNSET) {
                        boundZ = numberChar;
                    } else if (boundZ != numberChar) {
                        return false;
                    }
                    break;
                case '0':
                    if (numberChar != '0') {
                        return false;
                    }
                    break;
                default:
                    // '.' and any other mask character: the position is not checked.
                    break;
            }
        }
        return true;
    }
}
任务是从大量号码(总共 200k)中分配精英和高级号码。 Elite 表示数字非常漂亮且昂贵,Premium 表示更漂亮。
我的解决方案有效,但速度很慢。处理 200k 个数字大约需要 40 分钟!问题是我必须使用掩码生成数以千计的正则表达式模式,然后通过数以千计的模式处理数以千计的数字!
这些模式形如 patternX、patternXY、patternAB、patternABC、patternXAB、patternXABC、patternXYZ、patternXYAB、patternXYAB,例如:
// Masks for the XYZ family. X, Y and Z are placeholders that are replaced by digits
// before the string is compiled as a regex; alternatives are separated by '|'.
// The backslash is doubled because "\d" is not a valid escape in a Java string literal.
super.patternXYZ = "^\\d+XXYYZZ$|^\\d+ZZXYXY$|^\\d+YXXYYZZ..$";
// Masks for the XYAB family. A and B are replaced by two consecutive digits (B = A + 1),
// X and Y by arbitrary digits.
super.patternXYAB = "^\\d+ABXXYY$|^\\d+ABXYXY$";
其中所有字母都代表数字掩码:XXYY 匹配 4488 或 9933 (X<>=Y),而 AABB 匹配连续数字序列,如 3344 或 7788 (A+1=B)
通过以下方式进行匹配:
/**
 * Walks over {@code numbers} and adds every number accepted by one of the
 * pattern-family checks to {@code result}.
 *
 * @return the {@code result} set after all numbers have been examined
 */
@Override
public Set<String> performCollect() {
for (String number : numbers) {
// The checks form an else-if chain, so evaluation stops at the first family that matches.
if (isPatternXMatches(number)) {
result.add(number);
} else if (isPatternXYMatches(number)) {
result.add(number);
}
// NOTE(review): the remaining family checks are elided in this snippet.
...
}
return result;
}
其中每次匹配时都会重新生成正则表达式模式并执行匹配:
/** Mask string the cached XYZ regexes were built from; {@code null} until first use. */
private String compiledXYZSource;

/** One compiled regex per (X, Y, Z) digit assignment, in X-major, Z-minor order. */
private Pattern[] compiledXYZ;

/**
 * Tests whether {@code number} matches the {@code patternXYZ} mask for at least one
 * assignment of the digits 0-9 to the placeholders X, Y and Z.
 *
 * <p>The 1000 concrete regexes are compiled once per mask string and reused for every
 * number, instead of being recompiled on each call. The cache is not synchronized.
 *
 * @param number the number to test
 * @return {@code true} if any concrete pattern finds a match in {@code number}
 */
protected boolean isPatternXYZMatches(String number) {
    for (Pattern pattern : compiledXYZPatterns()) {
        if (pattern.matcher(number).find()) {
            return true;
        }
    }
    return false;
}

/** Returns the compiled XYZ regexes, rebuilding them when {@code patternXYZ} has changed. */
private Pattern[] compiledXYZPatterns() {
    if (!patternXYZ.equals(compiledXYZSource)) {
        Pattern[] compiled = new Pattern[10 * 10 * 10];
        int next = 0;
        for (int x = 0; x < 10; x++) {
            for (int y = 0; y < 10; y++) {
                for (int z = 0; z < 10; z++) {
                    compiled[next++] = Pattern.compile(patternXYZ
                            .replace("X", String.valueOf(x))
                            .replace("Y", String.valueOf(y))
                            .replace("Z", String.valueOf(z)));
                }
            }
        }
        // Publish the array before the key so a matching key always has its patterns.
        compiledXYZ = compiled;
        compiledXYZSource = patternXYZ;
    }
    return compiledXYZ;
}
/** Mask string the cached XYAB regexes were built from; {@code null} until first use. */
private String compiledXYABSource;

/** One compiled regex per (X, Y, A/B) digit assignment, in the original loop order. */
private Pattern[] compiledXYAB;

/**
 * Tests whether {@code number} matches the {@code patternXYAB} mask for at least one
 * assignment of digits to its placeholders: X and Y range over 0-9, while A and B are
 * consecutive digits (A from 0 to 8, B = A + 1).
 *
 * <p>The 900 concrete regexes are compiled once per mask string and reused for every
 * number, instead of being recompiled on each call. The cache is not synchronized.
 *
 * @param number the number to test
 * @return {@code true} if any concrete pattern finds a match in {@code number}
 */
protected boolean isPatternXYABMatches(String number) {
    for (Pattern pattern : compiledXYABPatterns()) {
        if (pattern.matcher(number).find()) {
            return true;
        }
    }
    return false;
}

/** Returns the compiled XYAB regexes, rebuilding them when {@code patternXYAB} has changed. */
private Pattern[] compiledXYABPatterns() {
    if (!patternXYAB.equals(compiledXYABSource)) {
        Pattern[] compiled = new Pattern[10 * 10 * 9];
        int next = 0;
        for (int x = 0; x < 10; x++) {
            for (int y = 0; y < 10; y++) {
                for (int a = 0, b = 1; b < 10; a++, b++) {
                    // Same substitution order as before: A, B, X, Y.
                    compiled[next++] = Pattern.compile(patternXYAB
                            .replace("A", String.valueOf(a))
                            .replace("B", String.valueOf(b))
                            .replace("X", String.valueOf(x))
                            .replace("Y", String.valueOf(y)));
                }
            }
        }
        // Publish the array before the key so a matching key always has its patterns.
        compiledXYAB = compiled;
        compiledXYABSource = patternXYAB;
    }
    return compiledXYAB;
}
问题:有谁知道或可以提出更好更快的解决方案吗?
我用自定义匹配器替换了正则表达式,现在处理 200k 个数字只需要 5 秒而不是 40 分钟!
/**
 * Filters {@code numbers} through {@code isNumberMatches} and stores the accepted ones
 * in {@code result}.
 *
 * @return the {@code result} set after every number has been examined
 */
public Set<String> performCollect() {
    for (String candidate : numbers) {
        if (!isNumberMatches(candidate)) {
            continue;
        }
        result.add(candidate);
    }
    return result;
}
/**
 * Checks {@code number} against each entry of {@code patterns}, stopping at the first
 * one that matches.
 *
 * @param number the number to test
 * @return {@code true} if at least one pattern matches, {@code false} otherwise
 */
protected boolean isNumberMatches(String number) {
    // A single matcher is built per number and reused for all patterns.
    final NumberMatcher matcher = new NumberMatcher(number, offset);
    boolean matched = false;
    for (NumberPattern candidate : patterns) {
        if (matcher.processMatch(candidate)) {
            matched = true;
            break;
        }
    }
    return matched;
}
...
/**
 * A digit mask, stored as a character array, paired with a weight.
 *
 * <p>{@code NumberMatcher.processMatch} skips a pattern whose weight is lower than the
 * number of distinct characters it counted in the number.
 */
public class NumberPattern {

    private Integer weight;
    private char[] maskChars;

    /**
     * Creates a pattern from a mask string.
     *
     * @param mask   the mask text; it is split into individual characters
     * @param weight the weight associated with this mask
     */
    public NumberPattern(String mask, Integer weight) {
        this.weight = weight;
        this.maskChars = mask.toCharArray();
    }

    /** Returns the weight associated with this mask. */
    public Integer getWeight() {
        return this.weight;
    }

    /** Replaces the weight associated with this mask. */
    public void setWeight(Integer weight) {
        this.weight = weight;
    }

    /** Returns the mask characters; the internal array itself is returned, not a copy. */
    public char[] getMaskChars() {
        return this.maskChars;
    }

    /** Replaces the mask characters; the given array is stored as-is, not copied. */
    public void setMaskChars(char[] maskChars) {
        this.maskChars = maskChars;
    }
}
...
/**
 * Matches one number against {@link NumberPattern} masks without using regular expressions.
 *
 * <p>A mask is aligned with the <em>end</em> of the number and compared character by
 * character from right to left; number characters to the left of the mask are not checked.
 * Mask characters are interpreted as follows:
 * <ul>
 *   <li>{@code X}, {@code Y}, {@code Z} - each binds to the first character met at such a
 *       position and must equal that character at every other position of the same letter.
 *       Different letters are not required to bind to different characters.</li>
 *   <li>{@code A}, {@code B}, {@code C}, {@code D} - a run of consecutive character codes:
 *       B = A + 1, C = B + 1, D = C + 1. The first of these letters met fixes the others.</li>
 *   <li>{@code 0} - the number must contain the character '0' at that position.</li>
 *   <li>{@code .} and every other character - the position is not checked.</li>
 * </ul>
 */
public class NumberMatcher {

    /** Marker for a placeholder that has not been bound yet; never equal to a char value. */
    private static final int UNSET = -1;

    private final char[] numberChars;

    /** Number of distinct characters in the number from {@code offset} onwards. */
    private final int uniqueChars;

    /**
     * Prepares a number for matching.
     *
     * @param number the number to match
     * @param offset index from which distinct characters are counted; the characters
     *               before it do not contribute to the count
     * @throws StringIndexOutOfBoundsException if {@code offset} is negative or larger
     *         than the length of {@code number}
     */
    public NumberMatcher(String number, int offset) {
        numberChars = number.toCharArray();
        final char[] tail = number.substring(offset).toCharArray();
        int distinct = 0;
        for (int i = 0; i < tail.length; i++) {
            boolean seenBefore = false;
            for (int j = 0; j < i && !seenBefore; j++) {
                seenBefore = tail[j] == tail[i];
            }
            if (!seenBefore) {
                distinct++;
            }
        }
        uniqueChars = distinct;
    }

    /**
     * Tests the number against one mask.
     *
     * @param pattern the mask and its weight
     * @return {@code false} if the pattern's weight is lower than the distinct-character
     *         count, if the mask is longer than the number, or if any checked position
     *         differs; {@code true} otherwise
     */
    public boolean processMatch(NumberPattern pattern) {
        // Cheap rejection first: the number has more distinct characters than the weight allows.
        if (pattern.getWeight() < uniqueChars) {
            return false;
        }
        final char[] patternChars = pattern.getMaskChars();
        // A mask longer than the number cannot be aligned with its end; without this
        // guard the backwards walk would read numberChars[-1].
        if (patternChars.length > numberChars.length) {
            return false;
        }

        int boundX = UNSET;
        int boundY = UNSET;
        int boundZ = UNSET;
        // Character bound to 'A'; B, C and D are derived from it as runStart + 1, + 2, + 3.
        int runStart = UNSET;

        int numberIndex = numberChars.length;
        for (int patternIndex = patternChars.length - 1; patternIndex >= 0; patternIndex--) {
            numberIndex--;
            final char numberChar = numberChars[numberIndex];
            final char patternChar = patternChars[patternIndex];
            switch (patternChar) {
                case 'A':
                case 'B':
                case 'C':
                case 'D': {
                    final int step = patternChar - 'A';
                    if (runStart == UNSET) {
                        // The casts keep 16-bit char arithmetic, so wrap-around is unchanged.
                        runStart = (char) (numberChar - step);
                    } else if (numberChar != (char) (runStart + step)) {
                        return false;
                    }
                    break;
                }
                case 'X':
                    if (boundX == UNSET) {
                        boundX = numberChar;
                    } else if (boundX != numberChar) {
                        return false;
                    }
                    break;
                case 'Y':
                    if (boundY == UNSET) {
                        boundY = numberChar;
                    } else if (boundY != numberChar) {
                        return false;
                    }
                    break;
                case 'Z':
                    if (boundZ == UNSET) {
                        boundZ = numberChar;
                    } else if (boundZ != numberChar) {
                        return false;
                    }
                    break;
                case '0':
                    if (numberChar != '0') {
                        return false;
                    }
                    break;
                default:
                    // '.' and any other mask character: the position is not checked.
                    break;
            }
        }
        return true;
    }
}