使用 java 检测 txt 文件中的重复元组 [fi,(j-1), fi,j ,fi,j+1]

Detect repeated tuples [fi,(j-1), fi,j ,fi,j+1] on txt file using java

我正在寻找一小段代码,用来检测文件中的某一行(或多行)是否包含不可接受的条目,并提醒用户是哪一行(或哪些行),
但一直没有找到。

因此,例如我在以下文件中:

myFile.txt:

Field1,Field2,Field3,Field4,Field5,Field6,Field7
a,b,a,d,e,f,g
h,i,h,i,h,ff,f27
f31,f32,f33,f34,f35,f36,f37
f41,f42,f43,f44,f45,f46,f47
f51,f52,f53,f54,f55,f56,f57
f61,f62,a,b,a,f66,f67
f71,f72,f73,f74,f75,f76,f77
f81,f82,f83,f84,f85,f86,f87
f91,f92,f93,f94,f95,f96,f97
f101,f102,f103,f104,f105,f106,f107
f111,f112,f113,f114,f115,f116,f117
f121,f122,f123,f124,f125,f126,f127
f131,f132,f133,f134,f135,f136,f137
f141,f142,f143,f144,f145,f146,f147
f151,f152,f153,f154,f155,f156,f157
f161,a,b,a,f165,f166,f167
i,h,ff,f174,f175,f176,f177
f181,f182,f183,f184,f185,f186,f187
f191,f192,f193,f194,f195,f196,f197
f201,f202,f203,f204,f205,f206,f207
f211,f212,f213,f214,f215,f216,f217
f221,f222,f223,f224,f225,f226,f227
f231,f232,f233,f234,f235,f236,f237
f241,f242,f243,f244,f245,f246,f247
f251,f252,f253,f254,f255,f256,f257
f261,f262,f263,f264,f265,f266,f267
f271,f272,f273,f274,f275,f276,f277
f281,f282,f283,i,h,ff,f287
fn1,fn2,fn3,fn4,fn5,fn6,fn7
f301,f302,f303,f304,f305,f306,f307

TXT 文件中的所有值都被视为字符串。

不可接受的条目

一行(或多行)中不可接受的条目,是指该行包含某个字段 fi,j,而它所在的元组 [fi,(j-1), fi,j, fi,(j+1)] 已经在 txt 文件的其他位置(之前或之后)出现过。也就是说,对于目标字段 X,需要检查它与左侧字段 XL、右侧字段 XR 组成的元组 [XL,X,XR] 是否与 txt 文件中此前出现的元组重复;如果重复,就必须输出:该行号上提交的 X 有问题,因为元组 [XL,X,XR] 已经在前面的某个行号上定义过。
我们需要显示:
- 所有会引起冲突的行,即:
  + 先前的行(读取 txt 文件时,第一次出现的行会被接受);
  + 有问题的行(读取 txt 文件时位于先前的行之后,因此会被忽略);
- 元组第一次出现并被接受的行号;
- 未被接受、将被忽略的元组所在的行号(如果有);
- 导致问题的元组 [XL,X,XR]。

示例:

Field1;Field2;Field3;Field4;Field5;Field6;Field7<--------Headers
a;b;a;d;e;f;g
h;i;h;i;h;ff;f27
f31;f32;f33;f34;f35;f36;f37
f41;f42;f43;f44;f45;f46;f47
f51;f52;f53;f54;f55;f56;f57
f61;f62;a;b;a;f66;f67
............................
f161;a;b;a;f165;f166;f167
i;h;ff;f174;f175;f176;f177
...........................
f281;f282;f283;i;h;ff;f287
fn1;fn2;fn3;fn4;fn5;fn6;fn7

会显示:

[a;b;a], accepetd on line 1 but rejected on lines: 6,16
Line accepted is : a;b;a;d;e;f;g
Line(s) rejected are: f61;f62;a;b;a;f66;f67
                      f161;a;b;a;f165;f166;f167

[h;i;h], Not accepted at all. rejected on lines: 2 
Line accepted is: empty
Lines rejected :  h;i;h;i;h;ff;f27

[i;h;ff],Not accepted at all. rejected on lines: 2,17,28
Line accepted is: empty
Lines rejected :
             h;i;h;i;h;ff;f27
             i;h;ff;f174;f175;f176;f177
             f281;f282;f283;i;h;ff;f287

注意(N.B.):当“被接受的行”列表为空,即冲突出现在同一行内时,会显示 “Not accepted at all”。

欢迎任何建议和帮助。

更新

我给了答案

非常感谢。

这是一个解决方案,如果需要可以使用。

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;

    import java.io.BufferedReader;

    public class TextFileProgram {

      private static <T> Set<T> findDuplicates(Collection<T> list) {

        Set<T> duplicates = new LinkedHashSet<T>();
        Set<T> uniques = new HashSet<T>();

        for(T t : list) {
          if(!uniques.add(t)) {
            duplicates.add(t);
          }
        }

        return duplicates;
      }

      private static boolean hasDuplicates(HashMap<Integer, List<String>> datamap) {
        boolean status = false;

        Set valueset = new HashSet(datamap.values());

        if(datamap.values().size() != valueset.size()) {
          status = true;
        }
        else {
          status = false;
        }

        return status;

      }

      static HashMap<Integer, List<String>> findTriplets(ArrayList<Line> data) {

        HashMap<Integer, List<String>> hm = new HashMap<Integer, List<String>>();
        int j = 0;
        for(int i = 0; i < data.size(); i++) {
          String line = data.get(i).toString();
          String[] arr = line.split(",");

          final int L = arr.length;
          final int K = 3;
          List<String> list = new ArrayList<String>(Arrays.asList(arr));
          list.addAll(list.subList(0, K - 1));

          for(int z = 0; z < L - 2; z++) {
            hm.put(j, list.subList(z, z + K));
            j++;
          }

        }
        return hm;
      }

      public static <T, E> Set<T> getKeysByValue(Map<T, E> map, E value) {
        Set<T> keys = new HashSet<T>();
        for(Entry<T, E> entry : map.entrySet()) {
          if(Objects.equals(value, entry.getValue())) {
            keys.add(entry.getKey());
          }
        }
        return keys;
      }

      public static boolean getDataFromFile() {

ArrayList<Line> data = new ArrayList<Line>();

FileInputStream fis = null;
BufferedReader br = null;
boolean done = false;
String result1 = "";
String line = "";
String result2 = "";

try {

  File mFile = new File("C:\siebog-master\maven-demo\" + "TestTuples.txt");
  fis = new FileInputStream(mFile);
  br = new BufferedReader(new InputStreamReader(fis));
  int iteration = 0;
  while((line = br.readLine()) != null) {

    if(iteration < 1) {
      iteration++;
      result1 = result1 + line + System.getProperty("line.separator");
      continue;
    }
    String[] pair = line.split(",");
    data.add(new Line(pair[0], pair[1], pair[2], pair[3], pair[4], pair[5], pair[6]));
  }

  HashMap<Integer, List<String>> hm = findTriplets(data);

  boolean isContainingDuplicates = hasDuplicates(hm);

  if(isContainingDuplicates) {
    Collection<List<String>> valuesList = hm.values();
    Set<List<String>> set = findDuplicates(valuesList);
    Set<Integer> setOfAlreadyRejected = new HashSet<Integer>();
    for(List<String> li : set) {

      Set<String> setToTestForDuplicate = new HashSet<String>(li);
      Set<Integer> myKeySet = getKeysByValue(hm, li);
      int index = 0;
      boolean allreadyDone = false;
      ArrayList<Integer> sortedList = new ArrayList(myKeySet);
      Collections.sort(sortedList);

      for(Integer key : myKeySet) {

        if(index == 0) {
          String value = hm.get(key).toString();
          System.out.print(value);

        }
        index++;
        if(setToTestForDuplicate.size() < li.size() && !allreadyDone) {
          System.out.print(", Not accepted at all. rejected on lines: ");
          System.out.println((key / 5 + 2) + " ");// number of rejected
          setOfAlreadyRejected.add(key / 5 + 2);// added to set of rejected
          System.out.println("Line accepted is: empty");
          System.out.print("Line rejected :");
          System.out.println(" " + data.get(key / 5));
          allreadyDone = true;
          break;
        }
        else if(set.size() >= li.size() || allreadyDone) {
          int z = 0;
          for(Integer s : sortedList) {
            boolean blnAlreadyExistsOnSetOfRejected = false;
            if(z == 0) {
              blnAlreadyExistsOnSetOfRejected = setOfAlreadyRejected.contains((Integer.valueOf(s) / 5 + 2));
              if(blnAlreadyExistsOnSetOfRejected) {
                System.out.print(" , Not accepetd on line ");
                System.out.print(" " + (Integer.valueOf(s) / 5 + 2)
                                 + "  because already rejected on the same line ");
                System.out.println(" " + (Integer.valueOf(s) / 5 + 2) + " ");
                System.out.print("Line rejected : ");
                System.out.println(" " + data.get(s / 5));

              }
              else {
                System.out.print(" , accepetd on line ");
                System.out.print(" " + (Integer.valueOf(s) / 5 + 2) + "  rejected on lines: ");

              }

            }
            else {

              System.out.println(" " + (Integer.valueOf(s) / 5 + 2) + " ");
              System.out.print("Line rejected : ");
              System.out.println(" " + data.get(s / 5));
            }
            z++;
          }
          System.out.println();
          break;
        }
      }
      System.out.println();
    }

  }

}
catch(FileNotFoundException ex) {

}
catch(IOException ex) {

}
catch(NullPointerException ex) {

}
finally {
  try {
    fis.close();
    br.close();
    done = true;
  }
  catch(IOException ex) {

  }
}
return done;

}

      public static void main(String[] args) {

        getDataFromFile();
      }
    }

/**
 * One row of the input file, made of up to seven string fields.
 *
 * <p>The shorter constructors fill the missing trailing fields with the empty
 * string. Rows are ordered by their first field only (see
 * {@link #compareTo(Line)}); {@code equals} is not overridden, so the ordering
 * is not consistent with equality.
 */
public class Line implements Comparable<Line> {

  private String fieldOne;

  private String fieldTwo;

  private String fieldThree;

  private String fieldFour;

  private String fieldFive;

  private String fieldSix;

  private String fieldSeven;

  /** Creates a row with all seven fields. */
  public Line(String fieldOne,
              String fieldTwo,
              String fieldThree,
              String fieldFour,
              String fieldFive,
              String fieldSix,
              String fieldSeven) {
    super();
    this.fieldOne = fieldOne;
    this.fieldTwo = fieldTwo;
    this.fieldThree = fieldThree;
    this.fieldFour = fieldFour;
    this.fieldFive = fieldFive;
    this.fieldSix = fieldSix;
    this.fieldSeven = fieldSeven;
  }

  /** Creates a row with one field; the other six are empty strings. */
  public Line(String fieldOne) {
    this(fieldOne, "", "", "", "", "", "");
  }

  /** Creates a row with two fields; the other five are empty strings. */
  public Line(String fieldOne, String fieldTwo) {
    this(fieldOne, fieldTwo, "", "", "", "", "");
  }

  /** Creates a row with three fields; the other four are empty strings. */
  public Line(String fieldOne, String fieldTwo, String fieldThree) {
    this(fieldOne, fieldTwo, fieldThree, "", "", "", "");
  }

  /** Creates a row with four fields; the other three are empty strings. */
  public Line(String fieldOne, String fieldTwo, String fieldThree, String fieldFour) {
    this(fieldOne, fieldTwo, fieldThree, fieldFour, "", "", "");
  }

  /** Creates a row with five fields; the other two are empty strings. */
  public Line(String fieldOne, String fieldTwo, String fieldThree, String fieldFour, String fieldFive) {
    this(fieldOne, fieldTwo, fieldThree, fieldFour, fieldFive, "", "");
  }

  /** Creates a row with six fields; the seventh is the empty string. */
  public Line(String fieldOne,
              String fieldTwo,
              String fieldThree,
              String fieldFour,
              String fieldFive,
              String fieldSix) {
    this(fieldOne, fieldTwo, fieldThree, fieldFour, fieldFive, fieldSix, "");
  }

  public String getFieldOne() {
    return fieldOne;
  }

  public void setFieldOne(String fieldOne) {
    this.fieldOne = fieldOne;
  }

  public String getFieldTwo() {
    return fieldTwo;
  }

  public void setFieldTwo(String fieldTwo) {
    // "this." is required: the parameter shadows the field.
    this.fieldTwo = fieldTwo;
  }

  public String getFieldThree() {
    return fieldThree;
  }

  public void setFieldThree(String fieldThree) {
    this.fieldThree = fieldThree;
  }

  public String getFieldFour() {
    return fieldFour;
  }

  public void setFieldFour(String fieldFour) {
    this.fieldFour = fieldFour;
  }

  /** Legacy name of {@link #setFieldFour(String)}, kept for existing callers. */
  public void setCity(String fieldFour) {
    this.fieldFour = fieldFour;
  }

  public String getFieldFive() {
    return fieldFive;
  }

  public void setFieldFive(String fieldFive) {
    this.fieldFive = fieldFive;
  }

  public String getFieldSix() {
    return fieldSix;
  }

  public void setFieldSix(String fieldSix) {
    this.fieldSix = fieldSix;
  }

  public String getFieldSeven() {
    return fieldSeven;
  }

  public void setFieldSeven(String fieldSeven) {
    this.fieldSeven = fieldSeven;
  }

  /** Legacy name of {@link #setFieldSeven(String)}, kept for existing callers. */
  public void setDetail(String fieldSeven) {
    this.fieldSeven = fieldSeven;
  }

  /**
   * Returns the fields joined by commas, stopping before the first field
   * (from the second one on) that is {@code null} or empty.
   *
   * @return the comma-separated row; the first field alone when the second
   *         one is missing
   */
  @Override
  public String toString() {
    if(fieldTwo == null || fieldTwo.isEmpty()) {
      return fieldOne;
    }

    StringBuilder row = new StringBuilder();
    row.append(fieldOne);
    String[] rest = { fieldTwo, fieldThree, fieldFour, fieldFive, fieldSix, fieldSeven };
    for(String field : rest) {
      if(field == null || field.isEmpty()) {
        break;
      }
      row.append(',').append(field);
    }
    return row.toString();
  }

  /**
   * Orders rows by their first field.
   *
   * @throws NullPointerException if {@code o} is {@code null} or either first field is {@code null}
   */
  @Override
  public int compareTo(Line o) {
    return this.fieldOne.compareTo(o.fieldOne);
  }
}

TestTuples.txt 测试

Field1,Field2,Field3,Field4,Field5,Field6,Field7
a,b,a,d,e,f,g
h,i,h,i,h,ff,f27
f31,f32,f33,f34,f35,f36,f37
f41,f42,f43,f44,f45,f46,f47
f51,f52,f53,f54,f55,f56,f57
f61,f62,a,b,a,f66,f67
f71,f72,f73,f74,f75,f76,f77
f81,f82,f83,f84,f85,f86,f87
f91,f92,f93,f94,f95,f96,f97
f101,f102,f103,f104,f105,f106,f107
f111,f112,f113,f114,f115,f116,f117
f121,f122,f123,f124,f125,f126,f127
f131,f132,f133,f134,f135,f136,f137
f141,f142,f143,f144,f145,f146,f147
f151,f152,f153,f154,f155,f156,f157
f161,a,b,a,f165,f166,f167
i,h,ff,f174,f175,f176,f177
f181,f182,f183,f184,f185,f186,f187
f191,f192,f193,f194,f195,f196,f197
f201,f202,f203,f204,f205,f206,f207
f211,f212,f213,f214,f215,f216,f217
f221,f222,f223,f224,f225,f226,f227
f231,f232,f233,f234,f235,f236,f237
f241,f242,f243,f244,f245,f246,f247
f251,f252,f253,f254,f255,f256,f257
f261,f262,f263,f264,f265,f266,f267
f271,f272,f273,f274,f275,f276,f277
f281,f282,f283,i,h,ff,f287
fn1,fn2,fn3,fn4,fn5,fn6,fn7
f301,f302,f303,f304,f305,f306,f307

输出

[h, i, h], Not accepted at all. rejected on lines: 3 
Line accepted is: empty
Line rejected : h,i,h,i,h,ff,f27

[a, b, a], Not accepted at all. rejected on lines: 2 
Line accepted is: empty
Line rejected : a,b,a,d,e,f,g

[i, h, ff] , Not accepetd on line  3  because already rejected on the same line  3 
Line rejected :  h,i,h,i,h,ff,f27
 18 
Line rejected :  i,h,ff,f174,f175,f176,f177
 29 
Line rejected :  f281,f282,f283,i,h,ff,f287

这个问题非常适合用对象来解决。您应该创建一个对象模型,来反映您正在处理的数据。

所以首先你要创建一个 class,类似这样的东西

public class SeptTuple {
  public final String field1, field2, ..., field7

  public SeptTuple(String f1, String f2, ..., String f7) {
    field1 = f1;
    ...
    field7 = f7;
  }

  @Override
  public boolean equals(Object o) {
    if(!(o instanceof SeptTuple))
      return false;

    SeptTuple s = (SeptTuple)o;
    return Objects.equals(field1, s.field1) && Objects.equals(field2, s.field2) && ... && Objects.equals(field7, s.field7)
  }

  @Override
  public int hashcode() {
    // If 2 objects are equal, they must return the same hashcode
    return Objects.hash(field1, field2, ..., field7);
  }
}

然后,一旦完成了这一步,查找重复项(dupes)就非常简单:

// Each tuple is stored as both key and value, so that a later equal tuple
// gets the earlier stored instance back from put().
Map<SeptTuple, SeptTuple> map = new HashMap<>();
....
// Map.put returns the value previously stored under an equal key, or null
// when the key was not present yet.
// NOTE(review): newSetTuple is not declared in this fragment — presumably the
// tuple built from the row currently being read.
SeptTuple temp = map.put(newSetTuple, newSetTuple);
if(temp != null) {
   // handle clash: temp is the tuple stored earlier under an equal key
}

如果您需要在每一行的子集中查找相等的部分,请把这个方案拆分成足够多的对象,以便准确表示元组的每个组成部分。(您需要创建 3 个类来表示元组的各个部分。)