删除列表中重复位置的算法

Question

我有一个服务可以找到旅程并删除重复的访问城市。

public static void main(String[] args){
        List<List<String>> allPaths = new ArrayList<>();
        allPaths.add(List.of("Newyork","Washington","Los Angeles","Chicago"));
        allPaths.add(List.of("Newyork","Washington","Houston"));
        allPaths.add(List.of("Newyork","Dallas"));
        allPaths.add(List.of("Newyork","Columbus", "Chicago"));
        Set<String> orphanageLocations = new HashSet<>();
        removeDuplicatedLocation(allPaths, orphanageLocations);
        //expected allPaths:
        //"Newyork","Washington","Los Angeles","Chicago"
        //"Newyork","Dallas"
        //"Newyork","Columbus"
        
        //expected orphanageLocations
        //"Houston"
        
    }
    
    private static void removeDuplicatedLocation(List<List<String>> allPaths, Set<String> orphanageLocations){
        //do something here
    }

在allPaths中我存储了从origin到其他城市的所有路径。但可能某些路径将包含相同的城市，例如 Washington 出现在第一条和第二条路径中。

现在我需要一项服务来删除那个重复的城市。当两条路径有相同的城市时，我们走去更多城市的路径。

而且服务还return不能访问的城市。例如，第二条路径的“华盛顿”与第一条路径重复，然后我们删除第二条路径（它的城市比第一条路径少），然后就没有通往“休斯顿”的路径 -> 成为孤儿院

其他测试用例：

public static void main(String[] args){
        List<List<String>> allPaths = new ArrayList<>();
        allPaths.add(List.of("Newyork","Washington","Los Angeles","Chicago", "Dallas"));
        allPaths.add(List.of("Newyork","Los Angeles","Houston", "Philadenphia"));
        allPaths.add(List.of("Newyork","Dallas"));
        allPaths.add(List.of("Newyork","Columbus", "San Francisco"));
        Set<String> orphanageLocations = new HashSet<>();
        removeDuplicatedLocation(allPaths, orphanageLocations);
        //expected allPaths:
        //"Newyork","Washington","Los Angeles","Chicago", "Dallas"
        //"Newyork","Columbus", "San Francisco"
        
        //expected orphanageLocations
        //"Houston","Philadenphia"
        
    }

有人会建议我解决这个问题的算法吗？

---编辑 1：我在这里更新了我的脏解决方案，仍在等待更好的解决方案

private static void removeDuplicatedLocation(List<List<String>> allPaths, Set<String> orphanageLocations){
        //sort to make sure the longest path is on top
        List<List<String>> sortedPaths = allPaths.stream().sorted((a, b) -> Integer.compare(b.size(), a.size()))
                .collect(Collectors.toList());
        for(int i = 0; i < sortedPaths.size()-1; i++){
            List<String> path = sortedPaths.get(i);
            orphanageLocations.removeIf(path::contains);
            for(int j = i+1; j < sortedPaths.size(); j++){
                for(int k = 1; k < path.size();k++) {
                    Iterator<String> iterator = sortedPaths.get(j).iterator();
                    boolean isRemove = false;
                    while (iterator.hasNext()) {
                        String city = iterator.next();
                        if(isRemove && !path.contains(city)){
                            orphanageLocations.add(city);
                        }
                        if(StringUtils.equals(city, path.get(k))){
                            isRemove = true;
                        }
                        if(isRemove){
                            iterator.remove();
                        }
                    }
                }
            }
        }
        //remove path if it's only origin
        sortedPaths.removeIf(item -> item.size() == 1);
        allPaths.clear();
        allPaths.addAll(sortedPaths);
    }

---编辑2：感谢@devReddit 的解决方案，我用大量路由做了一个小测试。每条路径中的城市越多，您的解决方案就越慢

public static void main(String[] args){
        List<List<String>> allPaths = new ArrayList<>();
        List<List<String>> allPaths2 = new ArrayList<>();
        List<String> locations = Stream.of("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
                                        "O", "P", "Q", "R", "S", "T", "U", "V", "X", "Y", "Z").collect(Collectors.toList());
        Random rand = new Random();
        int numberOfRoute = 10000;
        String origin = "NY";
        for(int i = 0; i < numberOfRoute; i++){
            List<String> route = new ArrayList<>();
            List<String> route2 = new ArrayList<>();
            route.add(origin);
            route2.add(origin);
            //int routeLength = rand.nextInt(locations.size());
            int routeLength = 10;
            while(route.size() < routeLength){
                int randomIndex = rand.nextInt(locations.size()-1);
                if(!route.contains(locations.get(randomIndex))){
                    route.add(locations.get(randomIndex));
                    route2.add(locations.get(randomIndex));
                }
            }
            allPaths.add(route);
            allPaths2.add(route2);
        }

        System.out.println("Process for " + allPaths2.size() + " routes");
        Set<String> orphanageLocations2 = new HashSet<>();
        long startTime2 = System.currentTimeMillis();
        removeDuplicatedLocation3(allPaths2, orphanageLocations2); //uncle bob solution
        long endTime2 = System.currentTimeMillis();
        System.out.println(allPaths2);
        System.out.println(orphanageLocations2);
        System.out.println("Total time uncleBob solution(ms):" + (endTime2-startTime2));

        System.out.println("Process for " + allPaths.size() + " routes");
        Set<String> orphanageLocations = new HashSet<>();
        long startTime = System.currentTimeMillis();
        removeDuplicatedLocation(allPaths, orphanageLocations); //devReddit solution
        long endTime = System.currentTimeMillis();
        System.out.println(allPaths);
        System.out.println(orphanageLocations);
        System.out.println("Total time devReddit solution(ms):" + (endTime-startTime));
}

//devReddit solution
private static void removeDuplicatedLocation(List<List<String>> allPaths, Set<String> orphanageLocations) {
        List<List<String>> sortedFixedPaths = allPaths                  // List.of produces immutable lists,
                .stream()                                               // so you can't directly remove string from the list
                .sorted((a, b) -> Integer.compare(b.size(), a.size()))  // this fixed list will be needed later
                .collect(Collectors.toList());
        List<List<String>> sortedPaths = sortedFixedPaths              // The list is regenerated through manual deep copy
                .stream()                                              // generated a single string from the streams of
                .map(path ->                                           // each List<String> and created new list, this is now mutable
                        new ArrayList<>(Arrays.asList(String.join(",", path).split(","))))
                .collect(Collectors.toList());
        Set<List<String>> valuesToBeRemoved = new HashSet<>();
        String source = sortedPaths.get(0).get(0);
        Map<String, List<Integer>> cityMapOfIndex = generateHashMap(sortedPaths, source);    // This hashmap keeps track of the existence of cities in different lists
        removeDuplicates(cityMapOfIndex, sortedPaths);                 // this method removes the duplicates from the smaller paths
        // adds the remaining cities to orphanList
        cityMapOfIndex.forEach((cityName, value) -> {           // this block checks whether any mid element in the path is gone
            int index = value.get(0);                        // removes the path from result list
            List<String> list = sortedPaths.get(index);
            int indexInPath = list.indexOf(cityName);
            if (indexInPath != sortedFixedPaths.get(index).indexOf(cityName)) {
                orphanageLocations.add(cityName);
                sortedPaths.get(index).remove(indexInPath);
            }
        });
        valuesToBeRemoved.add(new ArrayList<>(Collections.singleton(source)));   // To handle the case where only source remains in the path
        sortedPaths.removeAll(valuesToBeRemoved);                                 // after removing other duplicates
        allPaths.clear();
        allPaths.addAll(sortedPaths);
    }

    private static void removeDuplicates(Map<String, List<Integer>> cityMapOfIndex, List<List<String>> sortedPaths) {
        for (Map.Entry<String, List<Integer>> entry : cityMapOfIndex.entrySet()) {
            List<Integer> indexList = entry.getValue();
            while (indexList.size() > 1) {
                int index = indexList.get(indexList.size() - 1);         // get the last index i.e. the smallest list of city where this entry exists
                sortedPaths.get(index).remove(entry.getKey());          // remove the city from the list
                indexList.remove((Integer) index);                // update the index list of occurrence
            }
            cityMapOfIndex.put(entry.getKey(), indexList);
        }
    }

    private static Map<String, List<Integer>> generateHashMap(List<List<String>> sortedPaths,
                                                              String source) {
        Map<String, List<Integer>> cityMapOfIndex = new HashMap<>();
        for (int x = 0; x < sortedPaths.size(); x++) {
            int finalX = x;
            sortedPaths.get(x)
                    .forEach(city -> {
                        if (!city.equalsIgnoreCase(source)) {                   // add entries for all except the source
                            List<Integer> indexList = cityMapOfIndex.containsKey(city) ?         // checks whether there's already an entry
                                    cityMapOfIndex.get(city) : new ArrayList<>();                // to avoid data loss due to overwriting
                            indexList.add(finalX);                                    // adds the index of the List of string
                            cityMapOfIndex.put(city, indexList);               // add or update the map with current indexList
                        }
                    });

        }
        return cityMapOfIndex;
    }

//Bob solution
private static void removeDuplicatedLocation3(List<List<String>> allPaths, Set<String> orphanageLocations){
        //sort to make sure the longest path is on top
        List<List<String>> sortedPaths = allPaths.stream().sorted((a, b) -> Integer.compare(b.size(), a.size()))
                .collect(Collectors.toList());
        for(int i = 0; i < sortedPaths.size()-1; i++){
            List<String> path = sortedPaths.get(i);
            orphanageLocations.removeIf(path::contains);
            for(int j = i+1; j < sortedPaths.size(); j++){
                for(int k = 1; k < path.size();k++) {
                    Iterator<String> iterator = sortedPaths.get(j).iterator();
                    boolean isRemove = false;
                    while (iterator.hasNext()) {
                        String city = iterator.next();
                        if(isRemove && !path.contains(city)){
                            orphanageLocations.add(city);
                        }
                        if(StringUtils.equals(city, path.get(k))){
                            isRemove = true;
                        }
                        if(isRemove){
                            iterator.remove();
                        }
                    }
                }
            }
        }
        //remove path if it's only origin
        sortedPaths.removeIf(item -> item.size() == 1);
        allPaths.clear();
        allPaths.addAll(sortedPaths);
    }

这是其中一个结果：

测试路由长度为 6


Process for 10000 routes
[[NY, Q, Y, T, S, X], [NY, E], [NY, V, A, H, N], [NY, J, L, I], [NY, D], [NY, O], [NY, C], [NY, P, M], [NY, F], [NY, K], [NY, U], [NY, G], [NY, R], [NY, B]]
[]
Total time uncleBob solution(ms):326
Process for 10000 routes
[[NY, Q, Y, T, S, X], [NY, E], [NY, V], [NY, J, L], [NY, D], [NY, O]]
[A, B, C, F, G, H, I, K, M, N, P, R, U]
Total time devReddit solution(ms):206

路线长度为10

Process for 10000 routes
[[NY, J, V, G, A, I, B, R, U, S], [NY, L, X, Q, M, E], [NY, K], [NY, Y], [NY, F, P], [NY, N], [NY, H, D], [NY, T, O], [NY, C]]
[]
Total time uncleBob solution(ms):292
Process for 10000 routes
[[NY, J, V, G, A, I, B, R, U, S]]
[C, D, E, F, H, K, L, M, N, O, P, Q, T, X, Y]
Total time devReddit solution(ms):471

结果也不一样，从同一个坑，我的return更有效的路线

实际上这不是我所期望的，因为@devReddit 的解决方案看起来更好更快

谢谢

Answer 1

您提供的解决方案是O(m^2xn^2)。我找到了一个具有 O(n^2) 时间复杂度的解决方案。已添加必要的评论作为解释：

核心方法removeDuplicatedLocation:

private static void removeDuplicatedLocation(List<List<String>> allPaths, Set<String> orphanageLocations) {
    List<List<String>> sortedFixedPaths = allPaths                  // List.of produces immutable lists,
            .stream()                                               // so you can't directly remove string from the list
            .sorted((a, b) -> Integer.compare(b.size(), a.size()))  // this fixed list will be needed later
            .collect(Collectors.toList());
    List<List<String>> sortedPaths = sortedFixedPaths              // The list is regenerated through manual deep copy
            .stream()                                              // generated a single string from the streams of
            .map(path ->                                           // each List<String> and created new list, this is now mutable
                    new ArrayList<>(Arrays.asList(String.join(",", path).split(","))))
            .collect(Collectors.toList());
    Set<List<String>> valuesToBeRemoved = new HashSet<>();
    String source = sortedPaths.get(0).get(0);
    Map<String, List<Integer>> cityMapOfIndex = generateHashMap(sortedPaths, source);    // This hashmap keeps track of the existence of cities in different lists
    removeDuplicates(cityMapOfIndex, sortedPaths);                 // this method removes the duplicates from the smaller paths
    cityMapOfIndex.entrySet().stream().forEach(city -> {           // this block checks whether any mid element in the path is gone
        String cityName = city.getKey();                           // adds the remaining cities to orphanList
        int index = city.getValue().get(0);                        // removes the path from result list
        List<String> list = sortedPaths.get(index);
        int indexInPath = list.indexOf(cityName);
        if (indexInPath != sortedFixedPaths.get(index).indexOf(cityName)) {
            orphanageLocations.add(cityName);
            sortedPaths.get(index).remove(indexInPath);
        }
    });
    valuesToBeRemoved.add(new ArrayList<>(Collections.singleton(source)));   // To handle the case where only source remains in the path
    sortedPaths.removeAll(valuesToBeRemoved);                                 // after removing other duplicates
    allPaths.clear();
    allPaths.addAll(sortedPaths);
}

上述存根中使用的removeDuplicates和generateHashMap方法如下：

private static void removeDuplicates(Map<String, List<Integer>> cityMapOfIndex, List<List<String>> sortedPaths) {
    for (Map.Entry<String, List<Integer>> entry : cityMapOfIndex.entrySet()) {
        List<Integer> indexList = entry.getValue();
        while (indexList.size() > 1) {
            int index = indexList.get(indexList.size() - 1);         // get the last index i.e. the smallest list of city where this entry exists
            sortedPaths.get(index).remove(entry.getKey());          // remove the city from the list
            indexList.remove((Integer) index);                // update the index list of occurrence
        }
        cityMapOfIndex.put(entry.getKey(), indexList);
    }
}

private static Map<String, List<Integer>> generateHashMap(List<List<String>> sortedPaths,
                                                          String source) {
    Map<String, List<Integer>> cityMapOfIndex = new HashMap<>();
    for (int x = 0; x < sortedPaths.size(); x++) {     
        int finalX = x;
        sortedPaths.get(x)
                .stream()
                .forEach(city -> {
                    if (!city.equalsIgnoreCase(source)) {                   // add entries for all except the source
                        List<Integer> indexList = cityMapOfIndex.containsKey(city) ?         // checks whether there's already an entry
                                cityMapOfIndex.get(city) : new ArrayList<>();                // to avoid data loss due to overwriting
                        indexList.add(finalX);                                    // adds the index of the List of string
                        cityMapOfIndex.put(city, indexList);               // add or update the map with current indexList
                    }
                });
    }
    return cityMapOfIndex;
}

如果您有任何疑问，请告诉我。

删除列表中重复位置的算法

Algorithm to remove duplicated location on list

algorithm

journey