Python 如何统计 listA 中与 listB 中出现顺序相同的元素个数

Question

我正在尝试追踪 listA、listB、listC... 与原始列表的相似程度。如何打印 listA 中与原始列表里出现顺序（sequence）相同的连续元素的个数？

original_list = ['I', 'live', 'in', 'space', 'with', 'my', 'dog']

listA = ['my', 'name', 'my', 'dog', 'is', 'two', 'years', 'old']

listB = ['how', 'where', 'I', 'live', 'in', 'space', 'with']

# NOTE: 'the' and 'in' are two separate elements; adjacent string literals
# without a comma between them would be concatenated into a single 'thein'.
listC = ['I', 'live', 'to', 'the', 'in', 'space', 'with', 'my', 'football', 'my', 'dog']

Output: 
listA: Count = 2     #'my', 'dog'

listB: Count = 5     #'I',  'live', 'in', 'space', 'with'

listC: Count = 2,4,2     #'I',  'live'
                     #'in', 'space', 'with', 'my'
                     #'my', 'dog'

Answer 1

已编辑

我发现这个问题做起来很有趣，并想在已采纳的答案之外探索其他一些方案。

def _get_sequences(inter_dict : dict, list_range : int) -> tuple[set, int]:
    occuring = [0] * list_range
    for key, indices in inter_dict.items(): # lays out intersecting strings as they occur
        for idx in indices:
            occuring[idx] = key
    
    _temp_list = []
    lengths = []
    matches = []

    for idx in range(len(occuring)):
        item = occuring.pop(0)
        if item != 0: # if on python 3.8+ you could use (( item := occuring.pop(0) ) != 0) instead
            _temp_list.append(item)

        elif (bool(_temp_list) and len(_temp_list) > 1):
            matches.append( _temp_list.copy() )
            lengths.append( len(_temp_list) )
            _temp_list.clear()

        elif (bool(_temp_list) and item == 0) and len(_temp_list) == 1: # if its a single occurrence ignore
            _temp_list.clear()

    if bool(_temp_list) and len(_temp_list) > 1: # ensures no matching strings are missed
        matches.append( _temp_list )
        lengths.append( len(_temp_list) )

    return lengths, matches

def get_intersecting(list_a, list_b) -> tuple[list, list]:
    """Find runs of adjacent items in ``list_b`` that also occur in ``list_a``.

    Args:
        list_a: Reference list; only membership of its items is used.
        list_b: List that is scanned for runs of shared items.

    Returns:
        Whatever ``_get_sequences`` returns for the positions in ``list_b``
        that hold an item also present in ``list_a``.
    """
    known = set(list_a)
    indices_dict = {}
    # One pass over list_b records every position of every shared item.
    for index, value in enumerate(list_b):
        if value in known:
            indices_dict.setdefault(value, []).append(index)

    return _get_sequences(indices_dict, len(list_b))

if __name__ == "__main__":
    original = ['I', 'live', 'in', 'space', 'with', 'my', 'dog']
    listA = ['my', 'name', 'my', 'dog', 'is', 'two', 'years', 'old']
    listB = ['how', 'where', 'I', 'live', 'in', 'space', 'with']
    listC = ['I', 'live', 'to', 'the', 'in', 'space', 'with', 'my', 'football', 'my', 'dog']

    # Expected output, one line per compared list:
    #   [2] [['my', 'dog']]
    #   [5] [['I', 'live', 'in', 'space', 'with']]
    #   [2, 4, 2] [['I', 'live'], ['in', 'space', 'with', 'my'], ['my', 'dog']]
    for candidate in (listA, listB, listC):
        lengths, matches = get_intersecting(original, candidate)
        print(lengths, matches)

已编辑 x2 这可能是我的最终解决方案。

from collections.abc import Iterator


def ordered_intersecting(list_a, list_b) -> Iterator[tuple[int, list]]:
    """Yield each run of two or more adjacent ``list_b`` items found in ``list_a``.

    Args:
        list_a: Reference list; only membership of its items is tested.
        list_b: List that is scanned from left to right.

    Yields:
        ``(length, run)`` pairs, where ``run`` is a list of adjacent items
        of ``list_b`` that all occur in ``list_a`` and ``length`` is
        ``len(run)``. Runs of a single item are skipped.
    """
    run = []
    for item in list_b:
        if item in list_a:
            run.append(item)
            continue
        # A non-matching item ends the current run. Reset it even when it
        # holds a single item, otherwise that item would be glued onto a
        # later, non-adjacent run.
        if len(run) > 1:
            yield len(run), run
        run = []

    if len(run) > 1:  # a run that reaches the end of list_b
        yield len(run), run

if __name__ == "__main__":
    original = ['I', 'live', 'in', 'space', 'with', 'my', 'dog']
    listA = ['my', 'name', 'my', 'dog', 'is', 'two', 'years', 'old']
    listB = ['how', 'where', 'I', 'live', 'in', 'space', 'with']
    listC = ['I', 'live', 'to', 'the', 'in', 'space', 'with', 'my', 'football', 'my', 'dog']

    # Materialise the generator for each compared list and print the result.
    for candidate in (listA, listB, listC):
        found = list(ordered_intersecting(original, candidate))
        print(found)

Answer 2

我写了一个函数，应该可以实现你想要的功能。它可能有点太复杂，但我目前想不到更简单的方法：

# Reference word sequence that the other lists are compared against.
original = [
    'I', 'live', 'in', 'space', 'with', 'my', 'dog',
]

# Lists to compare with the reference sequence.
listA = [
    'my', 'name', 'my', 'dog',
    'is', 'two', 'years', 'old',
]
listB = [
    'how', 'where',
    'I', 'live', 'in', 'space', 'with',
]
listC = [
    'I', 'live', 'to', 'the',
    'in', 'space', 'with', 'my',
    'football', 'my', 'dog',
]


def get_sequence_lengths(original_list, comparative_list):
    """Print and return the lengths of slices shared by the two lists.

    Every contiguous slice of length two or more of ``comparative_list`` is
    tried, longest first, against the contiguous slices of
    ``original_list``. Each time one is found, candidates covered by it are
    pruned before the search continues.

    Args:
        original_list: Reference list.
        comparative_list: List searched for slices that also occur in
            ``original_list``.

    Returns:
        The lengths of the matched slices, ordered by where the slices
        appear among the slices of ``original_list``. The lengths and the
        matched slices are also printed.
    """

    def _slices(items):
        # Every contiguous slice of length >= 2, ordered by start, then end.
        return [
            items[start:stop + 1]
            for start in range(len(items))
            for stop in range(start + 1, len(items))
        ]

    original_options = _slices(original_list)
    # Longest candidates first, so the biggest shared slice wins.
    comparative_options = sorted(_slices(comparative_list), key=len, reverse=True)

    matches = []
    while comparative_options:
        for option in comparative_options:
            if option not in original_options:
                continue
            matches.append(option)
            remaining = comparative_options.copy()
            # Prune each candidate whose length is reached by a streak of
            # consecutive items of the matched option that it contains
            # (this always removes the matched option itself).
            for candidate in comparative_options:
                counter = 0
                for value in option:
                    counter = counter + 1 if value in candidate else 0
                    if counter == len(candidate):
                        remaining.remove(candidate)
                        break
            comparative_options = remaining
            break
        else:
            # A full pass found nothing. Using for/else instead of peeking at
            # comparative_options[-1] also stays safe when pruning has
            # emptied the candidate list.
            break

    matches = [option for option in original_options if option in matches]
    lengths = [len(option) for option in matches]
    print(lengths)
    print(matches)

    return lengths

如果您使用原始列表和示例列表调用它，它会打印以下内容。
get_sequence_lengths(original, listA) 打印 [2] [['my', 'dog']]。
get_sequence_lengths(original, listB) 打印 [5] [['I', 'live', 'in', 'space', 'with']]。
get_sequence_lengths(original, listC) 打印 [2, 4, 2] [['I', 'live'], ['in', 'space', 'with', 'my'], ['my', 'dog']].

Python 如何统计 listA 中与 listB 中出现顺序相同的元素个数

python how to record the number of elements in listA that appear in the same order as in listB

python

numpy

list

count