分而治之策略来确定列表中是否有超过 1/3 的相同元素

Question

我正在使用分而治之算法来确定列表中是否有超过 1/3 的元素相同。例如：[1,2,3,4] 不，所有元素都是唯一的。 [1,1,2,4,5] 是的，其中2个相同。

没有排序，有分而治之的策略吗？我被困在如何划分...

def is_valid(ids): 
    n = len(ids) 
    is_valid_recur(ids, n n-1)

def is_valid_recur(ids, l, r):
    m = (l + h) // 2
    return ....  is_valid_recur(ids, l, m) ...is_valid_recur(ids, m+1, r):

非常感谢！

Answer 1

您可以使用二叉搜索树 (BST)。 1.创建BST在每个节点维护键数 2. 使用分治法遍历树找到最大键数 3. 测试最大计数是否 > n/3 使用 BST 中的数据，分而治之很简单，因为我们只是必须确定左分支、当前分支或右分支是否具有最高重复计数。

# A utility function to create a new BST node  
class newNode:  
    # Constructor to create a new node  
    def __init__(self, data):  
        self.key = data 
        self.count = 1
        self.left = None
        self.right = None

# A utility function to insert a new node  
# with given key in BST  
def insert(node, key): 
    # If the tree is empty, return a new node  
    if node == None: 
        k = newNode(key) 
        return k 

    # If key already exists in BST, increment 
    # count and return  
    if key == node.key: 
        (node.count) += 1
        return node 

    # Otherwise, recur down the tree  
    if key < node.key:  
        node.left = insert(node.left, key)  
    else: 
        node.right = insert(node.right, key) 

    # return the (unchanged) node pointer  
    return node 

# Finds the node with the highest count in a binary search tree
def MaxCount(node):
  if node == None:
    return 0, None
  else:
    left = MaxCount(node.left)
    right = MaxCount(node.right)
    current = node.count, node

    return max([left, right, current], key=lambda x: x[0])

def generateBST(a):
  root = None
  for x in a:
    root = insert(root, x)

  return root

# Driver Code 
if __name__ == '__main__': 
    a = [1, 2, 3, 1, 1]
    root = generateBST(a)
    cnt, node = MaxCount(root)
    if cnt >= (len(a) // 3):
      print(node.key)  # Prints 1
    else:
      print(None)

用于 n/3 的 非分而治之 技术，从 https://www.geeksforgeeks.org/n3-repeated-number-array-o1-space/:

开始的时间为 O(n)

# Python 3 program to find if  
# any element appears more than 
# n/3. 
import sys 

def appearsNBy3(arr, n): 

    count1 = 0
    count2 = 0
    first = sys.maxsize 
    second = sys.maxsize 

    for i in range(0, n):  

        # if this element is 
        # previously seen,  
        # increment count1. 
        if (first == arr[i]): 
            count1 += 1

        # if this element is 
        # previously seen,  
        # increment count2. 
        elif (second == arr[i]): 
            count2 += 1

        elif (count1 == 0): 
            count1 += 1
            first = arr[i] 

        elif (count2 == 0): 
            count2 += 1
            second = arr[i] 


        # if current element is  
        # different from both 
        # the previously seen  
        # variables, decrement 
        # both the counts. 
        else: 
            count1 -= 1
            count2 -= 1



    count1 = 0
    count2 = 0

    # Again traverse the array 
    # and find the actual counts. 
    for i in range(0, n):  
        if (arr[i] == first): 
            count1 += 1

        elif (arr[i] == second): 
            count2 += 1


    if (count1 > n / 3): 
        return first 

    if (count2 > n / 3): 
        return second 

    return -1

# Driver code 
arr = [1, 2, 3, 1, 1 ] 
n = len(arr)  
print(appearsNBy3(arr, n))

Answer 2

这是我为了好玩而试验的草稿。看起来分而治之可能会减少候选频率检查的次数，但我不确定（请参阅最后一个示例，其中仅针对完整列表检查 0）。

如果我们将列表一分为三，则有效候选人可以拥有的最小频率是每个部分的 1/3。这缩小了我们在其他部分搜索的候选人名单。让 f(A, l, r) 代表可能在其父组中出现频率为 1/3 或更多的候选人。那么：

from math import ceil

def f(A, l, r):
  length = r - l + 1

  if length <= 3:
    candidates = A[l:r+1]
    print "l, r, candidates: %s, %s, %s\n" % (l, r, candidates)
    return candidates

  i = 0
  j = 0
  third = length // 3
  lg_third = int(ceil(length / float(3)))
  sm_third = lg_third // 3

  if length % 3 == 1:
    i, j = l + third, l + 2 * third
  elif length % 3 == 2:
    i, j = l + third, l + 2 * third + 1
  else:
    i, j = l + third - 1, l + 2 * third - 1

  left_candidates = f(A, l, i)
  middle_candidates = f(A, i + 1, j)
  right_candidates = f(A, j + 1, r)
  print "length: %s, sm_third: %s, lg_third: %s" % (length, sm_third, lg_third)
  print "Candidate parts: %s, %s, %s" % (left_candidates, middle_candidates, right_candidates)
  left_part = A[l:i+1]
  middle_part = A[i+1:j+1]
  right_part = A[j+1:r+1]
  candidates = []
  seen = []

  for e in left_candidates:
    if e in seen or e in candidates:
      continue
    seen.append(e)
    count = left_part.count(e)
    if count >= lg_third:
      candidates.append(e)
    else:
      middle_part_count = middle_part.count(e)
      print "Left: counting %s in middle: %s" % (e, middle_part_count)
      if middle_part_count >= sm_third:
        count = count + middle_part_count
      right_part_count = right_part.count(e)
      print "Left: counting %s in right: %s" % (e, right_part_count)
      if right_part_count >= sm_third:
        count = count + right_part_count
      if count >= lg_third:
        candidates.append(e)

  seen = []
  for e in middle_candidates:
    if e in seen or e in candidates:
      continue
    seen.append(e)
    count = middle_part.count(e)
    if count >= lg_third:
      candidates.append(e)
    else:
      left_part_count = left_part.count(e)
      print "Middle: counting %s in left: %s" % (e, left_part_count)
      if left_part_count >= sm_third:
        count = count + left_part_count
      right_part_count = right_part.count(e)
      print "Middle: counting %s in right: %s" % (e, right_part_count)
      if right_part_count >= sm_third:
        count = count + right_part_count
      if count >= lg_third:
        candidates.append(e)

  seen = []
  for e in right_candidates:
    if e in seen or e in candidates:
      continue
    seen.append(e)
    count = right_part.count(e)
    if count >= lg_third:
      candidates.append(e)
    else:
      left_part_count = left_part.count(e)
      print "Right: counting %s in left: %s" % (e, left_part_count)
      if left_part_count >= sm_third:
        count = count + left_part_count
      middle_part_count = middle_part.count(e)
      print "Right: counting %s in middle: %s" % (e, middle_part_count)
      if middle_part_count >= sm_third:
        count = count + middle_part_count
      if count >= lg_third:
        candidates.append(e)
  print "l, r, candidates: %s, %s, %s\n" % (l, r, candidates)
  return candidates


#A = [1, 1, 2, 4, 5]
#A = [1, 2, 3, 1, 2, 3, 1, 2, 3]
#A = [1, 1, 1, 1, 1, 2, 2, 2, 2, 3]
A = [2, 2, 1, 3, 3, 1, 4, 4, 1]
#A = [x for x in range(1, 13)] + [0] * 6
print f(A, 0, len(A) - 1)

分而治之策略来确定列表中是否有超过 1/3 的相同元素

Divide and Conquer strategy to determine if more than 1/3 same element in list

python

algorithm

divide-and-conquer