How to use the CSRT Tracker correctly to track objects in OpenCV

I am trying to use the CSRT tracker from OpenCV 4.5.1 to track faces in video sequences. At the end of some videos I get the error below, and I don't understand why it happens:

Note: when I used the KCF tracker instead, everything worked perfectly: tracker = cv2.TrackerKCF_create()
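For reference: depending on the build, the contrib tracker factories may live at the top level or under cv2.legacy; a hedged way to create the tracker on 4.5.x, assuming opencv-contrib-python is installed:

try:
    tracker = cv2.TrackerCSRT_create()
except AttributeError:
    # some 4.5.x builds expose the contrib trackers only under cv2.legacy
    tracker = cv2.legacy.TrackerCSRT_create()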

---> 12     tracker.init(frame, myBox)
error: OpenCV(4.5.1) C:\Users\appveyor\AppData\Local\Temp\pip-req-build-r2ue8w6k\opencv\modules\core\src\matrix.cpp:811:
error: (-215:Assertion failed) 
0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols && 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows 
in function 'cv::Mat::Mat'
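The assertion itself only says that the ROI rectangle handed to cv::Mat must lie entirely inside the frame. A minimal mirror of the same check in Python (roi_inside is a hypothetical helper, not an OpenCV function):

def roi_inside(frame, roi):
    """True if the (x, y, w, h) rectangle lies inside the frame."""
    x, y, w, h = roi
    rows, cols = frame.shape[:2]
    return (0 <= x and 0 <= w and x + w <= cols
            and 0 <= y and 0 <= h and y + h <= rows)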

The code where I use the tracker is:

import cv2
import numpy as np

def tracking(frame, bbox):
    """
    Parameters:
    @param: frame: nd-array frame from video sequence.
    @param: bbox: bounding box as [x0, y0, x1, y1]
    """
    [x0, y0, x1, y1] = bbox
    myBox = (x0, y0, x1, y1)
    tracker = cv2.TrackerCSRT_create()

    # Initialize tracker with first frame and bounding box
    tracker.init(frame, myBox)
    # Update tracker
    ok, box = tracker.update(frame)
    if ok:
        [x0, x1] = [x0, x1] if x1>x0 else [x1, x0]
        [y0, y1] = [y0, y1] if y1>y0 else [y1, y0]
        result = [x0, y0, x1, y1]
        return result
    print("tracking No result ", bbox)
    return bbox

The function that calls the tracking is:

def violaJones(xmlPath, videoPath, verbose=False):
    """
    Parameters:
    @param: xmlPath: string, path to the Haar_Cascade xml file.
    @param: videoPath: string, Video Full Path.
    @param: verbose: boolean, visualize the results.
    """
    # print(videoPath)
    cap = cv2.VideoCapture(videoPath)
    bboxes = {}
    nb_frame = 0
    # box colors in BGR, one per tracked ID
    color = [(255, 128, 128),(128, 255, 128),(128, 128, 255), (128,255,255),(255,128,255), (255,255,128)]
    # Line thickness of 2 px 
    thickness = 2
    # max number of people
    maxID = 0
    track = [None]*10
    # Read until video is completed
    while (cap.isOpened()):
        
        # Capture frame-by-frame
        ret, frame = cap.read()
        if ret:
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            gauss = cv2.GaussianBlur(gray, (3, 3), 0)
            # gauss = skinThresh(frame)
            # Cascade Classifier
            face_cascade = cv2.CascadeClassifier(xmlPath)
            detected_faces = face_cascade.detectMultiScale(gauss.astype(np.uint8), 1.2, 4)
            id = 0
            for (x0,y0,w,h) in detected_faces:
                id += 1
                maxID = maxID if maxID > id else id
                x1 = x0 + w
                y1 = y0 + h
                # Tracking
                track[id-1] = tracking(frame, [x0, y0, x1, y1])

            for ID in range(maxID):
                if track[ID] is not None:
                    bboxes[str(nb_frame)+str(ID+1)] = np.array(tracking(frame, track[ID]))
                    # Draw Annotations
                    if verbose:
                        [xx0, yy0, xx1, yy1] = track[ID]
                        cv2.rectangle(frame, (xx0,yy0),(xx1,yy1), color[ID], thickness)
            # show
            if verbose:
                cv2.imshow("Viola_Jones", frame)
                key = cv2.waitKey(1) & 0xFF
        # Break the loop
        else:
            break
        nb_frame +=1
    # Closes all the frames
    cv2.destroyAllWindows()
    # When everything done, release the video capture object
    cap.release()
    return bboxes

The last values the function printed before the error were:

x0   y0   x1   y1    Frame Shape
574  46   634  106   (450, 720, 3)
600  35   663  98    (450, 720, 3)
600  35   663  98    (450, 720, 3)
600  35   663  98    (450, 720, 3)
600  35   663  98
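These numbers line up with the failed assertion: tracker.init expects the bounding box as (x, y, width, height), while the code passes (x0, y0, x1, y1). Read as a width, x1 = 663 gives roi.x + roi.width = 600 + 663 = 1263, well beyond the 720 columns of the frame. A hedged sketch of a conversion that would satisfy the assertion (to_xywh is a hypothetical helper, not part of OpenCV):

def to_xywh(bbox, frame):
    """Convert [x0, y0, x1, y1] to the (x, y, w, h) tuple that
    tracker.init expects, clamped to the frame bounds."""
    x0, y0, x1, y1 = bbox
    x0, x1 = sorted((x0, x1))
    y0, y1 = sorted((y0, y1))
    rows, cols = frame.shape[:2]
    x0, y0 = max(0, x0), max(0, y0)
    x1, y1 = min(cols, x1), min(rows, y1)
    return (x0, y0, x1 - x0, y1 - y0)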

The solution is to declare one tracker per object and initialize each tracker only once, as shown below. (The original code created and initialized a brand-new tracker on every call, so update() always ran on the very frame the tracker had just been initialized on.)

The tracking function:

import cv2
import numpy as np

def tracking(frame, bbox, tracker, Init):
    """
    Parameters:
    @param: frame: nd-array frame from video sequence.
    @param: bbox: bounding box as [x0, y0, x1, y1]
    @param: tracker: tracker from OpenCV such as KCF or CSRT
    @param: Init: boolean, True if the tracker has already been initialized
    """
    [x0, y0, x1, y1] = bbox
    myBox = (x0, y0, x1, y1)
    if not Init:
        # Initialize tracker with first frame and bounding box.
        # Booleans are passed by value, so the caller must also record
        # the initialization (see trackInit[ID] in violaJones below).
        tracker.init(frame, myBox)
        Init = True
    # Update tracker
    ok, box = tracker.update(frame)
    if ok:
        [x0, x1] = [x0, x1] if x1>x0 else [x1, x0]
        [y0, y1] = [y0, y1] if y1>y0 else [y1, y0]
        result = [x0, y0, x1, y1]
        return result
    return bbox

Viola-Jones:

def violaJones(xmlPath, videoPath, verbose=False):
    """
    Parameters:
    @param: xmlPath: string, path to the Haar_Cascade xml file.
    @param: videoPath: string, Video Full Path.
    @param: verbose: boolean, visualize the results.
    """
    # print(videoPath)
    cap = cv2.VideoCapture(videoPath)
    bboxes = {}
    nb_frame = 0
    # box colors in BGR, one per tracked ID
    color = [(255, 128, 128),(128, 255, 128),(128, 128, 255), (128,255,255),(255,128,255), (255,255,128)]
    # Line thickness of 2 px 
    thickness = 2
    # max number of people
    maxID = 0
    # at most 5 objects can be tracked (one pre-created tracker each)
    track = [None]*5

    trackInit = [False]*5

    # one tracker per object, created once
    # (cv2.TrackerCSRT_create() works here as well)
    trackers = [cv2.TrackerKCF_create() for _ in range(5)]

    # Read until video is completed
    while (cap.isOpened()):
        
        # Capture frame-by-frame
        ret, frame = cap.read()
        if ret:
            # skin-color threshold preprocessing (user-defined helper)
            gauss = skinThresh(frame)
            # Cascade Classifier
            face_cascade = cv2.CascadeClassifier(xmlPath)
            detected_faces = face_cascade.detectMultiScale(gauss, 1.2, 4)
            id = 0
            for (x0,y0,w,h) in detected_faces:
                id += 1
                maxID = maxID if maxID > id else id
                x1 = x0 + w
                y1 = y0 + h
                # Detection
                track[id-1] = [x0, y0, x1, y1]

            for ID in range(maxID):
                #Tracking
                if track[ID] is not None:
                    bboxes[str(nb_frame)+str(ID+1)] = np.array(tracking(frame, track[ID], trackers[ID], trackInit[ID]))
                    trackInit[ID] = True
                    # Draw Annotations
                    if verbose:
                        [xx0, yy0, xx1, yy1] = track[ID]
                        cv2.rectangle(frame, (xx0,yy0),(xx1,yy1), color[ID], thickness)
            # show
            if verbose:
                cv2.imshow("Viola_Jones", frame)
                key = cv2.waitKey(1) & 0xFF
        # Break the loop
        else:
            break
        nb_frame +=1
    # When everything done, release the video capture object
    cap.release()
    # Closes all the frames
    cv2.destroyAllWindows()
    return bboxes
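
As a side note, opencv-contrib also ships a ready-made container for exactly this one-tracker-per-object, init-once pattern. A minimal sketch using cv2.legacy.MultiTracker (assuming opencv-contrib-python 4.5.x and boxes already in (x, y, w, h) format; track_objects is a hypothetical helper):

import cv2

def track_objects(videoPath, initial_boxes):
    """One KCF tracker per box, all initialized once on the first frame."""
    cap = cv2.VideoCapture(videoPath)
    ok, frame = cap.read()
    if not ok:
        return []
    multi = cv2.legacy.MultiTracker_create()
    for box in initial_boxes:
        multi.add(cv2.legacy.TrackerKCF_create(), frame, box)
    results = []
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        ok, boxes = multi.update(frame)
        results.append(boxes if ok else None)
    cap.release()
    return results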