How to find complex contours in images (the subtitle text region in a video file) with pyopencv?


Trying to detect the subtitle region in film frames like this one:

[image: original frame with an embedded subtitle]

import cv2
import numpy as np
from PIL import Image
from IPython.display import display



image = cv2.imread('org.jpeg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (9,9), 1)
edges = cv2.Canny(blur,100,500) 



img = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB) # edges is single-channel, so convert gray to RGB for display

display(Image.fromarray(img))

[image: Canny edge map of the frame]

Note that the subtitle region packs many edges into a small area; I am hoping to find a way to locate such regions.

What I have tried:

# 1) Average the edge map with a 20x20 ones kernel to measure local edge density.
kernel = np.ones((20, 20), np.float32) / 20**2
filter2d = cv2.filter2D(edges, -1, kernel=kernel)
# Using np.std on the filtered edge map: it is hard to find a threshold
# value that holds up across a variety of scenes.

# 2) Dilate the edges with a cross-shaped kernel.
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
dilated = cv2.dilate(edges, kernel, iterations=9)
# This produces lots of surplus regions.
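
One way to avoid hand-tuning that threshold for every scene might be Otsu's method, which derives the threshold from the image histogram itself. A minimal sketch, reusing filter2d from above:

# Spread the edge-density map over the full 8-bit range so Otsu
# has a usable histogram to work with.
norm = cv2.normalize(filter2d, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

# With THRESH_OTSU the passed threshold (0) is ignored and one is
# chosen automatically; otsu_t is the value Otsu selected.
otsu_t, mask = cv2.threshold(norm, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
print("Otsu picked threshold:", otsu_t)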

These regions contain far more edges than their surroundings, since the subtitle font is rendered directly into the scene.

Is there a better way to detect these subtitle regions?

Is there any cv2 filter2D kernel for detecting high-entropy parts? (see the sketch below)

Or a prebuilt conv network for this job?

No need to OCR these subtitles, just find the contours.
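
On the high-entropy question: not a filter2D kernel, but scikit-image (if it is available) ships a local-entropy rank filter that measures exactly this kind of local information density. A minimal sketch, assuming gray from the code above and that scikit-image is installed:

import numpy as np
from skimage.filters.rank import entropy
from skimage.morphology import disk

# Local Shannon entropy in a circular window of radius 9; dense subtitle
# strokes should score noticeably higher than flat background.
ent = entropy(gray, disk(9))

# Rescale to 8-bit so the map can be thresholded/displayed with OpenCV.
ent8 = np.uint8(255 * ent / ent.max())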

UPDATE!

Tried OpenCV's EAST text detector; the results were not as expected.

from imutils.object_detection import non_max_suppression
import numpy as np
import time
import cv2

# detection parameters (hardcoded here rather than parsed with argparse)
width = 320
height = 320
min_confidence = 0.5
modelpath = "frozen_east_text_detection.pb"
imagepath = "org.jpeg"
# load the input image and grab the image dimensions
image = cv2.imread(imagepath)
orig = image.copy()
(H, W) = image.shape[:2]
 
# set the new width and height and then determine the ratio in change
# for both the width and height
(newW, newH) = (width,height)
rW = W / float(newW)
rH = H / float(newH)
 
# resize the image and grab the new image dimensions
image = cv2.resize(image, (newW, newH))
(H, W) = image.shape[:2]
 
# define the two output layer names for the EAST detector model that
# we are interested -- the first is the output probabilities and the
# second can be used to derive the bounding box coordinates of text
layerNames = [
    "feature_fusion/Conv_7/Sigmoid",
    "feature_fusion/concat_3"]
 
# load the pre-trained EAST text detector
print("[INFO] loading EAST text detector...")
net = cv2.dnn.readNet(modelpath)
 
# construct a blob from the image and then perform a forward pass of
# the model to obtain the two output layer sets
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
    (123.68, 116.78, 103.94), swapRB=True, crop=False)
start = time.time()
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)
end = time.time()
 
# show timing information on text prediction
print("[INFO] text detection took {:.6f} seconds".format(end - start))
 
# grab the number of rows and columns from the scores volume, then
# initialize our set of bounding box rectangles and corresponding
# confidence scores
(numRows, numCols) = scores.shape[2:4]
rects = []
confidences = []
 
# loop over the number of rows
for y in range(0, numRows):
    # extract the scores (probabilities), followed by the geometrical
    # data used to derive potential bounding box coordinates that
    # surround text
    scoresData = scores[0, 0, y]
    xData0 = geometry[0, 0, y]
    xData1 = geometry[0, 1, y]
    xData2 = geometry[0, 2, y]
    xData3 = geometry[0, 3, y]
    anglesData = geometry[0, 4, y]
 
    # loop over the number of columns
    for x in range(0, numCols):
        # if our score does not have sufficient probability, ignore it
        if scoresData[x] < min_confidence:
            continue
 
        # compute the offset factor as our resulting feature maps will
        # be 4x smaller than the input image
        (offsetX, offsetY) = (x * 4.0, y * 4.0)
 
        # extract the rotation angle for the prediction and then
        # compute the sin and cosine
        angle = anglesData[x]
        cos = np.cos(angle)
        sin = np.sin(angle)
 
        # use the geometry volume to derive the width and height of
        # the bounding box
        h = xData0[x] + xData2[x]
        w = xData1[x] + xData3[x]
 
        # compute both the starting and ending (x, y)-coordinates for
        # the text prediction bounding box
        endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
        endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
        startX = int(endX - w)
        startY = int(endY - h)
 
        # add the bounding box coordinates and probability score to
        # our respective lists
        rects.append((startX, startY, endX, endY))
        confidences.append(scoresData[x])
 
# apply non-maxima suppression to suppress weak, overlapping bounding
# boxes
boxes = non_max_suppression(np.array(rects), probs=confidences)
 
# loop over the bounding boxes
for (startX, startY, endX, endY) in boxes:
    # scale the bounding box coordinates based on the respective
    # ratios
    startX = int(startX * rW)
    startY = int(startY * rH)
    endX = int(endX * rW)
    endY = int(endY * rH)
 
    # draw the bounding box on the image
    cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 0, 255), 2)
 
# show the output image
#cv2.imshow("Text Detection", orig)
#cv2.waitKey(0)



img = cv2.cvtColor(orig, cv2.COLOR_BGR2RGB) # Converting BGR to RGB
display(Image.fromarray(img))


[image: EAST detection result drawn on the frame]
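
As a side note, if your OpenCV build is 4.5 or newer, the dnn module ships a high-level EAST wrapper that does the score/geometry decoding and non-max suppression above internally. A minimal sketch, assuming the same frozen_east_text_detection.pb:

import cv2
import numpy as np

# High-level EAST wrapper (OpenCV >= 4.5): it runs the same network but
# handles decoding and NMS itself.
model = cv2.dnn_TextDetectionModel_EAST("frozen_east_text_detection.pb")
model.setConfidenceThreshold(0.5)
model.setNMSThreshold(0.4)
model.setInputParams(1.0, (320, 320), (123.68, 116.78, 103.94), True)

frame = cv2.imread("org.jpeg")
boxes, confidences = model.detect(frame)  # each box is a rotated quad (4 points)
for quad in boxes:
    cv2.polylines(frame, [np.asarray(quad, np.int32)], True, (0, 0, 255), 2)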

1 Answer

Done by myself.

It turns out that lower-level functions meet my needs:

1. Apply a ones-matrix convolution filter to the edge map above.
2. Threshold the result.
3. Erode & dilate.
4. findContours & draw the bounding box (see the completion after the code).

# 1) Average the edge map with a 50x50 ones kernel (local edge density).
kernel = np.ones((50, 50), np.float32) / 50**2
filter2d = cv2.filter2D(edges, -1, kernel=kernel)

# 2) Normalize to 8-bit and threshold.
nf = np.array(filter2d / np.max(filter2d) * 255, np.uint8)
ret, thresh1 = cv2.threshold(nf, 100, 255, cv2.THRESH_BINARY)
#img = cv2.cvtColor(thresh1, cv2.COLOR_GRAY2RGB)
#display(Image.fromarray(img))

# 3) Erode to drop thin responses, then dilate with a wide kernel to
# merge the subtitle characters into one blob.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (20, 20))
eroded = cv2.erode(thresh1, kernel, iterations=1)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (50, 20))
dilated = cv2.dilate(eroded, kernel, iterations=1)
#img = cv2.cvtColor(dilated, cv2.COLOR_GRAY2RGB)
#display(Image.fromarray(img))

# 4) Find contours and keep the one with the largest area.
contours, hierarchy = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
maxi = 0
maxarea = 0
for j in range(len(contours)):
    area = cv2.contourArea(contours[j])
    if area > maxarea:
        maxi = j
        maxarea = area
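
Step 4 mentions drawing the box, but the snippet stops after picking the largest contour. A possible completion, assuming image still holds the original frame and that the largest blob really is the subtitle region:

# Draw the bounding box of the largest contour, assumed to be the subtitle.
if len(contours) > 0:
    x, y, w, h = cv2.boundingRect(contours[maxi])
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    display(Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)))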