首页 \ 问答 \ 为MNIST OCR预处理图像(Preprocessing an image for MNIST OCR)

为MNIST OCR预处理图像(Preprocessing an image for MNIST OCR)

 我正忙于在python中使用OCR应用程序来读取数字。 我正在使用OpenCV查找图像上的轮廓，裁剪它，然后将图像预处理为28x28以获取MNIST数据集。 我的图像不是方形的，所以当我调整图像大小时，我似乎失去了很多质量。 我可以尝试任何提示或建议吗？  
 这是原始图像  
 这是在编辑之后  
 这应该是它的质量  
 我尝试过http://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html中的一些技巧，比如Dilation和Opening。 但它并没有让它变得更好，只会让它变得模糊......  
 这是我使用的代码（找到轮廓，裁剪它，调整它，然后阈值，然后我居中）  
import numpy as np
import cv2
import imutils
import scipy
from imutils.perspective import four_point_transform
from scipy import ndimage

images = np.zeros((4, 784))
correct_vals = np.zeros((4, 10))

i = 0


def getBestShift(img):
    cy, cx = ndimage.measurements.center_of_mass(img)

    rows, cols = img.shape
    shiftx = np.round(cols / 2.0 - cx).astype(int)
    shifty = np.round(rows / 2.0 - cy).astype(int)

    return shiftx, shifty


def shift(img, sx, sy):
    rows, cols = img.shape
    M = np.float32([[1, 0, sx], [0, 1, sy]])
    shifted = cv2.warpAffine(img, M, (cols, rows))
    return shifted


for no in [1, 3, 4, 5]:
    image = cv2.imread("images/" + str(no) + ".jpg")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(blurred, 50, 200, 255)

    cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if imutils.is_cv2() else cnts[1]
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
    displayCnt = None

    for c in cnts:
        # approximate the contour
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)

        # if the contour has four vertices, then we have found
        # the thermostat display
        if len(approx) == 4:
            displayCnt = approx
            break

    warped = four_point_transform(gray, displayCnt.reshape(4, 2))
    gray = cv2.resize(255 - warped, (28, 28))
    (thresh, gray) = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY |     cv2.THRESH_OTSU)


    while np.sum(gray[0]) == 0:
        gray = gray[1:]

    while np.sum(gray[:, 0]) == 0:
        gray = np.delete(gray, 0, 1)

    while np.sum(gray[-1]) == 0:
        gray = gray[:-1]

    while np.sum(gray[:, -1]) == 0:
        gray = np.delete(gray, -1, 1)

    rows, cols = gray.shape

    if rows > cols:
        factor = 20.0 / rows
        rows = 20
        cols = int(round(cols * factor))
        gray = cv2.resize(gray, (cols, rows))

    else:
        factor = 20.0 / cols
        cols = 20
        rows = int(round(rows * factor))
        gray = cv2.resize(gray, (cols, rows))

    colsPadding = (int(np.math.ceil((28 - cols) / 2.0)), int(np.math.floor((28 - cols) / 2.0)))
    rowsPadding = (int(np.math.ceil((28 - rows) / 2.0)), int(np.math.floor((28 - rows) / 2.0)))
    gray = np.lib.pad(gray, (rowsPadding, colsPadding), 'constant')

    shiftx, shifty = getBestShift(gray)
    shifted = shift(gray, shiftx, shifty)
    gray = shifted

    cv2.imwrite("processed/" + str(no) + ".png", gray)
    cv2.imshow("imgs", gray)
    cv2.waitKey(0)

I'm busy with an OCR application in python to read digits. I'm using OpenCV to find the contours on an image, crop it, and then preprocess the image to 28x28 for the MNIST dataset. My images are not square, so I seem to lose a lot of quality when I resize the image. Any tips or suggestions I could try? 
This is the original image 
This is after editing it 
And this is the quality it should be 
I've tried some tricks from http://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html , like Dilation and Opening. But it doesnt make it better, it only makes it vague... 
This it the code im using (find contour,crop it, resize it, then threshold, and then i center it) 
import numpy as np
import cv2
import imutils
import scipy
from imutils.perspective import four_point_transform
from scipy import ndimage

images = np.zeros((4, 784))
correct_vals = np.zeros((4, 10))

i = 0


def getBestShift(img):
    cy, cx = ndimage.measurements.center_of_mass(img)

    rows, cols = img.shape
    shiftx = np.round(cols / 2.0 - cx).astype(int)
    shifty = np.round(rows / 2.0 - cy).astype(int)

    return shiftx, shifty


def shift(img, sx, sy):
    rows, cols = img.shape
    M = np.float32([[1, 0, sx], [0, 1, sy]])
    shifted = cv2.warpAffine(img, M, (cols, rows))
    return shifted


for no in [1, 3, 4, 5]:
    image = cv2.imread("images/" + str(no) + ".jpg")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(blurred, 50, 200, 255)

    cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if imutils.is_cv2() else cnts[1]
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
    displayCnt = None

    for c in cnts:
        # approximate the contour
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)

        # if the contour has four vertices, then we have found
        # the thermostat display
        if len(approx) == 4:
            displayCnt = approx
            break

    warped = four_point_transform(gray, displayCnt.reshape(4, 2))
    gray = cv2.resize(255 - warped, (28, 28))
    (thresh, gray) = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY |     cv2.THRESH_OTSU)


    while np.sum(gray[0]) == 0:
        gray = gray[1:]

    while np.sum(gray[:, 0]) == 0:
        gray = np.delete(gray, 0, 1)

    while np.sum(gray[-1]) == 0:
        gray = gray[:-1]

    while np.sum(gray[:, -1]) == 0:
        gray = np.delete(gray, -1, 1)

    rows, cols = gray.shape

    if rows > cols:
        factor = 20.0 / rows
        rows = 20
        cols = int(round(cols * factor))
        gray = cv2.resize(gray, (cols, rows))

    else:
        factor = 20.0 / cols
        cols = 20
        rows = int(round(rows * factor))
        gray = cv2.resize(gray, (cols, rows))

    colsPadding = (int(np.math.ceil((28 - cols) / 2.0)), int(np.math.floor((28 - cols) / 2.0)))
    rowsPadding = (int(np.math.ceil((28 - rows) / 2.0)), int(np.math.floor((28 - rows) / 2.0)))
    gray = np.lib.pad(gray, (rowsPadding, colsPadding), 'constant')

    shiftx, shifty = getBestShift(gray)
    shifted = shift(gray, shiftx, shifty)
    gray = shifted

    cv2.imwrite("processed/" + str(no) + ".png", gray)
    cv2.imshow("imgs", gray)
    cv2.waitKey(0)

原文：https://stackoverflow.com/questions/49796710

更新时间：2021-07-12 20:07

最满意答案

 听起来你可能正在使用自动生成的字段。 在设计视图中，单击gridview上的智能标记，然后单击“编辑列”。 然后取消选中“自动生成字段”复选框。 如果我理解正确的话，我认为这应该可以解决你的问题。 

Sounds like you might be using Auto-generated fields. In design view, click the smart tag on the gridview and click "edit columns." Then uncheck the checkbox that says "Auto-generate fields." I think this should fix your problem if I am understanding you correctly.

为MNIST OCR预处理图像(Preprocessing an image for MNIST OCR)

最满意答案

相关问答

动态创建GridView(Dynamically Creating GridView)[2022-06-25]

来自checkboxlist的Gridview(Gridview from checkboxlist)[2023-01-20]

如何格式化动态添加的GridView页脚列(How do I format dynamically added footer columns of GridView)[2022-09-28]

ASP.NET gridview - 如何将动态填充的下拉列表添加到动态绑定的gridview(ASP.NET gridview - how to add dynamically populated dropdown to dynamically bound gridview)[2021-08-09]

如何动态编辑gridview数据？(How can I dynamically edit gridview data?)[2023-10-22]

在GridView中从动态添加的模板字段中找到id的控件(find a control by id from Dynamically added Template field in GridView)[2022-07-12]

从GridView Footer行访问动态添加的控件(Accessing dynamically added control from GridView Footer row)[2022-12-04]

动态更新gridview。(Dynamically update gridview.)[2021-08-13]

在gridview中发布数据，我在那里动态添加了控件(Posted data in gridview where I was added control dynamically)[2022-04-06]

动态添加命令按钮到GridView(Dynamically adding a command button to a GridView)[2022-02-27]

相关文章

最新问答