Différences entre versions de « Projets:Machine a lire IA »
De wikilab
| (2 versions intermédiaires par 2 utilisateurs non affichées) | |||
| Ligne 4 : | Ligne 4 : | ||
== Cahier des charges == | == Cahier des charges == | ||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
== Analyse de l'existant == | == Analyse de l'existant == | ||
| Ligne 209 : | Ligne 186 : | ||
==Journal de bord== | ==Journal de bord== | ||
| + | |||
[[Category:Projets]] | [[Category:Projets]] | ||
[[Category:En cours]] | [[Category:En cours]] | ||
| + | [[Category:Malvoyance]] | ||
Version actuelle datée du 22 octobre 2021 à 17:31
Description du projet
Le but est de créer une petite machine à lire portable capable d’acquérir le texte à partir d'une capture d'image et de le lire au moyen d’une synthèse vocale.
Cahier des charges
Analyse de l'existant
Équipe (Porteur de projet et contributeurs)
- Porteurs du projet : François LB
- Concepteurs/contributeurs : Mickaël Le Cabellec
- Animateur (coordinateur du projet) :
- Fabmanager référent :
- Responsable de documentation :
Matériel nécessaire
Outils nécessaires
Coût
Délai estimé
Fichiers source
##Loading the necessary packages
import cv2
import numpy as np
import pytesseract
from imutils.object_detection import non_max_suppression
from matplotlib import pyplot as plt
# Default settings shared by the rest of the script: the input image path,
# the EAST model file, the minimum detection confidence, and the size the
# image is resized to before it is fed to the network.
args = {
    "image": "../input/text-detection/example-images/Example-images/ex24.jpg",
    "east": "../input/text-detection/east_text_detection.pb",
    "min_confidence": 0.5,
    "width": 320,
    "height": 320,
}
# Show a live preview from the default camera (device 0) until ESC is pressed
# or a frame can no longer be read.
# NOTE(review): the frame captured here is not used by the detection code
# further down, which loads the image located at args["image"] from disk
# ("Example-images/ex24.jpg" by default) -- confirm whether the last captured
# frame was meant to be processed instead.
cv2.namedWindow("preview")
vc = cv2.VideoCapture(0)
try:
    if vc.isOpened():  # try to get the first frame
        rval, frame = vc.read()
    else:
        rval = False
    while rval:
        cv2.imshow("preview", frame)
        rval, frame = vc.read()
        key = cv2.waitKey(20)
        if key == 27:  # exit on ESC
            break
finally:
    # Always free the camera and close the window, even if the loop raised;
    # otherwise the capture device stays locked by this process.
    vc.release()
    cv2.destroyWindow("preview")
# Load the image to analyse from the path configured in the args dictionary.
image = cv2.imread(args["image"])
if image is None:
    # cv2.imread reports an unreadable or missing file by returning None
    # rather than raising, so fail here with a clear message.
    raise OSError("Could not read image file: {}".format(args["image"]))

# Keep an untouched copy and the original dimensions.
orig = image.copy()
(origH, origW) = image.shape[:2]

# Network input size taken from the args dictionary (320x320 by default).
# NOTE(review): the OpenCV EAST sample states both values should be
# multiples of 32 -- confirm before changing the defaults.
(newW, newH) = (args["width"], args["height"])

# Ratio between the original and the resized image for both width and height.
# These ratios are used to translate bounding boxes back onto the original.
rW = origW / float(newW)
rH = origH / float(newH)

# Resize the image to the network input dimensions.
image = cv2.resize(image, (newW, newH))
(H, W) = image.shape[:2]

# Construct a blob from the image to forward pass it through the EAST model
# (per-channel mean subtraction, with the R and B channels swapped).
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
                             (123.68, 116.78, 103.94),
                             swapRB=True, crop=False)

# Load the pre-trained EAST model for text detection.
net = cv2.dnn.readNet(args["east"])

# Two outputs are needed from the EAST model:
# 1. Probability scores telling whether a region contains text or not.
# 2. Geometry of the text -- coordinates of the bounding box around it.
# The following two layers need to be pulled from the model to get them.
layerNames = [
    "feature_fusion/Conv_7/Sigmoid",
    "feature_fusion/concat_3",
]

# Forward pass the blob through the network to get the desired output layers.
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)
## Returns a bounding box and probability score if it is more than minimum confidence
def predictions(prob_score, geo, min_confidence=None):
    """Decode EAST output volumes into candidate text bounding boxes.

    Args:
        prob_score: Score volume, read as ``prob_score[0, 0, row, col]``.
        geo: Geometry volume, read as ``geo[0, channel, row, col]``.
            Channels 0 and 2 are summed into the box height, channels 1
            and 3 into the box width, and channel 4 is used as the
            rotation angle.
        min_confidence: Cells whose score is below this value are skipped.
            When ``None`` (the default) the module-level
            ``args["min_confidence"]`` is used, which is the original
            behaviour.

    Returns:
        A tuple ``(boxes, confidence_val)``: ``boxes`` is a list of
        ``(startX, startY, endX, endY)`` integer tuples and
        ``confidence_val`` the matching list of scores, in the same order.
    """
    if min_confidence is None:
        min_confidence = args["min_confidence"]

    (numR, numC) = prob_score.shape[2:4]
    boxes = []
    confidence_val = []

    # loop over rows
    for y in range(numR):
        scoresData = prob_score[0, 0, y]
        x0 = geo[0, 0, y]
        x1 = geo[0, 1, y]
        x2 = geo[0, 2, y]
        x3 = geo[0, 3, y]
        anglesData = geo[0, 4, y]

        # loop over the columns of this row
        for i in range(numC):
            if scoresData[i] < min_confidence:
                continue

            # Each cell is mapped to input-image coordinates with a factor
            # of 4 (presumably the output maps are 4x smaller than the
            # network input -- TODO confirm against the model).
            (offX, offY) = (i * 4.0, y * 4.0)

            # rotation angle of the prediction and its sine and cosine
            angle = anglesData[i]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # dimensions of the bounding box from the geometry volume
            h = x0[i] + x2[i]
            w = x1[i] + x3[i]

            # end corner first, then derive the start corner from the size
            endX = int(offX + (cos * x1[i]) + (sin * x2[i]))
            endY = int(offY - (sin * x1[i]) + (cos * x2[i]))
            startX = int(endX - w)
            startY = int(endY - h)

            boxes.append((startX, startY, endX, endY))
            confidence_val.append(scoresData[i])

    # return bounding boxes and associated confidence values
    return (boxes, confidence_val)
# Find predictions and apply non-maxima suppression to the candidate boxes.
(boxes, confidence_val) = predictions(scores, geometry)
boxes = non_max_suppression(np.array(boxes), probs=confidence_val)

# Tesseract settings used to convert each region to a string: English
# language, OCR engine mode 1 and page segmentation mode 8. The value is
# constant, so it is built once instead of on every loop iteration.
configuration = "-l eng --oem 1 --psm 8"

# (bounding box, recognised text) pairs. The list has to exist before the
# loop below appends to it.
results = []
(imgH, imgW) = orig.shape[:2]
for (startX, startY, endX, endY) in boxes:
    # Scale the coordinates by the respective ratios so the box refers to
    # the original image, and clamp them to the image bounds: a negative
    # index would otherwise be interpreted as counting from the far edge.
    startX = max(0, int(startX * rW))
    startY = max(0, int(startY * rH))
    endX = min(imgW, int(endX * rW))
    endY = min(imgH, int(endY * rH))
    if endX <= startX or endY <= startY:
        # Nothing left inside the image; there is no region to recognise.
        continue

    # extract the region of interest
    roi = orig[startY:endY, startX:endX]

    # recognise the text inside the bounding box
    text = pytesseract.image_to_string(roi, config=configuration)

    # append bbox coordinates and associated text to the list of results
    results.append(((startX, startY, endX, endY), text))

# Display the image with bounding boxes and recognised text.
orig_image = orig.copy()
for ((start_X, start_Y, end_X, end_Y), text) in results:
    # print the text detected by Tesseract
    print("{}\n".format(text))

    # Keep only ASCII characters in the text that is drawn on the image.
    text = "".join([x if ord(x) < 128 else "" for x in text]).strip()
    cv2.rectangle(orig_image, (start_X, start_Y), (end_X, end_Y),
                  (0, 0, 255), 2)
    cv2.putText(orig_image, text, (start_X, start_Y - 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

# OpenCV stores images as BGR while matplotlib expects RGB; convert so the
# colours are not swapped in the plot.
plt.imshow(cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB))
plt.title('Output')
plt.show()