Skip to content
Snippets Groups Projects
Commit ac9a86c3 authored by Yonatan
Browse files

Fully moved to OpenCV as image processing library

parent f6285b67
No related branches found
No related tags found
1 merge request: !1 Final merge
......@@ -3,7 +3,6 @@ package es.yonatan.tfg;
import es.yonatan.tfg.grouper.SimpleUnitGrouper;
import es.yonatan.tfg.image.OpenCVPostProcessor;
import es.yonatan.tfg.model.UnitGroup;
import es.yonatan.tfg.recognizer.color.LeptonicaColorRecognizer;
import es.yonatan.tfg.recognizer.TesseractRecognizer;
import es.yonatan.tfg.translator.DeeplTranslator;
import net.sourceforge.tess4j.util.ImageHelper;
......@@ -17,7 +16,7 @@ import java.io.IOException;
public class Main {
public static void main(String[] args) throws IOException {
var file = new File("C:/Proyectos/TFG/src/main/resources/04.jpg");
var file = new File("C:/Proyectos/TFG/src/main/resources/test_img_tfg.png");
var time = System.currentTimeMillis();
//TODO: 10/04/2023 transform originalImage for best Tesseract OCR results: grayscale, binarize, noise reduction, etc.
......@@ -37,18 +36,19 @@ public class Main {
var merger = new SimpleUnitGrouper();
var groups = merger.group(words, image.getWidth(), image.getHeight(), 4, 4);
var groups = merger.group(words, image.getWidth(), image.getHeight(), 4, 4, 25, -5);
System.out.println(System.currentTimeMillis() - time);
var translated = new DeeplTranslator<UnitGroup>("secret").translate(groups, null, "en-US");
var translated = new DeeplTranslator<UnitGroup>("f6542fe3-8e30-5cd2-064e-5eb18c7dbbf3:fx").translate(groups, null, "es");
new OpenCVPostProcessor().processImage(file, groups);
time = System.currentTimeMillis();
// basic GUI to render the bounding boxes resulting from the OCR recognition, and to test the word merge algorithm
var processedImage = new OpenCVPostProcessor().processImage(file, groups);
System.out.println("OpenCV time: " + (System.currentTimeMillis() - time));
var colors = new LeptonicaColorRecognizer();
colors.load(file);
// basic GUI to render the bounding boxes resulting from the OCR recognition, and to test the word merge algorithm
var scale = 1.5;
var window = new JFrame();
......@@ -64,24 +64,20 @@ public class Main {
var time = System.currentTimeMillis();
g.drawImage(originalImage.getScaledInstance((int) (originalImage.getWidth() / scale), (int) (originalImage.getHeight() / scale), BufferedImage.SCALE_SMOOTH), 0, 0, null);
g.drawImage(processedImage.getScaledInstance((int) (originalImage.getWidth() / scale), (int) (originalImage.getHeight() / scale), BufferedImage.SCALE_SMOOTH), 0, 0, null);
var g2d = ((Graphics2D) g);
g2d.setStroke(new BasicStroke(1.5f));
g2d.setFont(new Font(Font.SANS_SERIF, Font.PLAIN, 15));
g2d.setFont(new Font(Font.MONOSPACED, Font.BOLD, 15));
for (UnitGroup group : groups) {
var boundingBox = group.bounds();
var color = colors.getColorAt(boundingBox.x(), boundingBox.y(), boundingBox.width(), boundingBox.height());
boundingBox = boundingBox.withTolerance(2, 2);
g2d.setColor(color.getBgColor());
g2d.fillRect((int) (boundingBox.x() / scale), (int) (boundingBox.y() / scale), (int) (boundingBox.width() / scale), (int) (boundingBox.height() / scale));
//g2d.setColor(Color.red);
//g2d.drawRect((int) (boundingBox.x() / scale), (int) (boundingBox.y() / scale), (int) (boundingBox.width() / scale), (int) (boundingBox.height() / scale));
g2d.setColor(color.getColor());
g2d.setColor(Color.black);
g2d.drawString(group.getTranslation(), (int) (boundingBox.x() / scale), (int) ((boundingBox.y() + boundingBox.height() * 0.75) / scale));
}
......
......@@ -9,9 +9,8 @@ import java.util.List;
/**
 * Groups a collection of units into unit groups.
 *
 * <p>All overloads take the {@code width} and {@code height} of the area the units live in
 * (the caller in {@code Main} passes the image dimensions) and the number of grid cells to
 * split that area into along each axis.
 *
 * @param <In>  type of the units being grouped
 * @param <Out> type of the resulting groups
 */
public interface IUnitGrouper<In extends Unit, Out extends UnitGroup> {
/**
 * Convenience overload that uses the same cell count on both axes: delegates to the
 * five-argument overload, passing {@code cellCount} as both the horizontal and the
 * vertical cell count.
 */
default @NotNull List<Out> group(@NotNull Collection<In> units, int width, int height, int cellCount) {
return group(units, width, height, cellCount, cellCount);
}
/**
 * Groups {@code units} using separate horizontal and vertical cell counts and no explicit
 * tolerance arguments.
 */
@NotNull List<Out> group(@NotNull Collection<In> units, int width, int height, int horizontalCellCount, int verticalCellCount);
/**
 * Groups {@code units} using separate horizontal and vertical cell counts plus a
 * per-axis tolerance.
 *
 * <p>NOTE(review): the meaning of the tolerances is implementation-defined from this
 * interface's point of view; {@code SimpleUnitGrouper} forwards them to its grid's
 * collision lookup. Negative values are passed by callers (e.g. {@code -5}) — confirm
 * the intended semantics there.
 */
@NotNull List<Out> group(@NotNull Collection<In> units, int width, int height, int horizontalCellCount, int verticalCellCount, int horizontalTolerance, int verticalTolerance);
}
......@@ -19,6 +19,11 @@ public class SimpleUnitGrouper implements IUnitGrouper<Unit, UnitGroup> {
/**
 * Groups the given units without any tolerance.
 *
 * <p>Delegates to the seven-argument overload, passing {@code 0} for both the horizontal
 * and the vertical tolerance.
 */
@Override
public @NotNull List<UnitGroup> group(@NotNull Collection<Unit> units, int width, int height, int horizontalCellCount, int verticalCellCount) {
    final int noTolerance = 0;
    return group(
            units,
            width,
            height,
            horizontalCellCount,
            verticalCellCount,
            noTolerance,
            noTolerance);
}
@Override
public @NotNull List<UnitGroup> group(@NotNull Collection<Unit> units, int width, int height, int horizontalCellCount, int verticalCellCount, int horizontalTolerance, int verticalTolerance) {
// ensure no previously added items are still in the grid
var grid = new HashGrid2D<>(width, height, horizontalCellCount, verticalCellCount);
......@@ -33,7 +38,7 @@ public class SimpleUnitGrouper implements IUnitGrouper<Unit, UnitGroup> {
checked.add(unit);
grid.remove(unit);
var colliders = grid.getColliding(unit, 20, -5);
var colliders = grid.getColliding(unit, horizontalTolerance, verticalTolerance);
var group = new UnitGroup();
group.addUnit(unit, false);
......
......@@ -2,11 +2,13 @@ package es.yonatan.tfg.image;
import es.yonatan.tfg.model.HasBoundingBox;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.awt.image.BufferedImage;
import java.io.File;
import java.util.Collection;
public interface IImagePostProcessor {
void processImage(@NotNull File imgFile, @NotNull Collection<? extends HasBoundingBox> units);
@Nullable BufferedImage processImage(@NotNull File imgFile, @NotNull Collection<? extends HasBoundingBox> units);
}
......@@ -2,6 +2,7 @@ package es.yonatan.tfg.image;
import es.yonatan.tfg.model.HasBoundingBox;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.opencv.core.CvType;
import org.opencv.core.Mat;
import org.opencv.core.Scalar;
......@@ -9,6 +10,8 @@ import org.opencv.imgcodecs.Imgcodecs;
import org.opencv.imgproc.Imgproc;
import org.opencv.photo.Photo;
import java.awt.image.BufferedImage;
import java.awt.image.DataBufferByte;
import java.io.File;
import java.util.Collection;
......@@ -20,24 +23,31 @@ public class OpenCVPostProcessor implements IImagePostProcessor{
}
@Override
public void processImage(@NotNull File imgFile, @NotNull Collection<? extends HasBoundingBox> units) {
public @Nullable BufferedImage processImage(@NotNull File imgFile, @NotNull Collection<? extends HasBoundingBox> units) {
var img = Imgcodecs.imread(imgFile.getAbsolutePath());
var textMask = createTextMask(img, units);
Photo.inpaint(img, textMask, img, 1, Photo.INPAINT_NS);
Photo.inpaint(img, textMask, img, 0, Photo.INPAINT_NS);
Imgcodecs.imwrite(new File(imgFile.getParent(), "test_mask.jpg").getAbsolutePath(), textMask);
Imgcodecs.imwrite(new File(imgFile.getParent(), "test.jpg").getAbsolutePath(), img);
return matToBufferedImage(img);
}
/**
 * Builds the mask handed to the inpainting step.
 *
 * <p>The mask is a single-channel 8-bit matrix with the same size as {@code srcImg},
 * initialised to 0. For every unit, its bounding rectangle is drawn filled (thickness -1)
 * with the value 255.
 *
 * @param srcImg image whose size the mask must match
 * @param units  elements whose bounding boxes are marked in the mask
 * @return the newly created mask
 */
private @NotNull Mat createTextMask(@NotNull Mat srcImg, @NotNull Collection<? extends HasBoundingBox> units) {
var maskImg = new Mat(srcImg.size(), CvType.CV_8UC1, Scalar.all(0));
for (HasBoundingBox unit : units) {
// NOTE(review): both the exact bounds and the withTolerance(1, 1) bounds are filled.
// If withTolerance only grows the box, the first call is redundant — confirm that only
// the tolerance variant is meant to remain.
Imgproc.rectangle(maskImg, unit.bounds().toRect(), Scalar.all(255), -1);
Imgproc.rectangle(maskImg, unit.bounds().withTolerance(1, 1).toRect(), Scalar.all(255), -1);
}
return maskImg;
}
/**
 * Copies an OpenCV matrix into a newly allocated {@link BufferedImage}.
 *
 * <p>The image type is chosen from the matrix's channel count so that the backing byte
 * array has exactly the layout the matrix is copied into: one channel maps to
 * {@link BufferedImage#TYPE_BYTE_GRAY}, three channels map to
 * {@link BufferedImage#TYPE_3BYTE_BGR}. Previously the type was always
 * {@code TYPE_3BYTE_BGR}, which silently produced a corrupted image for any matrix that
 * was not 3-channel.
 *
 * @param mat matrix to convert; must be non-empty and have 1 or 3 channels
 * @return an image holding a copy of the matrix's pixel data
 * @throws IllegalArgumentException if {@code mat} is empty or has an unsupported channel count
 */
private BufferedImage matToBufferedImage(Mat mat) {
    // An empty Mat has zero width/height, which BufferedImage rejects with a far less
    // helpful message; fail early with the same exception type.
    if (mat.empty()) {
        throw new IllegalArgumentException("Cannot convert an empty Mat to a BufferedImage");
    }
    var imageType = switch (mat.channels()) {
        case 1 -> BufferedImage.TYPE_BYTE_GRAY;
        case 3 -> BufferedImage.TYPE_3BYTE_BGR;
        default -> throw new IllegalArgumentException(
                "Unsupported channel count for BufferedImage conversion: " + mat.channels());
    };
    var image = new BufferedImage(mat.width(), mat.height(), imageType);
    // Write straight into the image's backing array to avoid an intermediate copy.
    var pixels = ((DataBufferByte) image.getRaster().getDataBuffer()).getData();
    mat.get(0, 0, pixels);
    return image;
}
}
......@@ -21,87 +21,56 @@ public class DeeplTranslator<T extends ITranslatable> implements IOnlineTranslat
units.forEach(unit -> buffer.append(unit.getText()).append("\n"));
//try {
var apiResult = "Blocked\n" +
"development\n" +
"+ Add a card\n" +
"=\n" +
"development\n" +
"Tesseract library documentation\n" +
"OCR\n" +
"=\n" +
"memory\n" +
"development\n" +
"Different types of models and\n" +
"Pending\n" +
"In process\n" +
"memory\n" +
"memory\n" +
"miscellaneous\n" +
"Drafting of the report\n" +
"Project planning\n" +
"=\n" +
"report\n" +
"report\n" +
"development\n" +
"Bibliography documentation\n" +
"Translation services: online and offline\n" +
"memory\n" +
"offline. Study\n" +
"recognition methods, study\n" +
"memory\n" +
"development\n" +
"Spatial Hash Grid data structure\n" +
"+\n" +
"card\n" +
"Documentation of the development\n" +
"development\n" +
"development\n" +
"Documentation of the structures of\n" +
"TFG development\n" +
"data created\n" +
"development\n" +
"development\n" +
"Preliminary image optimization for\n" +
"Definition of the structure of the\n" +
"improve the results of the\n" +
"project\n" +
"recognition\n" +
"memory\n" +
"development\n" +
"development\n" +
"Text color detection and\n" +
"Analysis of background collision algorithms\n" +
"background\n" +
"bounding box 2d\n" +
"miscellaneous\n" +
"development\n" +
"Creation of data structures\n" +
"Training model recognition\n" +
"from\n" +
"to improve in\n" +
"specific\n" +
"within the scope of the application\n" +
"development\n" +
"miscellaneous\n" +
"Git structure and\n" +
"project\n" +
"Application packaging\n" +
"+ Add a card\n" +
"a\n" +
"+ Add a card\n" +
"+ Add another list\n" +
"Pending review\n" +
"Completed\n" +
"memo\n" +
"development\n" +
"Creation of trello for tracking\n" +
"First approach to\n" +
"text recognition using\n" +
"development\n" +
"Tesseract\n" +
"Analysis of text recognition libraries\n" +
"libraries\n" +
"+ Add a card\n" ;
var apiResult = """
SHREK
Érase una vez una encantadora
princesa. Pero tenía un
encantamiento sobre ella de un tipo
que sólo podía romperse
el primer beso de amor. Estaba encerrada
en un castillo custodiado por un
terrible dragón que escupe fuego.
Muchos valientes caballeros habían intentado
liberarla de esta terrible prisión,
pero ninguno prevaleció. Ella esperaba en
juego. Shrek sigue con su
los aldeanos se reúnen para
la guarida del dragón en la
habitación de la torre más alta para su
amor verdadero y el primer
beso.
(Risas) Como si eso alguna vez
va a suceder. Lo que una carga de -
(descarga de inodoro)
MAN1
¿Crees que está ahí?
risas
hacer un traje
Allstar - de Smashmouth comienza al
día. Mientras en un pueblo cercano,
ve tras el ogro.
NOCHE - CERCA DE LA CASA DE SHREK
MAN2
derecha. ¡Vamos a por él!
Agárrate. ¿Sabes lo que
esa cosa puede hacerte?
HOMBRE3
Sí,
1t'11 moler los huesos de
su pan.
SHREK
Sí, bueno, en realidad, eso sería
un gigante. Ahora, los ogros, oh son
mucho peores.
de su piel recién pelada.
HOMBRES
¡No!
SHREK
aféitate el hígado. Exprime
¡la gelatina de tus ojos! En realidad,
es bastante bueno en tostadas.
Shrek se acerca sigilosamente por detrás y
""";
//translator.translateText(buffer.toString(), sourceLang, targetLang);
var lines = apiResult
......@@ -117,10 +86,7 @@ public class DeeplTranslator<T extends ITranslatable> implements IOnlineTranslat
"es"
//apiResult.getDetectedSourceLanguage()
, targetLang);
// }
// catch (DeepLException e) {
// throw new RuntimeException(e);
// } catch (InterruptedException e) {
// } catch (DeepLException | InterruptedException e) {
// throw new RuntimeException(e);
// }
}
......
src/main/resources/test_img_tfg.png

237 KiB

0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment