Skip to content
Snippets Groups Projects
Commit bba9dd68 authored by Yonatan's avatar Yonatan
Browse files

Final comments and minor refactor

parent bbabbcf0
No related branches found
No related tags found
1 merge request!1Final merge
Showing
with 259 additions and 288 deletions
//package es.yonatan.tfg;
//
//import es.yonatan.tfg.grouper.SimpleUnitGrouper;
//import es.yonatan.tfg.image.post.PostProcessor;
//import es.yonatan.tfg.model.UnitGroup;
//import es.yonatan.tfg.recognizer.TesseractRecognizer;
//import net.sourceforge.tess4j.util.ImageHelper;
//
//import javax.imageio.ImageIO;
//import javax.swing.*;
//import java.awt.*;
//import java.awt.image.BufferedImage;
//import java.io.File;
//import java.io.IOException;
//
//public class Main {
// public static void main(String[] args) throws IOException {
// var file = new File("C:/Proyectos/TFG/src/main/resources/test_img_tfg.png");
//
// var time = System.currentTimeMillis();
// //TODO: 10/04/2023 transform originalImage for best Tesseract OCR results: grayscale, binarize, noise reduction, etc.
// var originalImage = ImageIO.read(file);
// var image = ImageHelper.convertImageToGrayscale(originalImage);
// image = ImageHelper.convertImageToBinary(image);
//
// var tess = new TesseractRecognizer.Builder()
// .setDataPath(new File("C:/Proyectos/TFG/src/main/resources/tessdata/"))
// .setSupportedLanguages("eng", "spa")
// .setLanguage("spa")
// .setVariable("user_defined_dpi", "70") // TODO: 10/04/2023 300 dpi is too high, test lower dpi and see how it affects the results and completion time
// .build();
//
// var words = tess.recognizeSync(image, 80);
//
// if (words == null) return;
//
// var merger = new SimpleUnitGrouper();
//
// var groups = merger.group(words, image.getWidth(), image.getHeight(), 1, 1, 25, -5);
//
// System.out.println(System.currentTimeMillis() - time);
//
// //var translated = new DeeplTranslator<UnitGroup>("f6542fe3-8e30-5cd2-064e-5eb18c7dbbf3:fx").translate(groups, null, "es");
//
// time = System.currentTimeMillis();
//
// var processedImage = new PostProcessor().processImage(file, groups);
//
// System.out.println("OpenCV time: " + (System.currentTimeMillis() - time));
//
// // basic GUI to render the bounding boxes resulting of the OCR recognition as well as testing the word merge algorithm
//
// var scale = 1.5;
// var window = new JFrame();
// window.setSize(new Dimension((int) (originalImage.getWidth() / scale), (int) (originalImage.getHeight() / scale)));
// window.setPreferredSize(new Dimension(originalImage.getWidth(), originalImage.getHeight()));
// window.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
// window.setLocationRelativeTo(null);
//
// groups.forEach(group -> System.out.println(group.getText()));
//
// var imagePanel = new JPanel() {
// @Override
// protected void paintComponent(Graphics g) {
// super.paintComponent(g);
//
// var time = System.currentTimeMillis();
//
// g.drawImage(originalImage.getScaledInstance((int) (originalImage.getWidth() / scale), (int) (originalImage.getHeight() / scale), BufferedImage.SCALE_SMOOTH), 0, 0, null);
// var g2d = ((Graphics2D) g);
// g2d.setStroke(new BasicStroke(1.5f));
//
// g2d.setFont(new Font(Font.MONOSPACED, Font.BOLD, 15));
//
// for (UnitGroup group : groups) {
//
// var boundingBox = group.bounds();
//
// g2d.setColor(Color.red);
// g2d.drawRect((int) (boundingBox.x() / scale), (int) (boundingBox.y() / scale), (int) (boundingBox.width() / scale), (int) (boundingBox.height() / scale));
//
// g2d.setColor(Color.black);
// //g2d.drawString(group.getText(), (int) (boundingBox.x() / scale), (int) ((boundingBox.y() + boundingBox.height() * 0.75) / scale));
// }
//
// System.out.println("Draw time: " + (System.currentTimeMillis() - time));
// }
// };
//
//
// window.add(imagePanel);
// window.setVisible(true);
// }
//}
\ No newline at end of file
......@@ -12,6 +12,10 @@ import java.awt.image.BufferedImage;
import java.io.File;
import java.util.*;
/**
* Implementation of {@link IUnitRecognizer} using Google's Tesseract OCR library
* It can handle multiple images returning a {@link Unit#NEXT_IMAGE_UNIT} between them
*/
public final class TesseractRecognizer implements IUnitRecognizer<Unit> {
private final Tesseract tess;
......@@ -46,6 +50,10 @@ public final class TesseractRecognizer implements IUnitRecognizer<Unit> {
return units;
}
/**
* Utility class based on the Builder pattern to create a {@link TesseractRecognizer} with
* the required parameters in a more readable way with default values
*/
public static final class Builder {
private File dataPath;
......@@ -60,12 +68,11 @@ public final class TesseractRecognizer implements IUnitRecognizer<Unit> {
}
/**
 * Resets this builder to its default configuration by going through the regular setters:
 * the data path is whatever {@code LoadLibs.extractTessResources("tessdata")} returns,
 * {@code "latin"} is both the only supported language and the active one, and the
 * character whitelist and blacklist are reset to {@code null}.
 *
 * @return this builder, for chaining
 */
public Builder withDefaults() {
    // Single code path: the setters are the only place that writes the fields, so the
    // defaults cannot drift from what a caller would get by calling the setters directly.
    return this
            .setDataPath(LoadLibs.extractTessResources("tessdata"))
            .setSupportedLanguages(Set.of("latin"))
            .setLanguage("latin")
            .setCharWhitelist(null)
            .setCharBlacklist(null);
}
public Builder setVariable(@NotNull String variable, @NotNull String value) {
......@@ -103,12 +110,12 @@ public final class TesseractRecognizer implements IUnitRecognizer<Unit> {
return this;
}
public Builder setCharBlacklist(@NotNull String charBlacklist) {
public Builder setCharBlacklist(String charBlacklist) {
this.charBlacklist = charBlacklist;
return this;
}
public Builder setCharWhitelist(@NotNull String charWhitelist) {
public Builder setCharWhitelist(String charWhitelist) {
this.charWhitelist = charWhitelist;
return this;
}
......
package es.yonatan.tfg.server;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import es.yonatan.tfg.grouper.IUnitGrouper;
import es.yonatan.tfg.grouper.SimpleUnitGrouper;
import es.yonatan.tfg.image.post.IImagePostProcessor;
import es.yonatan.tfg.image.post.PostProcessor;
import es.yonatan.tfg.image.pre.IImagePreProcessor;
import es.yonatan.tfg.image.pre.PreProcessor;
import es.yonatan.tfg.model.UnitGroup;
import es.yonatan.tfg.recognizer.TesseractRecognizer;
import es.yonatan.tfg.translator.DeeplTranslator;
import es.yonatan.tfg.translator.TranslationResult;
import org.jetbrains.annotations.Nullable;
import spark.Route;
import spark.Spark;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
public class SparkServer {
public static void main(String[] args) {
......@@ -32,8 +9,6 @@ public class SparkServer {
server.start(8080);
}
private static final Gson GSON = new GsonBuilder().setPrettyPrinting().create();
/**
* starts Spark REST server on the specified port and sets up the routes
*
......@@ -55,150 +30,8 @@ public class SparkServer {
}
private void addRoutes() {
Spark.post("/translate", translateRoute);
}
record Options(String srcLang, String targetLang, double confidenceLevel, boolean excludeNumbers, boolean debug) {
}
record RequestBody(String imageData, Options options) {
Spark.post("/translate", new TranslateRoute());
}
/**
 * Route running the whole pipeline: parse the request, pre-process the image, recognize
 * the text, group the recognized units, translate them and post-process the image.
 * Each step is validated; the first failure stops the pipeline and is reported as a
 * JSON error response.
 */
private final Route translateRoute = (req, res) -> {
    try {
        // Gson returns null for an empty body and leaves absent properties null
        var body = GSON.fromJson(req.body(), RequestBody.class);
        if (body == null || body.imageData() == null || body.options() == null) {
            return errorResponse("Invalid request: 'imageData' and 'options' are required");
        }
        var options = body.options();
        if (options.targetLang() == null || options.targetLang().isBlank()) {
            return errorResponse("Invalid request: 'options.targetLang' is required");
        }

        // The DeepL key is a credential: it is read from the environment, never from source
        var apiKey = System.getenv("DEEPL_API_KEY");
        if (apiKey == null || apiKey.isBlank()) {
            return errorResponse("Translation service is not configured");
        }

        // ImageIO.read yields null when no registered reader understands the data
        var image = decodeBase64(body.imageData());
        if (image == null) return errorResponse("Unsupported or corrupt image data");

        var preprocessor = new PreProcessor();
        var preProcessingResult = preprocessor.processImage(image, options.debug());
        if (preProcessingResult == null) return errorResponse("Error pre-processing image");
        image = preProcessingResult.image();

        var builder = new TesseractRecognizer.Builder().withDefaults();
        if (options.srcLang() != null && !options.srcLang().isEmpty()) builder.setLanguage(options.srcLang());
        if (options.excludeNumbers()) builder.excludeNumbers();

        var recognizer = builder.build();
        var result = recognizer.recognizeSync(image, options.confidenceLevel());
        if (result == null || result.isEmpty()) return errorResponse("No words found");

        var grouper = new SimpleUnitGrouper();
        var groups = grouper.group(result, image.getWidth(), image.getHeight(), IUnitGrouper.AUTO_CELL_COUNT, IUnitGrouper.AUTO_CELL_COUNT, IUnitGrouper.DEFAULT_HORIZONTAL_TOLERANCE, IUnitGrouper.DEFAULT_VERTICAL_TOLERANCE);

        var translator = new DeeplTranslator<UnitGroup>(apiKey);
        var translationResult = translator.translate(groups, options.srcLang(), options.targetLang());
        if (translationResult == null) return errorResponse("Error while translating the image");

        var postprocessor = new PostProcessor();
        var postProcessingResult = postprocessor.processImage(image, translationResult.units(), options.debug());
        if (postProcessingResult == null) return errorResponse("Error post-processing image");

        return successResponse(preProcessingResult, postProcessingResult, translationResult, options.debug());
    } catch (Exception e) {
        System.getLogger(SparkServer.class.getName()).log(System.Logger.Level.ERROR, "Translation request failed", e);
        // Some exceptions carry no message; fall back to the type so the client gets something
        var message = e.getMessage();
        return errorResponse(message != null ? message : e.getClass().getSimpleName());
    }
};
/**
 * Serializes the outcome of a successful run to JSON: the final image (Base64 PNG), both
 * transcripts, both languages and, only when requested, the debug images of the
 * pre-processing step followed by those of the post-processing step.
 *
 * @param preProcessResult  result of the {@link IImagePreProcessor}
 * @param postProcessResult result of the {@link IImagePostProcessor}
 * @param translationResult result of the {@link es.yonatan.tfg.translator.IOnlineTranslator}
 * @param debug             whether to include debug data
 * @return the JSON success message, or a JSON error message if the final image cannot be encoded
 */
private String successResponse(IImagePreProcessor.PreProcessResult preProcessResult, IImagePostProcessor.PostProcessResult postProcessResult, TranslationResult<UnitGroup> translationResult, boolean debug) {
    record Response(String imageData, String srcTranscript, String translatedTranscript, String srcLang, String targetLang, @Nullable List<String> debugImages, boolean success) { }

    // Stays null (not an empty list) when no debug output was requested
    final List<String> encodedDebugImages = debug ? new ArrayList<>() : null;
    try {
        if (debug) {
            preProcessResult.debugImages().forEach(debugImage -> {
                try {
                    encodedDebugImages.add(encodeBase64(debugImage));
                } catch (IOException ex) {
                    throw new RuntimeException(ex);
                }
            });
            postProcessResult.debugImages().forEach(debugImage -> {
                try {
                    encodedDebugImages.add(encodeBase64(debugImage));
                } catch (IOException ex) {
                    throw new RuntimeException(ex);
                }
            });
        }

        var payload = new Response(
                encodeBase64(postProcessResult.image()),
                translationResult.srcTranscript(),
                translationResult.translatedTranscript(),
                translationResult.srcLang(),
                translationResult.targetLang(),
                encodedDebugImages,
                true);
        return GSON.toJson(payload);
    } catch (IOException ioe) {
        return errorResponse(ioe.getMessage());
    }
}
/**
 * Builds the JSON document returned when a request fails: the given message together
 * with {@code success = false}.
 *
 * @param msg the message to be displayed
 * @return JSON error message
 */
private String errorResponse(String msg) {
    record Response(String message, boolean success) { }

    var failure = new Response(msg, false);
    return GSON.toJson(failure);
}
/**
 * Writes the image as PNG into memory and returns those bytes Base64 encoded.
 *
 * @param image the image to encode
 * @return the Base64 encoded PNG data
 * @throws IOException if writing the image fails
 */
private String encodeBase64(BufferedImage image) throws IOException {
    try (var pngBuffer = new ByteArrayOutputStream()) {
        ImageIO.write(image, "png", pngBuffer);
        byte[] pngBytes = pngBuffer.toByteArray();
        return Base64.getEncoder().encodeToString(pngBytes);
    }
}
/**
 * Decodes a Base64 string and reads the resulting bytes as an image.
 *
 * @param base64Img the base64 image
 * @return the decoded image; {@code ImageIO.read} returns {@code null} when no registered
 *         reader can handle the data
 * @throws IOException if reading the image data fails
 */
private BufferedImage decodeBase64(String base64Img) throws IOException {
    byte[] rawBytes = Base64.getDecoder().decode(base64Img);
    var imageStream = new ByteArrayInputStream(rawBytes);
    return ImageIO.read(imageStream);
}
}
\ No newline at end of file
package es.yonatan.tfg.server;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import es.yonatan.tfg.grouper.IUnitGrouper;
import es.yonatan.tfg.grouper.SimpleUnitGrouper;
import es.yonatan.tfg.image.post.IImagePostProcessor;
import es.yonatan.tfg.image.post.PostProcessor;
import es.yonatan.tfg.image.pre.IImagePreProcessor;
import es.yonatan.tfg.image.pre.PreProcessor;
import es.yonatan.tfg.model.UnitGroup;
import es.yonatan.tfg.recognizer.TesseractRecognizer;
import es.yonatan.tfg.translator.DeeplTranslator;
import es.yonatan.tfg.translator.TranslationResult;
import org.jetbrains.annotations.Nullable;
import spark.Request;
import spark.Response;
import spark.Route;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
/**
 * Spark {@link Route} implementing the complete recognition and translation pipeline:
 * request parsing, image pre-processing, text recognition, unit grouping, online
 * translation and image post-processing. Every outcome, success or failure, is returned
 * as a JSON string.
 */
public class TranslateRoute implements Route {
    private static final Gson GSON = new GsonBuilder().setPrettyPrinting().create();

    private static final System.Logger LOGGER = System.getLogger(TranslateRoute.class.getName());

    /**
     * Name of the environment variable holding the DeepL API key. The key is a credential
     * and must be supplied by the deployment instead of being committed with the sources.
     */
    private static final String DEEPL_API_KEY_ENV = "DEEPL_API_KEY";

    /**
     * Implementation of the flow an application implementing the text recognition and
     * translation functionality should follow. The request and every intermediate result
     * are validated; on the first problem the process stops and an error response is returned.
     *
     * @param req The request object providing information about the HTTP request
     * @param res The response object providing functionality for modifying the response
     * @return The success response containing the translated text or an error response,
     *         in both cases as a JSON string
     */
    @Override
    public Object handle(Request req, Response res) {
        // Every branch below returns a JSON document, so advertise it as such
        res.type("application/json");
        try {
            // Gson returns null for an empty body and leaves absent properties null
            var body = GSON.fromJson(req.body(), RequestBody.class);
            if (body == null || body.imageData() == null || body.options() == null) {
                return errorResponse("Invalid request: 'imageData' and 'options' are required");
            }
            var options = body.options();
            if (options.targetLang() == null || options.targetLang().isBlank()) {
                return errorResponse("Invalid request: 'options.targetLang' is required");
            }

            // Fail before doing any expensive image work when the translator cannot be used
            var apiKey = System.getenv(DEEPL_API_KEY_ENV);
            if (apiKey == null || apiKey.isBlank()) {
                LOGGER.log(System.Logger.Level.ERROR, "Environment variable {0} is not set", DEEPL_API_KEY_ENV);
                return errorResponse("Translation service is not configured");
            }

            var image = decodeBase64(body.imageData());

            var preprocessor = new PreProcessor();
            var preProcessingResult = preprocessor.processImage(image, options.debug());
            if (preProcessingResult == null) return errorResponse("Error pre-processing image");
            image = preProcessingResult.image();

            var builder = new TesseractRecognizer.Builder().withDefaults();
            if (options.srcLang() != null && !options.srcLang().isEmpty()) builder.setLanguage(options.srcLang());
            if (options.excludeNumbers()) builder.excludeNumbers();

            var recognizer = builder.build();
            var result = recognizer.recognizeSync(image, options.confidenceLevel());
            if (result == null || result.isEmpty()) return errorResponse("No words found");

            var grouper = new SimpleUnitGrouper();
            var groups = grouper.group(result, image.getWidth(), image.getHeight(), IUnitGrouper.AUTO_CELL_COUNT, IUnitGrouper.AUTO_CELL_COUNT, IUnitGrouper.DEFAULT_HORIZONTAL_TOLERANCE, IUnitGrouper.DEFAULT_VERTICAL_TOLERANCE);

            var translator = new DeeplTranslator<UnitGroup>(apiKey);
            var translationResult = translator.translate(groups, options.srcLang(), options.targetLang());
            if (translationResult == null) return errorResponse("Error while translating the image");

            var postprocessor = new PostProcessor();
            var postProcessingResult = postprocessor.processImage(image, translationResult.units(), options.debug());
            if (postProcessingResult == null) return errorResponse("Error post-processing image");

            return successResponse(preProcessingResult, postProcessingResult, translationResult, options.debug());
        } catch (Exception e) {
            // Boundary of the request: log with the stack trace, answer with a JSON error
            LOGGER.log(System.Logger.Level.ERROR, "Translation request failed", e);
            var message = e.getMessage();
            // Some exceptions carry no message; fall back to the type so the client gets something
            return errorResponse(message != null ? message : e.getClass().getSimpleName());
        }
    }

    /**
     * Options part of the JSON request body.
     *
     * @param srcLang         source language; may be null or empty
     * @param targetLang      target language; required
     * @param confidenceLevel confidence level forwarded to the recognizer
     * @param excludeNumbers  whether the recognizer should exclude numbers
     * @param debug           whether debug images are produced and returned
     */
    record Options(String srcLang, String targetLang, double confidenceLevel, boolean excludeNumbers, boolean debug) { }

    /**
     * JSON request body.
     *
     * @param imageData the Base64 encoded image
     * @param options   the processing options
     */
    record RequestBody(String imageData, Options options) { }

    /**
     * Generates a JSON success response with the translation data and image
     * If debug is enabled, the debug images are also included in the response in order of processing
     *
     * @param preProcessResult  result of the {@link IImagePreProcessor}
     * @param postProcessResult result of the {@link IImagePostProcessor}
     * @param translationResult result of the {@link es.yonatan.tfg.translator.IOnlineTranslator}
     * @param debug             whether to include debug data
     * @return JSON success message, or a JSON error message if an image cannot be encoded
     */
    private String successResponse(IImagePreProcessor.PreProcessResult preProcessResult, IImagePostProcessor.PostProcessResult postProcessResult, TranslationResult<UnitGroup> translationResult, boolean debug) {
        // The component names are the JSON keys of the response; do not rename them
        record SuccessPayload(String imageData, String srcTranscript, String translatedTranscript, String srcLang, String targetLang, @Nullable List<String> debugImages, boolean success) { }

        try {
            // Stays null (not an empty list) when no debug output was requested
            List<String> debugImages = null;
            if (debug) {
                var encoded = new ArrayList<String>();
                preProcessResult.debugImages().forEach(image -> appendEncoded(encoded, image));
                postProcessResult.debugImages().forEach(image -> appendEncoded(encoded, image));
                debugImages = encoded;
            }

            return GSON.toJson(new SuccessPayload(
                    encodeBase64(postProcessResult.image()),
                    translationResult.srcTranscript(),
                    translationResult.translatedTranscript(),
                    translationResult.srcLang(),
                    translationResult.targetLang(),
                    debugImages,
                    true)
            );
        } catch (IOException | java.io.UncheckedIOException e) {
            return errorResponse(e.getMessage());
        }
    }

    /**
     * Encodes one image and appends it to the target list. The checked exception is
     * rethrown unchecked so the method can be used from a lambda.
     *
     * @param target the list receiving the Base64 encoded image
     * @param image  the image to encode
     */
    private void appendEncoded(List<String> target, BufferedImage image) {
        try {
            target.add(encodeBase64(image));
        } catch (IOException e) {
            throw new java.io.UncheckedIOException(e);
        }
    }

    /**
     * Generates a JSON error response
     *
     * @param msg the message to be displayed
     * @return JSON error message
     */
    private String errorResponse(String msg) {
        // The component names are the JSON keys of the response; do not rename them
        record ErrorPayload(String message, boolean success) { }

        return GSON.toJson(new ErrorPayload(msg, false));
    }

    /**
     * Encodes the given BufferedImage instance to a Base64 encoded string
     *
     * @param image the image to encode
     * @return the Base64 encoded PNG data
     * @throws IOException if an error occurs while encoding the image or no PNG writer
     *                     accepts the image
     */
    private String encodeBase64(BufferedImage image) throws IOException {
        var out = new ByteArrayOutputStream();
        // ImageIO.write reports "no suitable writer" through its return value, not an exception
        if (!ImageIO.write(image, "png", out)) {
            throw new IOException("No PNG writer available for the given image");
        }
        return Base64.getEncoder().encodeToString(out.toByteArray());
    }

    /**
     * Creates a BufferedImage instance with the data from the given Base64 encoded image
     *
     * @param base64Img the base64 image
     * @return the BufferedImage created, never null
     * @throws IOException if an error occurs while decoding the image or the data is not
     *                     in an image format that can be read
     */
    private BufferedImage decodeBase64(String base64Img) throws IOException {
        var data = Base64.getDecoder().decode(base64Img);
        var image = ImageIO.read(new ByteArrayInputStream(data));
        // ImageIO.read returns null instead of throwing when no reader understands the data
        if (image == null) {
            throw new IOException("Unsupported or corrupt image data");
        }
        return image;
    }
}
......@@ -9,6 +9,9 @@ import org.jetbrains.annotations.Nullable;
import java.util.List;
/**
* Translator that uses the DeepL API
*/
public class DeeplTranslator<T extends ITranslatable> implements IOnlineTranslator<T> {
private final Translator translator;
......@@ -17,6 +20,15 @@ public class DeeplTranslator<T extends ITranslatable> implements IOnlineTranslat
translator = new Translator(apiKey);
}
/**
* Groups the {@link es.yonatan.tfg.model.UnitGroup} vertically and translates them
* for a more accurate translation
*
* @param units The units to translate
* @param sourceLang The source language, or null to auto-detect
* @param targetLang The target language
* @return The translation result with the translated units, or null if an error occurred
*/
@Override
public @Nullable TranslationResult<T> translate(@NotNull List<T> units, @Nullable String sourceLang, @NotNull String targetLang) {
try {
......
......@@ -6,7 +6,12 @@ import org.jetbrains.annotations.Nullable;
import java.util.List;
/**
* Interface for online translator
*
* @param <T> The type of the translatable units
*/
public interface IOnlineTranslator<T extends ITranslatable> {
    /**
     * Translates the given units from the source language into the target language.
     *
     * @param units      the units to translate
     * @param sourceLang the source language; may be {@code null}
     *                   (DeeplTranslator documents {@code null} as "auto-detect")
     * @param targetLang the target language
     * @return the translation result typed with the unit type, or {@code null}
     *         (DeeplTranslator documents {@code null} as "an error occurred")
     */
    @Nullable TranslationResult<T> translate(@NotNull List<T> units, @Nullable String sourceLang, @NotNull String targetLang);
}
......@@ -4,6 +4,12 @@ import es.yonatan.tfg.model.ITranslatable;
import java.util.Collection;
/**
* The result of a translation, contains the translated units and the source and target languages
* as well as a transcript of the translated units and the source units
*
* @param <T> The type of the translatable units
*/
public record TranslationResult<T extends ITranslatable>(Collection<T> units, String srcLang, String targetLang) {
public String translatedTranscript() {
......
......@@ -8,12 +8,23 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Objects;
/**
* Utility class that represents a bounding box: a rectangle with a position and size in 2D space
* with some useful methods to transform, merge and compare them as well as to check for intersections
* and creation from common types like {@link Rectangle} and {@link Rect}
*/
public record BoundingBox(int x, int y, int width, int height) {
/**
 * Checks whether another bounding box intersects this one after the given tolerances
 * have been applied to this box through {@code withTolerance}.
 *
 * @param test                The other bounding box
 * @param horizontalTolerance The horizontal tolerance passed to {@code withTolerance}
 * @param verticalTolerance   The vertical tolerance passed to {@code withTolerance}
 * @return Whether they intersect or not
 */
public boolean intersects(BoundingBox test, int horizontalTolerance, int verticalTolerance) {
    var adjusted = withTolerance(horizontalTolerance, verticalTolerance);
    return test.intersects(adjusted);
}
/**
* Checks if this bounding box intersects with another
*
* @param test The other bounding box
* @return Whether they intersect or not
*/
public boolean intersects(BoundingBox test) {
return (this.x < test.x + test.width) &&
(this.x + this.width > test.x) &&
......
......@@ -5,6 +5,19 @@ import org.jetbrains.annotations.NotNull;
import java.util.*;
/**
* A 2D grid that stores objects based on their bounding boxes based on the work of Sylvain, Hoppe:
* <a href="https://hhoppe.com/perfecthash.pdf">Perfect Spatial Hashing</a>
* <br>
* It's based on coordinate hashing:
* Each cell is identified by a unique hash, which is computed from the cell's coordinates,
* then the objects are stored in a map, where the key is the hash and the value is a list of objects
* <br>
* This speeds up the search for objects that are close to a given point, only having to check the objects
* in the cell where the point is located and the surrounding cells
*
* @param <T> The type of the objects to store in the grid (must implement {@link HasBoundingBox})
*/
public class HashGrid2D<T extends HasBoundingBox> {
protected final int width;
......@@ -14,10 +27,6 @@ public class HashGrid2D<T extends HasBoundingBox> {
protected final Map<Integer, List<T>> grid;
/**
 * Creates a grid that uses the same cell count on both axes by delegating to the
 * four-argument constructor with {@code cellCount} as both the horizontal and the
 * vertical cell count.
 *
 * @param width     forwarded unchanged as the grid width
 * @param height    forwarded unchanged as the grid height
 * @param cellCount the cell count used for both axes
 */
public HashGrid2D(int width, int height, int cellCount) {
this(width, height, cellCount, cellCount);
}
public HashGrid2D(int width, int height, int horizontalCellCount, int verticalCellCount) {
this.width = width;
this.height = height;
......@@ -98,19 +107,6 @@ public class HashGrid2D<T extends HasBoundingBox> {
return (int) (y / (height / (double) verticalCellCount));
}
/**
 * Sets the horizontal and the vertical cell count to the same value by calling both
 * dedicated setters.
 * NOTE(review): none of these setters touches the {@code grid} map; confirm whether
 * entries stored before a change need to be re-hashed.
 *
 * @param cellCount the cell count to use on both axes
 */
public void setCellCount(int cellCount) {
setHorizontalCellCount(cellCount);
setVerticalCellCount(cellCount);
}
/**
 * Sets the horizontal cell count.
 *
 * @param horizontalCellCount the new horizontal cell count
 */
public void setHorizontalCellCount(int horizontalCellCount) {
this.horizontalCellCount = horizontalCellCount;
}
/**
 * Sets the vertical cell count.
 *
 * @param verticalCellCount the new vertical cell count
 */
public void setVerticalCellCount(int verticalCellCount) {
this.verticalCellCount = verticalCellCount;
}
@Override
public String toString() {
return "HashGrid2D{" + "grid=" + grid + '}';
......
package es.yonatan.tfg.util;
/**
* Reference class for constants
*/
public final class Reference {
public static final String TESSERACT_CHAR_WHITELIST = "tessedit_char_whitelist";
public static final String TESSERACT_CHAR_BLACKLIST = "tessedit_char_blacklist";
......
......@@ -12,4 +12,5 @@ module TFG {
exports es.yonatan.tfg.server;
opens es.yonatan.tfg.server to com.google.gson;
opens es.yonatan.tfg;
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment