Skip to content
Snippets Groups Projects
Commit 4c0f46b0 authored by Yonatan
Browse files

Minor refactor and initial implementation of online translation service

parent a479c164
Branches
No related tags found
1 merge request: !1 Final merge
Showing
with 326 additions and 33 deletions
......@@ -59,6 +59,11 @@
<version>24.0.1</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.deepl.api</groupId>
<artifactId>deepl-java</artifactId>
<version>1.1.0</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package es.yonatan.tfg;
import es.yonatan.tfg.model.HasBoundingBox;
import es.yonatan.tfg.grouper.SimpleUnitGrouper;
import es.yonatan.tfg.model.UnitGroup;
import es.yonatan.tfg.recognizer.color.ColorRecognizer;
import es.yonatan.tfg.recognizer.group.SimpleUnitGrouper;
import es.yonatan.tfg.recognizer.color.LeptonicaColorRecognizer;
import es.yonatan.tfg.recognizer.unit.TesseractRecognizer;
import es.yonatan.tfg.translator.DeeplTranslator;
import net.sourceforge.tess4j.util.ImageHelper;
import javax.imageio.ImageIO;
......@@ -16,7 +16,7 @@ import java.io.IOException;
public class Main {
public static void main(String[] args) throws IOException {
var file = new File("C:/Users/Yonatan/Desktop/04.jpg");
var file = new File("C:/Proyectos/TFG/src/main/resources/04.jpg");
var time = System.currentTimeMillis();
//TODO: 10/04/2023 transform originalImage for best Tesseract OCR results: grayscale, binarize, noise reduction, etc.
......@@ -40,16 +40,11 @@ public class Main {
// Print the milliseconds elapsed since 'time' was captured.
System.out.println(System.currentTimeMillis() - time);
// Sort the groups with the comparison provided by HasBoundingBox.
groups.sort(HasBoundingBox::compareTo);
// Debug dump: every group's string form, separated by ';'.
var test = new StringBuilder();
groups.forEach(a -> test.append(a).append(';'));
System.out.println(test);
// NOTE(review): the DeepL API key is hard-coded and committed to version control. It should be
// revoked/rotated and read from configuration or an environment variable instead.
// The source language is passed as null (the parameter is declared @Nullable) — presumably so
// the service detects it; confirm against the translator implementation.
new DeeplTranslator<UnitGroup>("f6542fe3-8e30-5cd2-064e-5eb18c7dbbf3:fx").translate(groups, null, "en-US");
// Basic GUI that renders the bounding boxes produced by the OCR recognition and exercises the word-merge algorithm.
// NOTE(review): 'colors' is declared twice below (ColorRecognizer and LeptonicaColorRecognizer);
// only one declaration can remain for this to compile.
var colors = new ColorRecognizer();
var colors = new LeptonicaColorRecognizer();
colors.load(file);
// Divisor applied to bounding-box coordinates when drawing.
var scale = 1.5;
......@@ -78,15 +73,13 @@ public class Main {
var color = colors.getColorAt(boundingBox.x(), boundingBox.y(), boundingBox.width(), boundingBox.height());
System.out.println(color);
boundingBox = boundingBox.withTolerance(1, 1);
boundingBox = boundingBox.withTolerance(2, 2);
g2d.setColor(color.getBgColor());
g2d.fillRect((int) (boundingBox.x() / scale), (int) (boundingBox.y() / scale), (int) (boundingBox.width() / scale), (int) (boundingBox.height() / scale));
g2d.setColor(Color.BLACK);
g2d.drawString(group.toString(), (int) (boundingBox.x() / scale), (int) ((boundingBox.y() + boundingBox.height()) / scale));
g2d.setColor(color.getColor());
g2d.drawString(group.getTranslation(), (int) (boundingBox.x() / scale), (int) ((boundingBox.y() + boundingBox.height() * 0.75) / scale));
}
//colors.free();
......
package es.yonatan.tfg.recognizer.group;
package es.yonatan.tfg.grouper;
import es.yonatan.tfg.model.Unit;
import es.yonatan.tfg.model.UnitGroup;
......
package es.yonatan.tfg.recognizer.group;
package es.yonatan.tfg.grouper;
import es.yonatan.tfg.model.HasBoundingBox;
import es.yonatan.tfg.model.Unit;
......
package es.yonatan.tfg.model;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
/**
 * Contract for model elements that expose a source text and can store a translation of it.
 */
public interface ITranslatable {

    /**
     * Returns the source text of this element.
     *
     * @return the text; declared non-null
     */
    @NotNull String getText();

    /**
     * Returns the translation previously stored with {@link #setTranslation(String)}.
     *
     * @return the stored translation; declared nullable (the implementations in this project
     *         start without one)
     */
    @Nullable String getTranslation();

    /**
     * Stores the translation of this element.
     *
     * @param text the translated text; declared non-null
     */
    void setTranslation(@NotNull String text);
}
......@@ -2,17 +2,67 @@ package es.yonatan.tfg.model;
import es.yonatan.tfg.util.BoundingBox;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
public record Unit(@NotNull String text, float confidence, @NotNull BoundingBox bounds) implements HasBoundingBox {
import java.util.Objects;
/**
 * A single unit of text together with a confidence value (presumably the OCR confidence —
 * confirm against the recognizer) and the bounding box it occupies.
 *
 * <p>The text, confidence and bounds are fixed at construction; only the translation can be
 * assigned afterwards. {@link #equals(Object)} and {@link #hashCode()} consider text, confidence
 * and bounds and ignore the translation.
 */
public final class Unit implements HasBoundingBox, ITranslatable {

    /**
     * Marker unit with the text {@code "next_image"} and a bounding box of all {@code -1}.
     * NOTE(review): presumably signals an image boundary — confirm against its users. It is a
     * shared instance, so a translation set on it is visible everywhere.
     */
    public static final Unit NEXT_IMAGE_UNIT = new Unit("next_image", 100, new BoundingBox(-1, -1, -1, -1));

    private final @NotNull String text;
    private final float confidence;
    private final @NotNull BoundingBox bounds;
    private @Nullable String translation;

    /**
     * Creates a unit without a translation.
     *
     * @param text       the unit's text
     * @param confidence the confidence value associated with the text
     * @param bounds     the bounding box of the unit
     */
    public Unit(@NotNull String text, float confidence, @NotNull BoundingBox bounds) {
        this.text = text;
        this.confidence = confidence;
        this.bounds = bounds;
    }

    /**
     * Stores the translation of this unit, replacing any previous one.
     *
     * @param text the translated text
     */
    @Override
    public void setTranslation(@NotNull String text) {
        translation = text;
    }

    /**
     * @return the stored translation, or {@code null} if none has been set
     */
    @Override
    public @Nullable String getTranslation() {
        return translation;
    }

    /**
     * @return the text given at construction
     */
    @Override
    public @NotNull String getText() {
        return text;
    }

    /**
     * @return the confidence value given at construction
     */
    public float getConfidence() {
        return confidence;
    }

    /**
     * @return the bounding box given at construction
     */
    @Override
    public @NotNull BoundingBox bounds() {
        return bounds;
    }

    /**
     * Returns the unit's text.
     *
     * <p>The former {@code "BoxedWord{...}"} debug representation was left in front of this
     * return, which made it unreachable (a compile error); only the plain text is returned.
     *
     * @return the same value as {@link #getText()}
     */
    @Override
    public String toString() {
        return getText();
    }

    /**
     * Two units are equal when their text, confidence (compared bit-wise, so {@code NaN} equals
     * {@code NaN} and {@code 0.0f} differs from {@code -0.0f}) and bounds are equal.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) return true;
        if (obj == null || obj.getClass() != this.getClass()) return false;
        var that = (Unit) obj;
        return Objects.equals(this.text, that.text) &&
                Float.floatToIntBits(this.confidence) == Float.floatToIntBits(that.confidence) &&
                Objects.equals(this.bounds, that.bounds);
    }

    /**
     * Hash over the same three fields used by {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return Objects.hash(text, confidence, bounds);
    }
}
......@@ -9,26 +9,27 @@ import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
public class UnitGroup implements HasBoundingBox {
public class UnitGroup implements HasBoundingBox, ITranslatable {
private final @NotNull LinkedList<Unit> units;
private @Nullable BoundingBox bounds;
private @Nullable String translation;
public UnitGroup() {
units = new LinkedList<>();
}
/**
 * Varargs convenience overload: wraps the given groups in a list and delegates to
 * {@link #merge(Collection)}.
 *
 * @param unitGroups the groups to merge into this one
 */
// NOTE(review): two signatures are present in this span (one returning UnitGroup, one void);
// only one of them can remain for the class to compile.
public UnitGroup merge(UnitGroup... unitGroups) {
return merge(List.of(unitGroups));
public void merge(UnitGroup... unitGroups) {
merge(List.of(unitGroups));
}
/**
 * Adds the units of every given group to this group and then calls {@code recomputeOrder()}
 * once, after all groups have been added.
 *
 * @param unitGroups the groups whose units are added to this one
 */
// NOTE(review): two signatures are present in this span (one returning UnitGroup, one void),
// and the trailing 'return this' only fits the former; only one version can remain.
public UnitGroup merge(Collection<UnitGroup> unitGroups) {
public void merge(Collection<UnitGroup> unitGroups) {
for (UnitGroup unitGroup : unitGroups) {
// 'false' is presumably the recomputeOrder flag, deferring the reorder to the single call below — confirm.
addWords(unitGroup.units, false);
}
recomputeOrder();
return this;
}
public void addWords(boolean recomputeOrder, @NotNull Unit... units) {
......@@ -70,9 +71,24 @@ public class UnitGroup implements HasBoundingBox {
}
/**
 * Builds the group's text by appending each unit's text followed by a space, then trimming
 * the result.
 *
 * @return the space-separated text of all units
 */
// NOTE(review): this span contains two method headers (toString and getText) and two forEach
// lines (unit.text() and unit.getText()); only one of each can remain. Keeping both forEach
// lines would append every unit twice.
@Override
public String toString() {
public @NotNull String getText() {
var builder = new StringBuilder();
units.forEach(unit -> builder.append(unit.text()).append(" "));
units.forEach(unit -> builder.append(unit.getText()).append(" "));
// trim() removes the space appended after the last unit.
return builder.toString().trim();
}
/**
 * @return the same string as {@link #getText()}
 */
@Override
public String toString() {
    return this.getText();
}
/**
 * Stores the translation of this group, replacing any previous one.
 *
 * @param text the translated text
 */
@Override
public void setTranslation(@NotNull String text) {
    this.translation = text;
}
/**
 * @return the translation stored for this group; declared nullable
 */
@Override
public @Nullable String getTranslation() {
    return this.translation;
}
}
......@@ -9,7 +9,7 @@ public interface IColorRecognizer {
/**
 * Returns colour information for a rectangular region of the loaded image.
 *
 * @param u      x coordinate of the region (callers pass the bounding box's x)
 * @param v      y coordinate of the region (callers pass the bounding box's y)
 * @param width  width of the region
 * @param height height of the region
 * @return the colour data computed for the region
 */
ColorData getColorAt(int u, int v, int width, int height);
/**
 * Loads the image that later {@code getColorAt} calls operate on.
 *
 * @param image the image file to load
 */
// NOTE(review): two signatures are present (one returning IColorRecognizer, one void); only one can remain.
@NotNull IColorRecognizer load(@NotNull File image);
void load(@NotNull File image);
/**
 * NOTE(review): presumably releases the resources held for the loaded image — confirm against
 * the implementation.
 */
void free();
}
......@@ -9,13 +9,13 @@ import org.jetbrains.annotations.Nullable;
import java.awt.*;
import java.io.File;
import java.nio.IntBuffer;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;
public class ColorRecognizer implements IColorRecognizer {
public class LeptonicaColorRecognizer implements IColorRecognizer {
public static final int COLOR_COMPRESSION_FACTOR = 8;
private @Nullable Pix pix;
private @Nullable Pix pix32;
......@@ -24,35 +24,35 @@ public class ColorRecognizer implements IColorRecognizer {
/**
 * Samples pixels of the loaded 32-bit image inside the given region, counts how often each
 * colour occurs and builds a {@link ColorData} from the result.
 *
 * @param u      x coordinate where sampling starts
 * @param v      y coordinate where sampling starts
 * @param width  width of the sampled region
 * @param height height of the sampled region
 * @return the colour data for the region
 * @throws NullPointerException if no image has been loaded
 */
// NOTE(review): this span interleaves two versions of the body (duplicate buffer declarations,
// two loops, two 'sorted' declarations, two returns); only one version can remain.
public ColorData getColorAt(int u, int v, int width, int height) {
if (pix == null || pix32 == null) throw new NullPointerException("Image not loaded");
var rBuffer = IntBuffer.allocate(32);
var gBuffer = IntBuffer.allocate(32);
var bBuffer = IntBuffer.allocate(32);
// One int per channel is enough: each call writes a single value per buffer.
var rBuffer = IntBuffer.allocate(1);
var gBuffer = IntBuffer.allocate(1);
var bBuffer = IntBuffer.allocate(1);
// Histogram: colour -> number of sampled pixels with that colour.
var occurrences = new HashMap<Color, Integer>();
for (int i = 0; i < width; i++) {
Leptonica1.pixGetRGBPixel(pix32, u + i, v, rBuffer, gBuffer, bBuffer);
var color = new Color(rBuffer.get(), gBuffer.get(), bBuffer.get());
// Visit every second pixel in both directions (offsets i * 2 and j * 2).
for (int i = 0; i < width / 2; i++)
for (int j = 0; j < height / 2; j++) {
Leptonica1.pixGetRGBPixel(pix32, u + i * 2, v + j * 2, rBuffer, gBuffer, bBuffer);
// Dividing each channel by COLOR_COMPRESSION_FACTOR maps nearby colours to the same histogram key.
var color = new Color(rBuffer.get() / COLOR_COMPRESSION_FACTOR, gBuffer.get() / COLOR_COMPRESSION_FACTOR, bBuffer.get() / COLOR_COMPRESSION_FACTOR);
// get() advanced each buffer's position; clear() resets it to 0 for the next pixel.
rBuffer.clear();
gBuffer.clear();
bBuffer.clear();
occurrences.put(color, occurrences.getOrDefault(color, 0) + 1);
}
var sorted = occurrences.entrySet().stream().sorted(Comparator.comparingInt(Map.Entry::getValue)).collect(Collectors.toList());
Collections.reverse(sorted);
// Most frequent colour, or black when nothing was sampled.
var sorted = occurrences.entrySet().stream().max(Comparator.comparingInt(Map.Entry::getValue));
var color = sorted.isPresent() ? sorted.get().getKey() : Color.BLACK;
if (sorted.size() == 1)
sorted.add(sorted.get(0));
// Undo the compression by multiplying each channel by the same factor.
var colorBg = new Color(color.getRed() * COLOR_COMPRESSION_FACTOR, color.getGreen() * COLOR_COMPRESSION_FACTOR, color.getBlue() * COLOR_COMPRESSION_FACTOR);
return new ColorData(sorted.get(0).getKey(), sorted.get(0).getKey());
// The first ColorData argument is fixed to black; the second is the dominant sampled colour.
return new ColorData(Color.black, colorBg);
}
/**
 * Reads the image file with Leptonica and keeps both the image as read and a 32-bit
 * conversion of it in the {@code pix} and {@code pix32} fields.
 *
 * @param image the image file to load
 */
// NOTE(review): two signatures are present (one returning IColorRecognizer with 'return this',
// one void); only one version can remain.
// NOTE(review): the previous pix/pix32 values are overwritten here without being released in
// this method — possibly a native-memory leak on repeated loads; confirm against free().
@Override
public @NotNull IColorRecognizer load(@NotNull File image) {
public void load(@NotNull File image) {
// NOTE(review): presumably pixRead yields null when the file cannot be read — confirm and handle.
pix = Leptonica1.pixRead(image.getPath());
pix32 = Leptonica1.pixConvertTo32(pix);
return this;
}
@Override
......
package es.yonatan.tfg.translator;
import com.deepl.api.Translator;
import es.yonatan.tfg.model.ITranslatable;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.util.List;
/**
 * {@link IOnlineTranslator} built on the DeepL client library.
 *
 * <p>The request to the DeepL service is currently disabled: {@link #translate} assigns the
 * lines of a canned response ({@link #STUB_RESPONSE}) to the units instead of contacting the API.
 *
 * @param <T> type of the translatable elements handled by this translator
 */
public class DeeplTranslator<T extends ITranslatable> implements IOnlineTranslator<T> {

    /** Canned, newline-separated response used while the real API call is disabled. */
    private static final String STUB_RESPONSE = "Blocked\n" +
            "development\n" +
            "+ Add a card\n" +
            "=\n" +
            "development\n" +
            "Tesseract library documentation\n" +
            "OCR\n" +
            "=\n" +
            "memory\n" +
            "development\n" +
            "Different types of models and\n" +
            "Pending\n" +
            "In process\n" +
            "memory\n" +
            "memory\n" +
            "miscellaneous\n" +
            "Drafting of the report\n" +
            "Project planning\n" +
            "=\n" +
            "report\n" +
            "report\n" +
            "development\n" +
            "Bibliography documentation\n" +
            "Translation services: online and offline\n" +
            "memory\n" +
            "offline. Study\n" +
            "recognition methods, study\n" +
            "memory\n" +
            "development\n" +
            "Spatial Hash Grid data structure\n" +
            "+\n" +
            "card\n" +
            "Documentation of the development\n" +
            "development\n" +
            "development\n" +
            "Documentation of the structures of\n" +
            "TFG development\n" +
            "data created\n" +
            "development\n" +
            "development\n" +
            "Preliminary image optimization for\n" +
            "Definition of the structure of the\n" +
            "improve the results of the\n" +
            "project\n" +
            "recognition\n" +
            "memory\n" +
            "development\n" +
            "development\n" +
            "Text color detection and\n" +
            "Analysis of background collision algorithms\n" +
            "background\n" +
            "bounding box 2d\n" +
            "miscellaneous\n" +
            "development\n" +
            "Creation of data structures\n" +
            "Training model recognition\n" +
            "from\n" +
            "to improve in\n" +
            "specific\n" +
            "within the scope of the application\n" +
            "development\n" +
            "miscellaneous\n" +
            "Git structure and\n" +
            "project\n" +
            "Application packaging\n" +
            "+ Add a card\n" +
            "a\n" +
            "+ Add a card\n" +
            "+ Add another list\n" +
            "Pending review\n" +
            "Completed\n" +
            "memo\n" +
            "development\n" +
            "Creation of trello for tracking\n" +
            "First approach to\n" +
            "text recognition using\n" +
            "development\n" +
            "Tesseract\n" +
            "Analysis of text recognition libraries\n" +
            "libraries\n" +
            "+ Add a card\n";

    /** Source language reported while the API call is disabled. */
    private static final String STUB_SOURCE_LANG = "es";

    /** DeepL client; created from the API key but not called until the stub is replaced. */
    private final Translator translator;

    /**
     * Creates a translator that uses the given DeepL API key.
     *
     * @param apiKey the DeepL authentication key
     */
    public DeeplTranslator(String apiKey) {
        translator = new Translator(apiKey);
    }

    /**
     * Assigns a translation to the given units, matching response lines to units by position.
     *
     * <p>If there are more units than response lines, the surplus units are left untouched
     * (their translation stays as it was) instead of failing with an
     * {@link ArrayIndexOutOfBoundsException}.
     *
     * @param units      the elements to translate; each matched element gets its translation set
     * @param sourceLang the source language; currently unused because the API call is disabled
     * @param targetLang the target language, echoed in the result
     * @return a result wrapping the same {@code units} list, the source language and
     *         {@code targetLang}
     */
    @Override
    public @Nullable TranslationResult<T> translate(@NotNull List<T> units, @Nullable String sourceLang, @NotNull String targetLang) {
        // TODO: replace the stub with the real request: join every unit's getText() with "\n",
        //  call translator.translateText(joined, sourceLang, targetLang), split its getText() on
        //  "\n", report getDetectedSourceLanguage() as the source language, and handle
        //  DeepLException / InterruptedException.
        var lines = STUB_RESPONSE.split("\n");

        // Iterate instead of calling units.get(i) so linked lists are not traversed per index.
        int index = 0;
        for (T unit : units) {
            if (index == lines.length) {
                break; // fewer response lines than units: leave the remaining units untouched
            }
            unit.setTranslation(lines[index++]);
        }

        return new TranslationResult<>(units, STUB_SOURCE_LANG, targetLang);
    }
}
package es.yonatan.tfg.translator;
import es.yonatan.tfg.model.ITranslatable;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.util.List;
/**
 * A translator that turns the text of {@link ITranslatable} elements into another language.
 *
 * @param <T> type of the translatable elements
 */
public interface IOnlineTranslator<T extends ITranslatable> {

    /**
     * Translates the given units into {@code targetLang}.
     *
     * <p>The return type is parameterized with {@code T} (it was a raw {@code TranslationResult}),
     * so callers get the units back with their element type and without unchecked warnings.
     *
     * @param units      the elements to translate
     * @param sourceLang the source language; may be {@code null} (presumably meaning the
     *                   service should detect it — confirm against the implementation)
     * @param targetLang the language to translate into
     * @return the translation result; declared nullable
     */
    @Nullable TranslationResult<T> translate(@NotNull List<T> units, @Nullable String sourceLang, @NotNull String targetLang);
}
package es.yonatan.tfg.translator;
import es.yonatan.tfg.model.ITranslatable;
import java.util.Collection;
/**
 * Outcome of a translation request.
 *
 * @param units      the elements that were passed to the translator
 * @param srcLang    the source language of the translation
 * @param targetLang the language the units were translated into
 * @param <T>        type of the translatable elements
 */
public record TranslationResult<T extends ITranslatable>(
        Collection<T> units,
        String srcLang,
        String targetLang) {
}
/**
 * Module descriptor of the TFG application.
 */
module TFG {
    // JDK modules
    requires java.desktop;

    // Third-party modules
    requires com.sun.jna;
    requires deepl.java;
    requires lept4j;
    requires org.jetbrains.annotations;
    requires tess4j;
}
\ No newline at end of file
src/main/resources/04.jpg

534 KiB

0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment