Skip to content
Snippets Groups Projects
Commit fca43b2a authored by Yonatan's avatar Yonatan
Browse files

Code cleanup and color recognition base

parent 18db8f52
Branches
No related tags found
1 merge request: !1 Final merge
package es.yonatan.tfg;
import es.yonatan.tfg.boxing.HasBoundingBox;
import es.yonatan.tfg.boxing.Sentence;
import es.yonatan.tfg.boxing.UnitGroup;
import es.yonatan.tfg.boxing.Unit;
import es.yonatan.tfg.recognizer.ColorRecognizer;
import es.yonatan.tfg.recognizer.TesseractRecognizer;
import es.yonatan.tfg.util.HashGrid2D;
import net.sourceforge.tess4j.util.ImageHelper;
......@@ -13,15 +14,19 @@ import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Comparator;
import java.util.HashSet;
public class Main {
public static void main(String[] args) throws IOException {
var file = new File("C:/Users/Yonatan/Desktop/04.jpg");
var time = System.currentTimeMillis();
var originalImage = ImageIO.read(new File("C:/Users/Yonatan/Desktop/04.jpg"));
//TODO: 10/04/2023 transform originalImage for best Tesseract OCR results: grayscale, binarize, noise reduction, etc.
var originalImage = ImageIO.read(file);
var image = ImageHelper.convertImageToGrayscale(originalImage);
//image = ImageHelper.convertImageToBinary(originalImage);
image = ImageHelper.convertImageToBinary(image);
System.out.println(new ColorRecognizer().getColorAt(file, 0, 0, 10, 10));
var tess = new TesseractRecognizer.Builder()
.setDataPath(new File("C:/Proyectos/TFG/src/main/resources/tessdata/"))
......@@ -32,24 +37,23 @@ public class Main {
var words = tess.recognize(image, 80);
System.out.println(words);
var grid = new HashGrid2D<>(image.getWidth(), image.getHeight(), 4, 4);
words.forEach(grid::insert);
// merge(grid);
merge(grid);
int mergeCount = 1;
for (int i = 0; i < mergeCount; i++) {
merge(grid);
}
// flatten the grid to get a list of all the words
var sentences = grid.flatten().stream()
//.filter(element -> element instanceof Sentence)
.toList();
sentences.stream().sorted(Comparator.comparingInt(value -> value.bounds().y())).forEach(System.out::println);
// Basic GUI that renders the bounding boxes resulting from the OCR recognition and is also used to test the word merge algorithm
var scale = 1.5;
var window = new JFrame();
window.setUndecorated(true);
window.setSize(new Dimension((int) (originalImage.getWidth() / scale), (int) (originalImage.getHeight() / scale)));
......@@ -98,28 +102,28 @@ public class Main {
grid.remove(word);
Sentence sentence;
if (word instanceof Sentence) sentence = (Sentence) word;
UnitGroup unitGroup;
if (word instanceof UnitGroup) unitGroup = (UnitGroup) word;
else {
sentence = new Sentence();
sentence.addWord((Unit) word, false);
unitGroup = new UnitGroup();
unitGroup.addWord((Unit) word, false);
}
for (HasBoundingBox collider : colliders) {
grid.remove(collider);
if (collider instanceof Sentence) {
sentence = Sentence.merge(sentence, (Sentence) collider);
if (collider instanceof UnitGroup) {
unitGroup = UnitGroup.merge(unitGroup, (UnitGroup) collider);
} else {
if (checked.contains(collider)) continue;
checked.add(collider);
sentence.addWord((Unit) collider, false);
unitGroup.addWord((Unit) collider, false);
}
}
sentence.recomputeOrder();
grid.insert(sentence);
unitGroup.recomputeOrder();
grid.insert(unitGroup);
}
}
}
\ No newline at end of file
package es.yonatan.tfg.boxing;
import es.yonatan.tfg.util.BoundingBox;
import es.yonatan.tfg.util.ColorData;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
public record Unit(@NotNull String text, float confidence, @NotNull BoundingBox bounds, @Nullable ColorData colorData) implements HasBoundingBox {
public record Unit(@NotNull String text, float confidence, @NotNull BoundingBox bounds) implements HasBoundingBox {
public static final Unit NEXT_IMAGE_UNIT = new Unit("next_image", 100, new BoundingBox(-1,-1,-1,-1), null);
public static final Unit NEXT_IMAGE_UNIT = new Unit("next_image", 100, new BoundingBox(-1,-1,-1,-1));
@Override
public String toString() {
......
......@@ -9,25 +9,25 @@ import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
public class Sentence implements HasBoundingBox {
public class UnitGroup implements HasBoundingBox {
private final @NotNull LinkedList<Unit> words;
private @Nullable BoundingBox bounds;
public Sentence() {
public UnitGroup() {
words = new LinkedList<>();
}
public static Sentence merge(Sentence... sentences) {
return merge(List.of(sentences));
public static UnitGroup merge(UnitGroup... unitGroups) {
return merge(List.of(unitGroups));
}
public static Sentence merge(Collection<Sentence> sentences) {
var merged = new Sentence();
public static UnitGroup merge(Collection<UnitGroup> unitGroups) {
var merged = new UnitGroup();
for (Sentence sentence : sentences) {
merged.addWords(sentence.words, false);
for (UnitGroup unitGroup : unitGroups) {
merged.addWords(unitGroup.words, false);
}
merged.recomputeOrder();
return merged;
......
package es.yonatan.tfg.recognizer;
import java.awt.*;
import java.io.File;
/**
 * Placeholder {@link IColorRecognizer} implementation.
 *
 * <p>The image is not inspected yet: every query is answered with {@link Color#WHITE},
 * regardless of the file or the region values passed in.
 */
public class ColorRecognizer implements IColorRecognizer {

    /** Fixed answer handed back until real colour detection is implemented. */
    private static final Color PLACEHOLDER_COLOR = Color.WHITE;

    /**
     * Returns the colour reported for the requested part of an image.
     *
     * @param image  image file the query refers to (currently ignored)
     * @param u      first region value (currently ignored); presumably the horizontal offset - TODO confirm
     * @param v      second region value (currently ignored); presumably the vertical offset - TODO confirm
     * @param width  region width (currently ignored)
     * @param height region height (currently ignored)
     * @return always {@link Color#WHITE} for now
     */
    @Override
    public Color getColorAt(File image, int u, int v, int width, int height) {
        // TODO: 4/04/2023 get primary color of image using leptonica library lept4j
        return PLACEHOLDER_COLOR;
    }
}
package es.yonatan.tfg.recognizer;
import java.awt.*;
import java.io.File;
/**
 * Contract for components that report a {@link Color} for part of an image file.
 */
public interface IColorRecognizer {
/**
 * Reports the colour associated with the requested part of the given image.
 *
 * @param image  image file the query refers to
 * @param u      NOTE(review): presumably the horizontal offset of the region - confirm with implementations
 * @param v      NOTE(review): presumably the vertical offset of the region - confirm with implementations
 * @param width  width of the region; units are presumably pixels - TODO confirm
 * @param height height of the region; units are presumably pixels - TODO confirm
 * @return the colour determined by the implementation
 */
Color getColorAt(File image, int u, int v, int width, int height);
}
......@@ -39,7 +39,7 @@ public final class TesseractRecognizer implements IRecognizer {
word.getText(),
word.getConfidence(),
BoundingBox.fromRect(word.getBoundingBox()
), null)));
))));
if (iterator.hasNext())
units.add(Unit.NEXT_IMAGE_UNIT);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment