From ff63d656a4b4eb414ab315f7403e70145c78feb1 Mon Sep 17 00:00:00 2001
From: teddy Kernix <93723692+tgamiette@users.noreply.github.com>
Date: Mon, 17 Feb 2025 14:57:13 +0100
Subject: [PATCH] add api chunking + test
---
.idea/git_toolbox_prj.xml | 15 +++
.idea/jarRepositories.xml | 5 +
Makefile | 6 +-
java/pom.xml | 32 ++++++-
.../GoofyFiles/GoofyFilesApplication.java | 2 +-
.../com/goofy/GoofyFiles/chunking/Chunk.java | 33 +++++++
.../GoofyFiles/chunking/ChunkingService.java | 78 +++++++++++++++
.../goofy/GoofyFiles/chunking/RabinKarp.java | 33 +++++++
.../compression/CompressionService.java | 79 ++++++++++++++++
.../controller/api/ChunkingController.java | 52 ++++++++++
.../chunking/ChunkingPerformanceTest.java | 94 +++++++++++++++++++
11 files changed, 426 insertions(+), 3 deletions(-)
create mode 100644 .idea/git_toolbox_prj.xml
create mode 100644 java/src/main/java/com/goofy/GoofyFiles/chunking/Chunk.java
create mode 100644 java/src/main/java/com/goofy/GoofyFiles/chunking/ChunkingService.java
create mode 100644 java/src/main/java/com/goofy/GoofyFiles/chunking/RabinKarp.java
create mode 100644 java/src/main/java/com/goofy/GoofyFiles/compression/CompressionService.java
create mode 100644 java/src/main/java/com/goofy/GoofyFiles/controller/api/ChunkingController.java
create mode 100644 java/src/test/java/com/goofy/GoofyFiles/chunking/ChunkingPerformanceTest.java
diff --git a/.idea/git_toolbox_prj.xml b/.idea/git_toolbox_prj.xml
new file mode 100644
index 0000000..02b915b
--- /dev/null
+++ b/.idea/git_toolbox_prj.xml
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/jarRepositories.xml b/.idea/jarRepositories.xml
index 712ab9d..947ef88 100644
--- a/.idea/jarRepositories.xml
+++ b/.idea/jarRepositories.xml
@@ -6,6 +6,11 @@
+
+
+
+
+
diff --git a/Makefile b/Makefile
index 1593de2..585cd6c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: docker-build docker-up docker-clean docker-compile docker-update
+.PHONY: docker-build docker-up docker-clean docker-compile docker-update docker-test-perf
docker-build:
docker-compose build
@@ -17,3 +17,7 @@ docker-update:
docker-compose exec app mvn clean package -f /source/pom.xml && \
docker-compose exec app cp /source/target/GoofyFiles-0.0.1-SNAPSHOT.jar /app/app.jar && \
docker-compose restart app
+
+# Run the performance tests
+docker-test-perf:
+ docker-compose exec app mvn test -f /source/pom.xml -Dtest=ChunkingPerformanceTest
diff --git a/java/pom.xml b/java/pom.xml
index 5d75d17..14f6e4b 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -29,6 +29,12 @@
17
+
+
+ jitpack.io
+ https://jitpack.io
+
+
org.springframework.boot
@@ -54,7 +60,31 @@
spring-boot-devtools
true
-
+
+ org.lz4
+ lz4-java
+ 1.8.0
+
+
+ com.github.luben
+ zstd-jni
+ 1.5.6-10
+
+
+ org.xerial.snappy
+ snappy-java
+ 1.1.10.7
+
+
+ com.github.themadcreator
+ rabinfingerprint
+ master
+
+
+ org.bouncycastle
+ bcprov-jdk18on
+ 1.77
+
diff --git a/java/src/main/java/com/goofy/GoofyFiles/GoofyFilesApplication.java b/java/src/main/java/com/goofy/GoofyFiles/GoofyFilesApplication.java
index 52d66aa..d81c68b 100644
--- a/java/src/main/java/com/goofy/GoofyFiles/GoofyFilesApplication.java
+++ b/java/src/main/java/com/goofy/GoofyFiles/GoofyFilesApplication.java
@@ -16,6 +16,6 @@ public static void main(String[] args) {
// Mapping de la racine pour afficher "Hello, World!"
@GetMapping("/")
public String helloWorld() {
- return "Hello, World!5";
+ return "Hello, World!";
}
}
\ No newline at end of file
diff --git a/java/src/main/java/com/goofy/GoofyFiles/chunking/Chunk.java b/java/src/main/java/com/goofy/GoofyFiles/chunking/Chunk.java
new file mode 100644
index 0000000..c4e0eb2
--- /dev/null
+++ b/java/src/main/java/com/goofy/GoofyFiles/chunking/Chunk.java
@@ -0,0 +1,52 @@
+package com.goofy.GoofyFiles.chunking;
+
+import java.util.Arrays;
+
+/**
+ * Immutable holder for one chunk cut out of a file: its bytes, its hash, its size and its position.
+ *
+ * <p>The byte array is copied on the way in and on the way out, so callers can never alter the
+ * stored content.
+ */
+public class Chunk {
+    private final byte[] data;
+    private final String hash;
+    private final int originalSize;
+    private final int position;
+
+    /**
+     * Builds a chunk around a private copy of {@code data}.
+     *
+     * @param data     chunk content; copied, must not be {@code null}
+     * @param hash     hash string associated with the content, stored as given
+     * @param position position value reported by {@link #getPosition()}, stored as given
+     * @throws NullPointerException if {@code data} is {@code null}
+     */
+    public Chunk(byte[] data, String hash, int position) {
+        final byte[] snapshot = data.clone();
+        this.data = snapshot;
+        this.originalSize = snapshot.length;
+        this.position = position;
+        this.hash = hash;
+    }
+
+    /** Returns a fresh copy of the chunk bytes on every call. */
+    public byte[] getData() {
+        return Arrays.copyOf(data, originalSize);
+    }
+
+    /** Returns the hash string supplied at construction. */
+    public String getHash() {
+        return hash;
+    }
+
+    /** Returns the number of bytes in the chunk. */
+    public int getOriginalSize() {
+        return originalSize;
+    }
+
+    /** Returns the position supplied at construction. */
+    public int getPosition() {
+        return position;
+    }
+}
diff --git a/java/src/main/java/com/goofy/GoofyFiles/chunking/ChunkingService.java b/java/src/main/java/com/goofy/GoofyFiles/chunking/ChunkingService.java
new file mode 100644
index 0000000..7fddb8c
--- /dev/null
+++ b/java/src/main/java/com/goofy/GoofyFiles/chunking/ChunkingService.java
@@ -0,0 +1,107 @@
+package com.goofy.GoofyFiles.chunking;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.HexFormat;
+import java.util.List;
+
+import org.springframework.stereotype.Service;
+
+/**
+ * Splits files into variable-size chunks and tags each chunk with the lowercase hex SHA-256
+ * digest of its bytes.
+ *
+ * <p>The service holds no per-call state: every {@link #chunkFile(File)} invocation uses its own
+ * fingerprint and digest, so the singleton bean can serve concurrent requests.
+ */
+@Service
+public class ChunkingService {
+    private static final int MIN_CHUNK_SIZE = 4 * 1024; // 4KB
+    private static final int MAX_CHUNK_SIZE = 64 * 1024; // 64KB
+    private static final String HASH_ALGORITHM = "SHA-256";
+
+    /**
+     * Creates the service, failing fast when the digest algorithm is unavailable.
+     *
+     * @throws NoSuchAlgorithmException if no provider implements SHA-256
+     */
+    public ChunkingService() throws NoSuchAlgorithmException {
+        // Probe once so a missing provider surfaces at construction, not on the first request.
+        MessageDigest.getInstance(HASH_ALGORITHM);
+    }
+
+    /**
+     * Reads {@code file} from start to end and cuts it into consecutive chunks.
+     *
+     * <p>A chunk is closed at the first byte, at or after {@code MIN_CHUNK_SIZE}, that the
+     * fingerprint reports as a cut point, or once it holds {@code MAX_CHUNK_SIZE} bytes,
+     * whichever happens first. Bytes left over at end of file form the last chunk; an empty
+     * file yields an empty list. Positions are tracked as {@code int}, so offsets are only
+     * meaningful for files up to {@link Integer#MAX_VALUE} bytes.
+     *
+     * @param file file to read
+     * @return the chunks in file order, each carrying the offset of its first byte
+     * @throws IOException if the file cannot be opened or read
+     */
+    public List<Chunk> chunkFile(File file) throws IOException {
+        List<Chunk> chunks = new ArrayList<>();
+        RabinKarp rabinKarp = new RabinKarp();
+        MessageDigest digest = newDigest();
+
+        // Buffered because the loop consumes one byte per read() call.
+        try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
+            ByteArrayOutputStream currentChunk = new ByteArrayOutputStream(MAX_CHUNK_SIZE);
+            int currentPosition = 0;
+            int b;
+
+            while ((b = in.read()) != -1) {
+                currentChunk.write(b);
+                int currentChunkSize = currentChunk.size();
+
+                // The fingerprint is fed only once the minimum size is reached, so the first
+                // MIN_CHUNK_SIZE - 1 bytes of a chunk never influence where it is cut.
+                boolean contentCut = currentChunkSize >= MIN_CHUNK_SIZE
+                        && rabinKarp.pushByte((byte) b);
+                if (contentCut || currentChunkSize >= MAX_CHUNK_SIZE) {
+                    chunks.add(toChunk(currentChunk.toByteArray(), digest, currentPosition));
+
+                    // Start the next chunk from a clean state.
+                    currentPosition += currentChunkSize;
+                    currentChunk.reset();
+                    rabinKarp.reset();
+                }
+            }
+
+            // Flush whatever follows the last cut point.
+            if (currentChunk.size() > 0) {
+                chunks.add(toChunk(currentChunk.toByteArray(), digest, currentPosition));
+            }
+        }
+
+        return chunks;
+    }
+
+    /** Wraps {@code chunkData} and the hex form of its digest in a {@link Chunk}. */
+    private static Chunk toChunk(byte[] chunkData, MessageDigest digest, int position) {
+        // digest(byte[]) resets the instance when it returns, so it can be reused per chunk.
+        String hash = HexFormat.of().formatHex(digest.digest(chunkData));
+        return new Chunk(chunkData, hash, position);
+    }
+
+    /** Creates a digest owned by one call; {@link MessageDigest} instances are not thread-safe. */
+    private static MessageDigest newDigest() {
+        try {
+            return MessageDigest.getInstance(HASH_ALGORITHM);
+        } catch (NoSuchAlgorithmException e) {
+            // The constructor already resolved the algorithm, so this is not expected.
+            throw new IllegalStateException(HASH_ALGORITHM + " digest unavailable", e);
+        }
+    }
+}
diff --git a/java/src/main/java/com/goofy/GoofyFiles/chunking/RabinKarp.java b/java/src/main/java/com/goofy/GoofyFiles/chunking/RabinKarp.java
new file mode 100644
index 0000000..e84dfd1
--- /dev/null
+++ b/java/src/main/java/com/goofy/GoofyFiles/chunking/RabinKarp.java
@@ -0,0 +1,52 @@
+package com.goofy.GoofyFiles.chunking;
+
+import org.rabinfingerprint.fingerprint.RabinFingerprintLong;
+import org.rabinfingerprint.polynomial.Polynomial;
+
+/**
+ * Cut-point detector that feeds bytes to a {@link RabinFingerprintLong} and inspects its value.
+ *
+ * <p>Instances hold mutable state (the fingerprint and a byte counter); call {@link #reset()}
+ * before starting a new chunk.
+ */
+public class RabinKarp {
+    private static final int WINDOW_SIZE = 48;
+    private static final int MASK = (1 << 13) - 1; // for an average chunk size of 8KB
+
+    private final RabinFingerprintLong fingerprint;
+    private int windowPos;
+
+    /** Creates a detector with a fresh fingerprint and a zeroed byte counter. */
+    public RabinKarp() {
+        // NOTE(review): the constant is meant to be an irreducible polynomial for the Rabin
+        // fingerprint; nothing here verifies that property - confirm.
+        final Polynomial polynomial = Polynomial.createFromLong(9223372036854775783L);
+        this.fingerprint = new RabinFingerprintLong(polynomial);
+        this.windowPos = 0;
+    }
+
+    /** Clears the byte counter and the fingerprint. */
+    public void reset() {
+        windowPos = 0;
+        fingerprint.reset();
+    }
+
+    /**
+     * Feeds one byte to the fingerprint and tells whether it ends a chunk.
+     *
+     * @param b next byte of the stream
+     * @return {@code true} when at least {@code WINDOW_SIZE} bytes have been pushed since the
+     *         last reset and the fingerprint bits selected by {@code MASK} are all zero
+     */
+    public boolean pushByte(byte b) {
+        fingerprint.pushByte(b);
+        windowPos += 1;
+
+        // Too few bytes so far: never cut, whatever the fingerprint says.
+        if (windowPos < WINDOW_SIZE) {
+            return false;
+        }
+        final long maskedBits = fingerprint.getFingerprintLong() & MASK;
+        return maskedBits == 0;
+    }
+}
diff --git a/java/src/main/java/com/goofy/GoofyFiles/compression/CompressionService.java b/java/src/main/java/com/goofy/GoofyFiles/compression/CompressionService.java
new file mode 100644
index 0000000..f3a5078
--- /dev/null
+++ b/java/src/main/java/com/goofy/GoofyFiles/compression/CompressionService.java
@@ -0,0 +1,108 @@
+package com.goofy.GoofyFiles.compression;
+
+import java.io.IOException;
+
+import org.springframework.stereotype.Service;
+import org.xerial.snappy.Snappy;
+
+import com.github.luben.zstd.Zstd;
+
+import net.jpountz.lz4.LZ4Compressor;
+import net.jpountz.lz4.LZ4Factory;
+import net.jpountz.lz4.LZ4FastDecompressor;
+
+/** Compresses and decompresses byte arrays with LZ4, Zstandard or Snappy. */
+@Service
+public class CompressionService {
+
+    private final LZ4Compressor lz4Compressor;
+    private final LZ4FastDecompressor lz4Decompressor;
+
+    /** Algorithms supported by {@link #compress} and {@link #decompress}. */
+    public enum CompressionType {
+        LZ4,
+        ZSTD,
+        SNAPPY
+    }
+
+    /** Resolves the LZ4 compressor and decompressor once. */
+    public CompressionService() {
+        LZ4Factory lz4Factory = LZ4Factory.fastestInstance();
+        this.lz4Compressor = lz4Factory.fastCompressor();
+        this.lz4Decompressor = lz4Factory.fastDecompressor();
+    }
+
+    /**
+     * Compresses {@code data} with the selected algorithm.
+     *
+     * @param data bytes to compress
+     * @param type algorithm to use
+     * @return the compressed bytes
+     * @throws RuntimeException wrapping any failure raised while compressing
+     */
+    public byte[] compress(byte[] data, CompressionType type) {
+        try {
+            return switch (type) {
+                case LZ4 -> compressLZ4(data);
+                case ZSTD -> compressZstd(data);
+                case SNAPPY -> compressSnappy(data);
+            };
+        } catch (Exception e) {
+            throw new RuntimeException("Compression failed", e);
+        }
+    }
+
+    /**
+     * Decompresses {@code compressedData} with the selected algorithm.
+     *
+     * @param compressedData bytes produced by {@link #compress} with the same {@code type}
+     * @param type           algorithm that produced {@code compressedData}
+     * @param originalLength uncompressed length: handed to the LZ4 decompressor as the
+     *                       destination length, used for ZSTD only when the frame header
+     *                       carries no usable size, not used for SNAPPY
+     * @return the decompressed bytes
+     * @throws RuntimeException wrapping any failure raised while decompressing
+     */
+    public byte[] decompress(byte[] compressedData, CompressionType type, int originalLength) {
+        try {
+            return switch (type) {
+                case LZ4 -> decompressLZ4(compressedData, originalLength);
+                case ZSTD -> decompressZstd(compressedData, originalLength);
+                case SNAPPY -> decompressSnappy(compressedData);
+            };
+        } catch (Exception e) {
+            throw new RuntimeException("Decompression failed", e);
+        }
+    }
+
+    private byte[] compressLZ4(byte[] data) {
+        return lz4Compressor.compress(data);
+    }
+
+    private byte[] decompressLZ4(byte[] compressedData, int originalLength) {
+        return lz4Decompressor.decompress(compressedData, originalLength);
+    }
+
+    private byte[] compressZstd(byte[] data) {
+        return Zstd.compress(data);
+    }
+
+    /** Decompresses a Zstandard frame, sizing the output from the frame header when possible. */
+    private byte[] decompressZstd(byte[] compressedData, int originalLength) {
+        long frameSize = Zstd.getFrameContentSize(compressedData);
+        // The header value is not guaranteed to be a valid array length (it may be missing,
+        // non-positive or larger than an int); casting it blindly would request a bogus
+        // buffer, so fall back to the caller-supplied length (clamped at zero) in that case.
+        boolean usable = frameSize > 0 && frameSize <= Integer.MAX_VALUE;
+        int capacity = usable ? (int) frameSize : Math.max(originalLength, 0);
+        return Zstd.decompress(compressedData, capacity);
+    }
+
+    private byte[] compressSnappy(byte[] data) throws IOException {
+        return Snappy.compress(data);
+    }
+
+    private byte[] decompressSnappy(byte[] compressedData) throws IOException {
+        return Snappy.uncompress(compressedData);
+    }
+}
diff --git a/java/src/main/java/com/goofy/GoofyFiles/controller/api/ChunkingController.java b/java/src/main/java/com/goofy/GoofyFiles/controller/api/ChunkingController.java
new file mode 100644
index 0000000..a6bb410
--- /dev/null
+++ b/java/src/main/java/com/goofy/GoofyFiles/controller/api/ChunkingController.java
@@ -0,0 +1,84 @@
+package com.goofy.GoofyFiles.controller.api;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.List;
+import java.util.Map;
+
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.RestController;
+import org.springframework.web.multipart.MultipartFile;
+
+import com.goofy.GoofyFiles.chunking.Chunk;
+import com.goofy.GoofyFiles.chunking.ChunkingService;
+
+/** REST endpoint that chunks an uploaded file and reports statistics about the result. */
+@RestController
+@RequestMapping("api/chunking")
+public class ChunkingController {
+
+    private final ChunkingService chunkingService;
+
+    public ChunkingController(ChunkingService chunkingService) {
+        this.chunkingService = chunkingService;
+    }
+
+    /**
+     * Chunks the uploaded file and returns its chunking statistics.
+     *
+     * @param file multipart upload sent under the {@code file} parameter
+     * @return 200 with {@code fileName}, {@code originalSize}, {@code numberOfChunks},
+     *         {@code averageChunkSize} and {@code uniqueChunks}; 500 with an {@code error}
+     *         entry when the upload cannot be stored or read
+     */
+    @PostMapping("/analyze")
+    public ResponseEntity<Map<String, Object>> analyzeFile(@RequestParam("file") MultipartFile file) {
+        File tempFile = null;
+        try {
+            // Fixed suffix: the client-supplied name is untrusted and may be null, very long
+            // or contain path separators, none of which belongs in a temp-file name.
+            tempFile = Files.createTempFile("upload-", ".tmp").toFile();
+            file.transferTo(tempFile);
+
+            List<Chunk> chunks = chunkingService.chunkFile(tempFile);
+
+            String fileName = file.getOriginalFilename();
+            long originalSize = file.getSize();
+            // An empty upload yields no chunks; report 0 rather than dividing by zero.
+            long averageChunkSize = chunks.isEmpty() ? 0L : originalSize / chunks.size();
+            long uniqueChunks = chunks.stream().map(Chunk::getHash).distinct().count();
+
+            // Map.of rejects null values, so a missing file name is reported as "".
+            Map<String, Object> stats = Map.of(
+                    "fileName", fileName != null ? fileName : "",
+                    "originalSize", originalSize,
+                    "numberOfChunks", chunks.size(),
+                    "averageChunkSize", averageChunkSize,
+                    "uniqueChunks", uniqueChunks);
+
+            return ResponseEntity.ok(stats);
+        } catch (IOException e) {
+            return ResponseEntity.internalServerError()
+                    .body(Map.of("error", "Failed to process file: " + e.getMessage()));
+        } finally {
+            deleteQuietly(tempFile);
+        }
+    }
+
+    /** Removes the temporary upload copy; a cleanup failure must not change the response. */
+    private static void deleteQuietly(File tempFile) {
+        if (tempFile == null) {
+            return;
+        }
+        try {
+            Files.deleteIfExists(tempFile.toPath());
+        } catch (IOException ignored) {
+            // Best effort: retry when the JVM exits instead of failing the request.
+            tempFile.deleteOnExit();
+        }
+    }
+}
diff --git a/java/src/test/java/com/goofy/GoofyFiles/chunking/ChunkingPerformanceTest.java b/java/src/test/java/com/goofy/GoofyFiles/chunking/ChunkingPerformanceTest.java
new file mode 100644
index 0000000..27437c8
--- /dev/null
+++ b/java/src/test/java/com/goofy/GoofyFiles/chunking/ChunkingPerformanceTest.java
@@ -0,0 +1,123 @@
+package com.goofy.GoofyFiles.chunking;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.security.NoSuchAlgorithmException;
+import java.util.List;
+import java.util.Random;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.springframework.util.StopWatch;
+
+/** Measures chunking throughput on generated files and checks that the chunks cover each file. */
+public class ChunkingPerformanceTest {
+
+    private static final int BLOCK_SIZE = 1024;
+    // Fixed seed: every run chunks the same bytes, so timings and chunk counts are comparable.
+    private static final long DATA_SEED = 42L;
+    private static final int[] FILE_SIZES = {
+        1 * 1024 * 1024, // 1 MB
+        10 * 1024 * 1024, // 10 MB
+        50 * 1024 * 1024 // 50 MB
+    };
+
+    private ChunkingService chunkingService;
+
+    @BeforeEach
+    void setUp() throws NoSuchAlgorithmException {
+        chunkingService = new ChunkingService();
+    }
+
+    @Test
+    void testChunkingPerformance() throws IOException {
+        System.out.println("\n=== Test de Performance du Chunking ===");
+        System.out.println("Format: Taille | Temps | Vitesse | Nb Chunks | Taille Moy | Duplication");
+        System.out.println("--------------------------------------------------------");
+
+        for (int fileSize : FILE_SIZES) {
+            File testFile = createTestFile(fileSize);
+            try {
+                StopWatch stopWatch = new StopWatch();
+                stopWatch.start();
+                List<Chunk> chunks = chunkingService.chunkFile(testFile);
+                stopWatch.stop();
+
+                // Validate before computing statistics that divide by the chunk count.
+                assertNotNull(chunks);
+                assertFalse(chunks.isEmpty());
+                assertChunksCoverFile(chunks, testFile.length());
+
+                printStats(fileSize, stopWatch.getTotalTimeSeconds(), chunks);
+            } finally {
+                Files.deleteIfExists(testFile.toPath());
+            }
+        }
+    }
+
+    /** Checks that the chunks are non-empty, contiguous from offset 0 and add up to the file. */
+    private static void assertChunksCoverFile(List<Chunk> chunks, long fileLength) {
+        long expectedPosition = 0;
+        for (Chunk chunk : chunks) {
+            assertEquals(expectedPosition, chunk.getPosition());
+            assertTrue(chunk.getOriginalSize() > 0);
+            expectedPosition += chunk.getOriginalSize();
+        }
+        assertEquals(fileLength, expectedPosition);
+    }
+
+    /** Prints one result row in the format announced by the header. */
+    private static void printStats(int fileSize, double timeInSeconds, List<Chunk> chunks) {
+        double sizeInMb = fileSize / (1024.0 * 1024.0);
+        double speedMBps = sizeInMb / timeInSeconds;
+        double avgChunkSize = fileSize / (double) chunks.size();
+        long uniqueChunks = chunks.stream()
+                .map(Chunk::getHash)
+                .distinct()
+                .count();
+        double duplicationRate = 1.0 - ((double) uniqueChunks / chunks.size());
+
+        System.out.printf("%5.1f MB | %6.3f s | %6.1f MB/s | %8d | %7.0f B | %6.2f%%%n",
+                sizeInMb,
+                timeInSeconds,
+                speedMBps,
+                chunks.size(),
+                avgChunkSize,
+                duplicationRate * 100);
+    }
+
+    /**
+     * Creates a temporary file of {@code size} bytes (rounded up to whole 1 KB blocks) in which
+     * about 70% of the blocks repeat one pattern and the rest are random.
+     */
+    private File createTestFile(int size) throws IOException {
+        File file = File.createTempFile("perf-test-", ".dat");
+        file.deleteOnExit();
+
+        Random random = new Random(DATA_SEED);
+        byte[] repeatingPattern = new byte[BLOCK_SIZE];
+        random.nextBytes(repeatingPattern);
+        byte[] randomData = new byte[BLOCK_SIZE];
+
+        try (FileOutputStream fos = new FileOutputStream(file)) {
+            for (int written = 0; written < size; written += BLOCK_SIZE) {
+                // 70% chance of the repeating pattern, 30% chance of fresh random data.
+                if (random.nextDouble() < 0.7) {
+                    fos.write(repeatingPattern);
+                } else {
+                    random.nextBytes(randomData);
+                    fos.write(randomData);
+                }
+            }
+        }
+
+        return file;
+    }
+}