Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

add api chunking + test #3

Merged
merged 2 commits into from
Feb 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .idea/git_toolbox_prj.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions .idea/jarRepositories.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: build up clean compile update
.PHONY: docker-test-perf build up clean compile update

build:
docker-compose build
Expand All @@ -17,3 +17,7 @@ update:
docker-compose exec app mvn clean package -f /source/pom.xml && \
docker-compose exec app cp /source/target/GoofyFiles-0.0.1-SNAPSHOT.jar /app/app.jar && \
docker-compose restart app

# Exécuter les tests de performance
docker-test-perf:
docker-compose exec app mvn test -f /source/pom.xml -Dtest=ChunkingPerformanceTest
32 changes: 31 additions & 1 deletion java/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@
<properties>
<java.version>17</java.version>
</properties>
<repositories>
<repository>
<id>jitpack.io</id>
<url>https://jitpack.io</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
Expand All @@ -54,7 +60,31 @@
<artifactId>spring-boot-devtools</artifactId>
<optional>true</optional>
</dependency>

<dependency>
<groupId>org.lz4</groupId>
<artifactId>lz4-java</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>com.github.luben</groupId>
<artifactId>zstd-jni</artifactId>
<version>1.5.6-10</version>
</dependency>
<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
<version>1.1.10.7</version>
</dependency>
<!-- NOTE(review): presumably resolved through the jitpack.io repository declared in this POM.
     The version "master" looks like a branch reference rather than a fixed release, so the
     resolved code may change between builds; consider pinning a tag or commit hash. -->
<dependency>
<groupId>com.github.themadcreator</groupId>
<artifactId>rabinfingerprint</artifactId>
<version>master</version>
</dependency>
<dependency>
<groupId>org.bouncycastle</groupId>
<artifactId>bcprov-jdk18on</artifactId>
<version>1.77</version>
</dependency>
</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ public static void main(String[] args) {
// Mapping de la racine pour afficher "Hello, World!"
@GetMapping("/")
public String helloWorld() {
return "Hello, World!5";
return "Hello, World!";
}
}
33 changes: 33 additions & 0 deletions java/src/main/java/com/goofy/GoofyFiles/chunking/Chunk.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package com.goofy.GoofyFiles.chunking;

import java.util.Arrays;

/**
 * Immutable description of one chunk: its bytes, the hash computed for them,
 * the number of bytes, and the position value supplied by the creator.
 *
 * <p>The byte array is copied both when the chunk is built and whenever it is
 * read back, so neither the creator nor a reader can modify the stored content.
 */
public class Chunk {
    private final byte[] data;
    private final String hash;
    private final int originalSize;
    private final int position;

    /**
     * Builds a chunk from a private copy of {@code data}.
     *
     * @param data     chunk content; must not be {@code null}
     * @param hash     hash string associated with the content (stored as given)
     * @param position position value associated with the chunk (stored as given)
     */
    public Chunk(byte[] data, String hash, int position) {
        byte[] snapshot = data.clone();
        this.data = snapshot;
        this.originalSize = snapshot.length;
        this.hash = hash;
        this.position = position;
    }

    /** @return a fresh copy of the chunk content */
    public byte[] getData() {
        return Arrays.copyOf(data, originalSize);
    }

    /** @return the hash string given at construction */
    public String getHash() {
        return hash;
    }

    /** @return the number of bytes in the chunk content */
    public int getOriginalSize() {
        return originalSize;
    }

    /** @return the position value given at construction */
    public int getPosition() {
        return position;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package com.goofy.GoofyFiles.chunking;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HexFormat;
import java.util.List;

import org.springframework.stereotype.Service;

/**
 * Splits a file into variable-size chunks and computes a SHA-256 hex digest for each one.
 *
 * <p>A chunk ends when it holds at least {@code MIN_CHUNK_SIZE} bytes and the
 * {@link RabinKarp} detector reports a cut point, or when it reaches
 * {@code MAX_CHUNK_SIZE} bytes, whichever comes first.
 *
 * <p>All working state (boundary detector, digest, buffers) is created per call, so this
 * class shares no mutable state between invocations of {@link #chunkFile(File)} and the
 * result for a file does not depend on files processed earlier.
 */
@Service
public class ChunkingService {
    private static final int MIN_CHUNK_SIZE = 4 * 1024; // 4KB
    private static final int MAX_CHUNK_SIZE = 64 * 1024; // 64KB
    private static final String HASH_ALGORITHM = "SHA-256";

    /**
     * Creates the service.
     *
     * @throws NoSuchAlgorithmException if the hash algorithm is not available in this JVM
     */
    public ChunkingService() throws NoSuchAlgorithmException {
        // Fail fast at construction time instead of on the first request.
        MessageDigest.getInstance(HASH_ALGORITHM);
    }

    /**
     * Reads {@code file} sequentially and cuts it into chunks.
     *
     * @param file the file to split
     * @return the chunks in file order; empty when the file is empty. Each chunk carries its
     *         bytes, its lowercase hex digest and the byte offset at which it starts.
     * @throws IOException if the file cannot be opened or read
     */
    public List<Chunk> chunkFile(File file) throws IOException {
        // Fresh per-call state: a shared detector/digest would be corrupted by concurrent
        // requests, and a detector left dirty by a previous file's trailing chunk would
        // shift the boundaries of the next file.
        RabinKarp boundaryDetector = new RabinKarp();
        MessageDigest digest = newDigest();
        List<Chunk> chunks = new ArrayList<>();

        // Buffered: the loop reads one byte at a time, which on a bare FileInputStream
        // costs one native read per byte.
        try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
            ByteArrayOutputStream currentChunk = new ByteArrayOutputStream(MAX_CHUNK_SIZE);
            // NOTE(review): int offset (Chunk's constructor takes an int) overflows for
            // files larger than 2 GiB.
            int currentPosition = 0;
            int b;

            while ((b = in.read()) != -1) {
                currentChunk.write(b);
                int currentChunkSize = currentChunk.size();

                // The detector is only fed once the minimum size is reached (short-circuit),
                // which keeps chunk boundaries identical to the previous implementation.
                boolean contentBoundary =
                        currentChunkSize >= MIN_CHUNK_SIZE && boundaryDetector.pushByte((byte) b);

                if (contentBoundary || currentChunkSize >= MAX_CHUNK_SIZE) {
                    chunks.add(buildChunk(currentChunk, digest, currentPosition));

                    // Start the next chunk from a clean state.
                    currentPosition += currentChunkSize;
                    currentChunk.reset();
                    boundaryDetector.reset();
                }
            }

            // Emit whatever remains after the last cut point.
            if (currentChunk.size() > 0) {
                chunks.add(buildChunk(currentChunk, digest, currentPosition));
            }
        }

        return chunks;
    }

    /** Snapshots the buffered bytes into a {@link Chunk} starting at {@code position}. */
    private static Chunk buildChunk(ByteArrayOutputStream buffer, MessageDigest digest, int position) {
        byte[] chunkData = buffer.toByteArray();
        return new Chunk(chunkData, calculateHash(digest, chunkData), position);
    }

    /** Returns the lowercase hex digest of {@code data}; {@code digest} is left reset for reuse. */
    private static String calculateHash(MessageDigest digest, byte[] data) {
        return HexFormat.of().formatHex(digest.digest(data));
    }

    /** Creates a digest for one chunking run. */
    private static MessageDigest newDigest() {
        try {
            return MessageDigest.getInstance(HASH_ALGORITHM);
        } catch (NoSuchAlgorithmException e) {
            // The constructor already verified availability, so this is not expected.
            throw new IllegalStateException(HASH_ALGORITHM + " is not available", e);
        }
    }
}
33 changes: 33 additions & 0 deletions java/src/main/java/com/goofy/GoofyFiles/chunking/RabinKarp.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package com.goofy.GoofyFiles.chunking;

import org.rabinfingerprint.fingerprint.RabinFingerprintLong;
import org.rabinfingerprint.polynomial.Polynomial;

/**
 * Cut-point detector for chunking: every byte is pushed into a Rabin fingerprint, and a
 * cut point is reported when enough bytes have been pushed since the last reset and the
 * low 13 bits of the fingerprint are all zero.
 */
public class RabinKarp {
    /** Number of bytes that must be pushed after a reset before a cut point can be reported. */
    private static final int WINDOW_SIZE = 48;
    private static final int MASK = (1 << 13) - 1; // targets an average chunk size of 8KB

    private final RabinFingerprintLong fingerprint;
    private int windowPos;

    /** Creates a detector with an empty fingerprint and a zero byte count. */
    public RabinKarp() {
        // NOTE(review): the original comment describes this constant as an irreducible
        // polynomial for the Rabin fingerprint; that property is not verified here.
        this.fingerprint = new RabinFingerprintLong(Polynomial.createFromLong(9223372036854775783L));
        this.windowPos = 0;
    }

    /** Clears the fingerprint and the count of bytes pushed so far. */
    public void reset() {
        fingerprint.reset();
        windowPos = 0;
    }

    /**
     * Feeds one byte into the fingerprint.
     *
     * @param b the next byte of the stream
     * @return {@code true} when at least {@code WINDOW_SIZE} bytes have been pushed since the
     *         last reset and the masked fingerprint is zero
     */
    public boolean pushByte(byte b) {
        fingerprint.pushByte(b);
        windowPos += 1;

        // Too few bytes since the last reset: never a cut point.
        if (windowPos < WINDOW_SIZE) {
            return false;
        }

        long lowBits = fingerprint.getFingerprintLong() & MASK;
        return lowBits == 0;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package com.goofy.GoofyFiles.compression;

import org.springframework.stereotype.Service;
import org.xerial.snappy.Snappy;

import com.github.luben.zstd.Zstd;

import net.jpountz.lz4.LZ4Compressor;
import net.jpountz.lz4.LZ4Factory;
import net.jpountz.lz4.LZ4FastDecompressor;

/**
 * Compresses and decompresses byte arrays with LZ4, Zstandard or Snappy.
 *
 * <p>Any failure inside a codec is rethrown as a {@link RuntimeException} that keeps the
 * original exception as its cause.
 */
@Service
public class CompressionService {

    /** Codecs supported by this service. */
    public enum CompressionType {
        LZ4,
        ZSTD,
        SNAPPY
    }

    private final LZ4Compressor lz4Compressor;
    private final LZ4FastDecompressor lz4Decompressor;

    /** Obtains the LZ4 compressor and decompressor from the fastest available LZ4 factory. */
    public CompressionService() {
        LZ4Factory factory = LZ4Factory.fastestInstance();
        this.lz4Compressor = factory.fastCompressor();
        this.lz4Decompressor = factory.fastDecompressor();
    }

    /**
     * Compresses {@code data} with the selected codec.
     *
     * @param data bytes to compress
     * @param type codec to use
     * @return the compressed bytes
     * @throws RuntimeException with message "Compression failed" if anything goes wrong
     */
    public byte[] compress(byte[] data, CompressionType type) {
        try {
            return switch (type) {
                case LZ4 -> lz4Compressor.compress(data);
                case ZSTD -> Zstd.compress(data);
                case SNAPPY -> Snappy.compress(data);
            };
        } catch (Exception cause) {
            throw new RuntimeException("Compression failed", cause);
        }
    }

    /**
     * Restores data produced by {@link #compress(byte[], CompressionType)}.
     *
     * @param compressedData bytes to decompress
     * @param type           codec that produced {@code compressedData}
     * @param originalLength uncompressed length; only the LZ4 branch reads it, the ZSTD branch
     *                       takes the size from the frame header and SNAPPY does not need it
     * @return the decompressed bytes
     * @throws RuntimeException with message "Decompression failed" if anything goes wrong
     */
    public byte[] decompress(byte[] compressedData, CompressionType type, int originalLength) {
        try {
            return switch (type) {
                case LZ4 -> lz4Decompressor.decompress(compressedData, originalLength);
                case ZSTD -> {
                    long frameContentSize = Zstd.getFrameContentSize(compressedData);
                    yield Zstd.decompress(compressedData, (int) frameContentSize);
                }
                case SNAPPY -> Snappy.uncompress(compressedData);
            };
        } catch (Exception cause) {
            throw new RuntimeException("Decompression failed", cause);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package com.goofy.GoofyFiles.controller.api;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.List;
import java.util.Map;

import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

import com.goofy.GoofyFiles.chunking.Chunk;
import com.goofy.GoofyFiles.chunking.ChunkingService;

/**
 * REST endpoint that chunks an uploaded file and reports chunking statistics.
 */
@RestController
@RequestMapping("api/chunking")
public class ChunkingController {

    private final ChunkingService chunkingService;

    public ChunkingController(ChunkingService chunkingService) {
        this.chunkingService = chunkingService;
    }

    /**
     * Chunks the uploaded file and returns statistics about the result.
     *
     * @param file the multipart upload sent under the {@code file} form field
     * @return 200 with a JSON object containing {@code fileName}, {@code originalSize},
     *         {@code numberOfChunks}, {@code averageChunkSize} (0 when there are no chunks)
     *         and {@code uniqueChunks}; 500 with an {@code error} message when the upload
     *         cannot be stored or read
     */
    @PostMapping("/analyze")
    public ResponseEntity<?> analyzeFile(@RequestParam("file") MultipartFile file) {
        File tempFile = null;
        try {
            // Fixed suffix: the client-supplied filename is untrusted and may be null or
            // contain path separators, so it must not become part of a filesystem path.
            tempFile = File.createTempFile("upload-", ".tmp");
            file.transferTo(tempFile);

            List<Chunk> chunks = chunkingService.chunkFile(tempFile);

            // Map.of rejects null values, and the original filename may be absent.
            String originalName = file.getOriginalFilename();
            String fileName = originalName != null ? originalName : "";

            // An empty upload yields no chunks; avoid dividing by zero.
            long averageChunkSize = chunks.isEmpty() ? 0L : file.getSize() / chunks.size();

            Map<String, Object> stats = Map.of(
                    "fileName", fileName,
                    "originalSize", file.getSize(),
                    "numberOfChunks", chunks.size(),
                    "averageChunkSize", averageChunkSize,
                    "uniqueChunks", chunks.stream().map(Chunk::getHash).distinct().count()
            );

            return ResponseEntity.ok(stats);
        } catch (IOException e) {
            return ResponseEntity.internalServerError()
                    .body(Map.of("error", "Failed to process file: " + e.getMessage()));
        } finally {
            // Always remove the temp file, including when chunking failed.
            if (tempFile != null) {
                try {
                    Files.deleteIfExists(tempFile.toPath());
                } catch (IOException ignored) {
                    // Cleanup is best-effort and must not mask the response; retry at JVM exit.
                    tempFile.deleteOnExit();
                }
            }
        }
    }
}
Loading