Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add encoding detection callback #2788

Merged
merged 4 commits into from
Nov 29, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/main/java/spoon/compiler/Environment.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

import java.io.File;
import java.nio.charset.Charset;
import java.util.function.Function;
import java.util.function.Supplier;

/**
Expand Down Expand Up @@ -378,11 +379,21 @@ void report(Processor<?> processor, Level level,
*/
Charset getEncoding();

/**
 * Get the callback that is used to detect the encoding of each file separately.
 *
 * @return the function that receives the raw bytes of a file and returns the
 *         charset to decode them with.
 *         NOTE(review): presumably {@code null} when no callback has been set;
 *         confirm against the implementing class.
 */
Function<byte[], Charset> getEncodingDetectionCallback();

/**
 * Set the encoding to use for parsing source code.
 *
 * NOTE(review): FileCompilerConfig appears to use this encoding only when no
 * encoding detection callback is set; confirm for other consumers.
 *
 * @param encoding the charset used to decode source files
 */
void setEncoding(Charset encoding);

/**
 * Set the callback that is used to detect the encoding of each file separately.
 *
 * @param callback function that receives the raw bytes of a file and returns
 *        the charset to decode them with
 */
void setEncodingDetectionCallback(Function<byte[], Charset> callback);

/**
* Set the output type used for processing files
*/
Expand Down
13 changes: 13 additions & 0 deletions src/main/java/spoon/support/StandardEnvironment.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.function.Supplier;


Expand Down Expand Up @@ -100,6 +101,8 @@ public class StandardEnvironment implements Serializable, Environment {

private transient Charset encoding = Charset.defaultCharset();

private transient Function<byte[], Charset> encodingDetectionCallback;

private int complianceLevel = DEFAULT_CODE_COMPLIANCE_LEVEL;

private transient OutputDestinationHandler outputDestinationHandler = new DefaultOutputDestinationHandler(new File(Launcher.OUTPUTDIR), this);
Expand Down Expand Up @@ -582,11 +585,21 @@ public Charset getEncoding() {
return this.encoding;
}

/**
 * Returns the per-file encoding detection callback currently stored in this
 * environment; the value is whatever was last passed to
 * {@code setEncodingDetectionCallback} and may be {@code null}.
 */
@Override
public Function<byte[], Charset> getEncodingDetectionCallback() {
    return this.encodingDetectionCallback;
}

/**
 * Stores the charset that {@code getEncoding()} will return.
 *
 * @param encoding the charset to use for parsing source code
 */
@Override
public void setEncoding(Charset encoding) {
this.encoding = encoding;
}

/**
 * Stores the callback that {@code getEncodingDetectionCallback()} will return.
 *
 * @param encodingDetectionCallback function that receives the raw bytes of a
 *        file and returns the charset to decode them with
 */
@Override
public void setEncodingDetectionCallback(Function<byte[], Charset> encodingDetectionCallback) {
this.encodingDetectionCallback = encodingDetectionCallback;
}

@Override
public void setOutputType(OutputType outputType) {
this.outputType = outputType;
Expand Down
12 changes: 10 additions & 2 deletions src/main/java/spoon/support/compiler/jdt/FileCompilerConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package spoon.support.compiler.jdt;

import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

Expand Down Expand Up @@ -64,8 +65,15 @@ public void initializeCompiler(JDTBatchCompiler compiler) {

String fName = f.isActualFile() ? f.getPath() : f.getName();
inputStream = f.getContent();
char[] content = IOUtils.toCharArray(inputStream, jdtCompiler.getEnvironment().getEncoding());
cuList.add(new CompilationUnit(content, fName, jdtCompiler.getEnvironment().getEncoding().displayName()));
if (jdtCompiler.getEnvironment().getEncodingDetectionCallback() == null) {
char[] content = IOUtils.toCharArray(inputStream, jdtCompiler.getEnvironment().getEncoding());
cuList.add(new CompilationUnit(content, fName, jdtCompiler.getEnvironment().getEncoding().displayName()));
} else {
byte[] bytes = IOUtils.toByteArray(inputStream);
Charset encoding = jdtCompiler.getEnvironment().getEncodingDetectionCallback().apply(bytes);
char[] content = new String(bytes, encoding).toCharArray();
cuList.add(new CompilationUnit(content, fName, encoding.displayName()));
}
IOUtils.closeQuietly(inputStream);
}
} catch (Exception e) {
Expand Down
40 changes: 40 additions & 0 deletions src/test/java/spoon/test/compilationunit/TestCompilationUnit.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import org.apache.commons.lang3.StringUtils;
import org.junit.Test;
import spoon.Launcher;
import spoon.SpoonException;
import spoon.reflect.CtModel;
import spoon.reflect.cu.CompilationUnit;
import spoon.reflect.cu.SourcePosition;
import spoon.reflect.cu.position.BodyHolderSourcePosition;
Expand All @@ -42,12 +44,14 @@
import java.util.List;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;


/**
* Created by urli on 18/08/2017.
*/
Expand Down Expand Up @@ -267,4 +271,40 @@ public void visitCtCompilationUnit(CtCompilationUnit compilationUnit) {
}
}.scan(type.getFactory().getModel().getRootPackage());
}

/**
 * Stand-in encoding detector for the test: chooses the charset purely from
 * the number of bytes in the file content (76 bytes selects Cp1251, 86 bytes
 * selects UTF-8).
 *
 * @param fileBytes raw content of the file being parsed
 * @return the charset associated with the given content length
 * @throws SpoonException if the content has any other length
 */
private Charset detectEncodingDummy(byte[] fileBytes) {
    final int size = fileBytes.length;
    switch (size) {
    case 76:
        return Charset.forName("Cp1251");
    case 86:
        return Charset.forName("UTF-8");
    default:
        throw new SpoonException("unexpected length");
    }
}

@Test
public void testDifferentEncodings() throws Exception {
//contract: both utf-8 and cp1251 files in the same project should be handled properly
final Launcher launcher = new Launcher();
launcher.addInputResource("./src/test/resources/encodings/Cp1251.java");
launcher.addInputResource("./src/test/resources/encodings/Utf8.java");
launcher.getEnvironment().setEncodingDetectionCallback(this::detectEncodingDummy);
CtModel model = launcher.buildModel();

CtType<?> utf8Type = model.getAllTypes()
.stream()
.filter(t -> "Utf8".equals(t.getSimpleName()))
.findFirst()
.get();

CtType<?> cp1251Type = model.getAllTypes()
.stream()
.filter(t -> "Cp1251".equals(t.getSimpleName()))
.findFirst()
.get();

assertEquals("\"Привет мир\"", utf8Type.getField("s1").getAssignment().toString());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I love this expected value :-)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you don't read Cyrillic, it means "Hello World" :)

assertEquals("\"Привет мир\"", cp1251Type.getField("s1").getAssignment().toString());
assertEquals(utf8Type.getField("s1"), cp1251Type.getField("s1"));
assertNotEquals(utf8Type.getField("s2"), cp1251Type.getField("s2"));
}
}
4 changes: 4 additions & 0 deletions src/test/resources/encodings/Cp1251.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
public class Cp1251 {
String s1 = "������ ���";
String s2 = "���"
}
4 changes: 4 additions & 0 deletions src/test/resources/encodings/Utf8.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
public class Utf8 {
String s1 = "Привет мир";
String s2 = "ГДЕ"
}