Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add encoding detection callback #2788

Merged
merged 4 commits into from
Nov 29, 2018
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/main/java/spoon/compiler/Environment.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import org.apache.log4j.Level;
import spoon.OutputType;
import spoon.compiler.builder.EncodingProvider;
import spoon.support.modelobs.FineModelChangeListener;
import spoon.processing.FileGenerator;
import spoon.processing.ProblemFixer;
Expand Down Expand Up @@ -378,11 +379,21 @@ void report(Processor<?> processor, Level level,
*/
Charset getEncoding();

/**
 * Get encoding provider, which is used to detect encoding for each file separately.
 *
 * @return the configured encoding provider, or {@code null} when none is set;
 *         {@code SpoonFile#getContentChars(Environment)} checks for {@code null}
 *         and in that case decodes the file with {@link #getEncoding()} instead
 */
EncodingProvider getEncodingProvider();

/**
 * Set the encoding to use for parsing source code.
 * {@code SpoonFile#getContentChars(Environment)} decodes with this charset only
 * when {@link #getEncodingProvider()} returns {@code null}.
 *
 * @param encoding the charset used to decode source files
 */
void setEncoding(Charset encoding);

/**
 * Set encoding provider, which is used to detect encoding for each file separately.
 *
 * @param encodingProvider the callback asked for the charset of every file; when it is
 *        {@code null}, {@code SpoonFile#getContentChars(Environment)} decodes with
 *        {@link #getEncoding()}
 */
void setEncodingProvider(EncodingProvider encodingProvider);

/**
* Set the output type used for processing files
*/
Expand Down
28 changes: 28 additions & 0 deletions src/main/java/spoon/compiler/SpoonFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,13 @@
*/
package spoon.compiler;

import org.apache.commons.io.IOUtils;
import spoon.SpoonException;
import spoon.compiler.builder.EncodingProvider;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;

/**
* This interface represents files that can be used as resources for the Spoon
Expand All @@ -41,4 +47,26 @@ public interface SpoonFile extends SpoonResource {
* @return
*/
boolean isActualFile();

/**
* Gets the file content as a char array, considering encoding or encoding
* provider.
*/
default char[] getContentChars(Environment env) {
byte[] bytes;
try {
InputStream contentStream = getContent();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The contentStream must be closed. The best is:

try (InputStream contentStream = getContent()) {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ooops, my bad, thanks.

ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
IOUtils.copy(contentStream, outputStream);
bytes = outputStream.toByteArray();
} catch (IOException e) {
throw new SpoonException(e);
}
if (env.getEncodingProvider() == null) {
return new String(bytes, env.getEncoding()).toCharArray();
} else {
Charset encoding = env.getEncodingProvider().detectEncoding(this, bytes);
return new String(bytes, encoding).toCharArray();
}
}
}
28 changes: 28 additions & 0 deletions src/main/java/spoon/compiler/builder/EncodingProvider.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/**
* Copyright (C) 2006-2018 INRIA and contributors
* Spoon - http://spoon.gforge.inria.fr/
*
* This software is governed by the CeCILL-C License under French law and
* abiding by the rules of distribution of free software. You can use, modify
* and/or redistribute the software under the terms of the CeCILL-C license as
* circulated by CEA, CNRS and INRIA at http://www.cecill.info.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the CeCILL-C License for more details.
*
* The fact that you are presently reading this means that you have had
* knowledge of the CeCILL-C license and that you accept its terms.
*/
package spoon.compiler.builder;

import spoon.compiler.SpoonFile;
import java.nio.charset.Charset;

/**
 * Callback that detects the encoding of each source file separately.
 *
 * <p>It has a single abstract method, so it can be supplied as a lambda or a
 * method reference.
 */
@FunctionalInterface
public interface EncodingProvider {

	/**
	 * User-defined function, which is used to detect encoding for each file.
	 *
	 * @param file the file whose encoding has to be detected
	 * @param fileBytes the raw, not yet decoded bytes of the file
	 * @return the charset that should be used to decode {@code fileBytes}
	 */
	Charset detectEncoding(SpoonFile file, byte[] fileBytes);
}
13 changes: 13 additions & 0 deletions src/main/java/spoon/support/StandardEnvironment.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import spoon.compiler.InvalidClassPathException;
import spoon.compiler.SpoonFile;
import spoon.compiler.SpoonFolder;
import spoon.compiler.builder.EncodingProvider;
import spoon.processing.FileGenerator;
import spoon.processing.ProblemFixer;
import spoon.processing.ProcessingManager;
Expand Down Expand Up @@ -100,6 +101,8 @@ public class StandardEnvironment implements Serializable, Environment {

private transient Charset encoding = Charset.defaultCharset();

private transient EncodingProvider encodingProvider;

private int complianceLevel = DEFAULT_CODE_COMPLIANCE_LEVEL;

private transient OutputDestinationHandler outputDestinationHandler = new DefaultOutputDestinationHandler(new File(Launcher.OUTPUTDIR), this);
Expand Down Expand Up @@ -582,11 +585,21 @@ public Charset getEncoding() {
return this.encoding;
}

/**
 * Returns the encoding provider held by this environment.
 * The backing field has no initializer, so the result is {@code null}
 * until a provider has been assigned.
 */
@Override
public EncodingProvider getEncodingProvider() {
	return this.encodingProvider;
}

/**
 * Stores the charset that {@code getEncoding()} returns.
 *
 * @param encoding the charset to store
 */
@Override
public void setEncoding(Charset encoding) {
this.encoding = encoding;
}

/**
 * Stores the encoding provider that {@code getEncodingProvider()} returns.
 *
 * @param encodingProvider the provider to store
 */
@Override
public void setEncodingProvider(EncodingProvider encodingProvider) {
this.encodingProvider = encodingProvider;
}

@Override
public void setOutputType(OutputType outputType) {
this.outputType = outputType;
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/spoon/support/compiler/VirtualFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package spoon.support.compiler;

import spoon.compiler.Environment;
import spoon.compiler.SpoonFile;
import spoon.compiler.SpoonFolder;

Expand Down Expand Up @@ -44,6 +45,11 @@ public InputStream getContent() {
return new ByteArrayInputStream(content.getBytes());
}

/**
 * Returns the content of this virtual file as characters.
 * The content is already held in memory in decoded form, so {@code env}
 * (and with it any encoding or encoding provider) is not consulted.
 */
@Override
public char[] getContentChars(Environment env) {
	final char[] chars = this.content.toCharArray();
	return chars;
}

@Override
public boolean isJava() {
return true;
Expand Down
26 changes: 8 additions & 18 deletions src/main/java/spoon/support/compiler/jdt/FileCompilerConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,13 @@
*/
package spoon.support.compiler.jdt;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import spoon.SpoonException;
import spoon.SpoonModelBuilder;
import spoon.compiler.Environment;
import spoon.compiler.SpoonFile;

import org.apache.commons.io.IOUtils;
import org.eclipse.jdt.internal.compiler.batch.CompilationUnit;

public class FileCompilerConfig implements SpoonModelBuilder.InputType {
Expand Down Expand Up @@ -53,24 +51,16 @@ public FileCompilerConfig(List<SpoonFile> files) {
public void initializeCompiler(JDTBatchCompiler compiler) {
JDTBasedSpoonCompiler jdtCompiler = compiler.getJdtCompiler();
List<CompilationUnit> cuList = new ArrayList<>();
InputStream inputStream = null;

try {
for (SpoonFile f : getFiles(compiler)) {
for (SpoonFile f : getFiles(compiler)) {

if (compiler.filesToBeIgnored.contains(f.getPath())) {
continue;
}

String fName = f.isActualFile() ? f.getPath() : f.getName();
inputStream = f.getContent();
char[] content = IOUtils.toCharArray(inputStream, jdtCompiler.getEnvironment().getEncoding());
cuList.add(new CompilationUnit(content, fName, jdtCompiler.getEnvironment().getEncoding().displayName()));
IOUtils.closeQuietly(inputStream);
if (compiler.filesToBeIgnored.contains(f.getPath())) {
continue;
}
} catch (Exception e) {
IOUtils.closeQuietly(inputStream);
throw new SpoonException(e);

String fName = f.isActualFile() ? f.getPath() : f.getName();
Environment env = jdtCompiler.getEnvironment();
cuList.add(new CompilationUnit(f.getContentChars(env), fName, env.getEncoding().displayName()));
}

compiler.setCompilationUnits(cuList.toArray(new CompilationUnit[0]));
Expand Down
2 changes: 1 addition & 1 deletion src/test/java/spoon/test/api/FileSystemFolderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public void testLauncherWithWrongPathAsInput() {
try {
spoon.buildModel();
} catch (SpoonException spe) {
Throwable containedException = spe.getCause().getCause();
Throwable containedException = spe.getCause();
assertTrue(containedException instanceof FileNotFoundException);
}
}
Expand Down
41 changes: 41 additions & 0 deletions src/test/java/spoon/test/compilationunit/TestCompilationUnit.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
import org.apache.commons.lang3.StringUtils;
import org.junit.Test;
import spoon.Launcher;
import spoon.SpoonException;
import spoon.compiler.SpoonFile;
import spoon.reflect.CtModel;
import spoon.reflect.cu.CompilationUnit;
import spoon.reflect.cu.SourcePosition;
import spoon.reflect.cu.position.BodyHolderSourcePosition;
Expand All @@ -42,12 +45,14 @@
import java.util.List;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;


/**
* Created by urli on 18/08/2017.
*/
Expand Down Expand Up @@ -267,4 +272,40 @@ public void visitCtCompilationUnit(CtCompilationUnit compilationUnit) {
}
}.scan(type.getFactory().getModel().getRootPackage());
}

/**
 * Dummy encoding detector for tests: chooses the charset purely from the
 * number of bytes in the file.
 * NOTE(review): 76 and 86 are presumably the sizes of the Cp1251.java and
 * Utf8.java fixtures; confirm against the resource files.
 *
 * @param unused the file being inspected; ignored
 * @param fileBytes the raw bytes of the file
 * @return Cp1251 for 76 bytes, UTF-8 for 86 bytes
 * @throws SpoonException for any other length
 */
private Charset detectEncodingDummy(SpoonFile unused, byte[] fileBytes) {
	switch (fileBytes.length) {
		case 76:
			return Charset.forName("Cp1251");
		case 86:
			return Charset.forName("UTF-8");
		default:
			throw new SpoonException("unexpected length");
	}
}

@Test
public void testDifferentEncodings() throws Exception {
//contract: both utf-8 and cp1251 files in the same project should be handled properly
final Launcher launcher = new Launcher();
launcher.addInputResource("./src/test/resources/encodings/Cp1251.java");
launcher.addInputResource("./src/test/resources/encodings/Utf8.java");
launcher.getEnvironment().setEncodingProvider(this::detectEncodingDummy);
CtModel model = launcher.buildModel();

CtType<?> utf8Type = model.getAllTypes()
.stream()
.filter(t -> "Utf8".equals(t.getSimpleName()))
.findFirst()
.get();

CtType<?> cp1251Type = model.getAllTypes()
.stream()
.filter(t -> "Cp1251".equals(t.getSimpleName()))
.findFirst()
.get();

assertEquals("\"Привет мир\"", utf8Type.getField("s1").getAssignment().toString());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I love this expected value :-)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you don't read Cyrillic, it means "Hello World" :)

assertEquals("\"Привет мир\"", cp1251Type.getField("s1").getAssignment().toString());
assertEquals(utf8Type.getField("s1"), cp1251Type.getField("s1"));
assertNotEquals(utf8Type.getField("s2"), cp1251Type.getField("s2"));
}
}
4 changes: 4 additions & 0 deletions src/test/resources/encodings/Cp1251.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
public class Cp1251 {
String s1 = "Привет мир";
String s2 = "���"
}
4 changes: 4 additions & 0 deletions src/test/resources/encodings/Utf8.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
public class Utf8 {
String s1 = "Привет мир";
String s2 = "ГДЕ"
}