From 539d60c62d3a919cec8681d94876f11bacde7f36 Mon Sep 17 00:00:00 2001 From: Egor18 Date: Tue, 27 Nov 2018 17:37:09 +0300 Subject: [PATCH 1/4] add encoding detection callback --- src/main/java/spoon/compiler/Environment.java | 11 +++++ .../spoon/support/StandardEnvironment.java | 13 ++++++ .../compiler/jdt/FileCompilerConfig.java | 12 +++++- .../compilationunit/TestCompilationUnit.java | 40 +++++++++++++++++++ src/test/resources/encodings/Cp1251.java | 4 ++ src/test/resources/encodings/Utf8.java | 4 ++ 6 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 src/test/resources/encodings/Cp1251.java create mode 100644 src/test/resources/encodings/Utf8.java diff --git a/src/main/java/spoon/compiler/Environment.java b/src/main/java/spoon/compiler/Environment.java index bed10f1470c..5e525338d41 100644 --- a/src/main/java/spoon/compiler/Environment.java +++ b/src/main/java/spoon/compiler/Environment.java @@ -34,6 +34,7 @@ import java.io.File; import java.nio.charset.Charset; +import java.util.function.Function; import java.util.function.Supplier; /** @@ -378,11 +379,21 @@ void report(Processor processor, Level level, */ Charset getEncoding(); + /** + * Get callback, which is used to detect encoding for each file separately + */ + Function getEncodingDetectionCallback(); + /** * Set the encoding to use for parsing source code */ void setEncoding(Charset encoding); + /** + * Set callback, which is used to detect encoding for each file separately + */ + void setEncodingDetectionCallback(Function callback); + /** * Set the output type used for processing files */ diff --git a/src/main/java/spoon/support/StandardEnvironment.java b/src/main/java/spoon/support/StandardEnvironment.java index ccc4474c6cb..88bb602518f 100644 --- a/src/main/java/spoon/support/StandardEnvironment.java +++ b/src/main/java/spoon/support/StandardEnvironment.java @@ -55,6 +55,7 @@ import java.util.List; import java.util.Map; import java.util.TreeMap; +import java.util.function.Function; import java.util.function.Supplier; @@ -100,6 +101,8 @@ public class StandardEnvironment implements Serializable, Environment { private transient Charset encoding = Charset.defaultCharset(); + private transient Function encodingDetectionCallback; + private int complianceLevel = DEFAULT_CODE_COMPLIANCE_LEVEL; private transient OutputDestinationHandler outputDestinationHandler = new DefaultOutputDestinationHandler(new File(Launcher.OUTPUTDIR), this); @@ -582,11 +585,21 @@ public Charset getEncoding() { return this.encoding; } + @Override + public Function getEncodingDetectionCallback() { + return encodingDetectionCallback; + } + @Override public void setEncoding(Charset encoding) { this.encoding = encoding; } + @Override + public void setEncodingDetectionCallback(Function encodingDetectionCallback) { + this.encodingDetectionCallback = encodingDetectionCallback; + } + @Override public void setOutputType(OutputType outputType) { this.outputType = outputType; diff --git a/src/main/java/spoon/support/compiler/jdt/FileCompilerConfig.java b/src/main/java/spoon/support/compiler/jdt/FileCompilerConfig.java index b74071a4dfe..e77c44a3257 100644 --- a/src/main/java/spoon/support/compiler/jdt/FileCompilerConfig.java +++ b/src/main/java/spoon/support/compiler/jdt/FileCompilerConfig.java @@ -17,6 +17,7 @@ package spoon.support.compiler.jdt; import java.io.InputStream; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; @@ -64,8 +65,15 @@ public void initializeCompiler(JDTBatchCompiler compiler) { String fName = f.isActualFile() ? f.getPath() : f.getName(); inputStream = f.getContent(); - char[] content = IOUtils.toCharArray(inputStream, jdtCompiler.getEnvironment().getEncoding()); - cuList.add(new CompilationUnit(content, fName, jdtCompiler.getEnvironment().getEncoding().displayName())); + if (jdtCompiler.getEnvironment().getEncodingDetectionCallback() == null) { + char[] content = IOUtils.toCharArray(inputStream, jdtCompiler.getEnvironment().getEncoding()); + cuList.add(new CompilationUnit(content, fName, jdtCompiler.getEnvironment().getEncoding().displayName())); + } else { + byte[] bytes = IOUtils.toByteArray(inputStream); + Charset encoding = jdtCompiler.getEnvironment().getEncodingDetectionCallback().apply(bytes); + char[] content = new String(bytes, encoding).toCharArray(); + cuList.add(new CompilationUnit(content, fName, encoding.displayName())); + } IOUtils.closeQuietly(inputStream); } } catch (Exception e) { diff --git a/src/test/java/spoon/test/compilationunit/TestCompilationUnit.java b/src/test/java/spoon/test/compilationunit/TestCompilationUnit.java index 3c3029f3ab9..8d95f52cc31 100644 --- a/src/test/java/spoon/test/compilationunit/TestCompilationUnit.java +++ b/src/test/java/spoon/test/compilationunit/TestCompilationUnit.java @@ -19,6 +19,8 @@ import org.apache.commons.lang3.StringUtils; import org.junit.Test; import spoon.Launcher; +import spoon.SpoonException; +import spoon.reflect.CtModel; import spoon.reflect.cu.CompilationUnit; import spoon.reflect.cu.SourcePosition; import spoon.reflect.cu.position.BodyHolderSourcePosition; @@ -42,12 +44,14 @@ import java.util.List; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; + /** * Created by urli on 18/08/2017. */ @@ -267,4 +271,40 @@ public void visitCtCompilationUnit(CtCompilationUnit compilationUnit) { } }.scan(type.getFactory().getModel().getRootPackage()); } + + private Charset detectEncodingDummy(byte[] fileBytes) { + if (fileBytes.length == 76) { + return Charset.forName("Cp1251"); + } else if (fileBytes.length == 86) { + return Charset.forName("UTF-8"); + } + throw new SpoonException("unexpected length"); + } + + @Test + public void testDifferentEncodings() throws Exception { + //contract: both utf-8 and cp1251 files in the same project should be handled properly + final Launcher launcher = new Launcher(); + launcher.addInputResource("./src/test/resources/encodings/Cp1251.java"); + launcher.addInputResource("./src/test/resources/encodings/Utf8.java"); + launcher.getEnvironment().setEncodingDetectionCallback(this::detectEncodingDummy); + CtModel model = launcher.buildModel(); + + CtType utf8Type = model.getAllTypes() + .stream() + .filter(t -> "Utf8".equals(t.getSimpleName())) + .findFirst() + .get(); + + CtType cp1251Type = model.getAllTypes() + .stream() + .filter(t -> "Cp1251".equals(t.getSimpleName())) + .findFirst() + .get(); + + assertEquals("\"Привет мир\"", utf8Type.getField("s1").getAssignment().toString()); + assertEquals("\"Привет мир\"", cp1251Type.getField("s1").getAssignment().toString()); + assertEquals(utf8Type.getField("s1"), cp1251Type.getField("s1")); + assertNotEquals(utf8Type.getField("s2"), cp1251Type.getField("s2")); + } } diff --git a/src/test/resources/encodings/Cp1251.java b/src/test/resources/encodings/Cp1251.java new file mode 100644 index 00000000000..ef7c5e155e9 --- /dev/null +++ b/src/test/resources/encodings/Cp1251.java @@ -0,0 +1,4 @@ +public class Cp1251 { + String s1 = "Ïðèâåò ìèð"; + String s2 = "ÀÁÂ" +} diff --git a/src/test/resources/encodings/Utf8.java b/src/test/resources/encodings/Utf8.java new file mode 100644 index 00000000000..36332192315 --- /dev/null +++ b/src/test/resources/encodings/Utf8.java @@ -0,0 +1,4 @@ +public class Utf8 { + String s1 = "Привет мир"; + String s2 = "ГДЕ" +} From 0ce2e1c7da63244717541af17c4313d8eaf9142c Mon Sep 17 00:00:00 2001 From: Egor18 Date: Thu, 29 Nov 2018 11:28:15 +0300 Subject: [PATCH 2/4] refactor api --- src/main/java/spoon/compiler/Environment.java | 10 +++--- src/main/java/spoon/compiler/SpoonFile.java | 25 ++++++++++++++ .../compiler/builder/EncodingProvider.java | 28 +++++++++++++++ .../spoon/support/StandardEnvironment.java | 12 +++---- .../spoon/support/compiler/VirtualFile.java | 13 +++++++ .../compiler/jdt/FileCompilerConfig.java | 34 +++++-------------- .../spoon/test/api/FileSystemFolderTest.java | 2 +- .../compilationunit/TestCompilationUnit.java | 5 +-- 8 files changed, 89 insertions(+), 40 deletions(-) create mode 100644 src/main/java/spoon/compiler/builder/EncodingProvider.java diff --git a/src/main/java/spoon/compiler/Environment.java b/src/main/java/spoon/compiler/Environment.java index 5e525338d41..d36de910d50 100644 --- a/src/main/java/spoon/compiler/Environment.java +++ b/src/main/java/spoon/compiler/Environment.java @@ -18,6 +18,7 @@ import org.apache.log4j.Level; import spoon.OutputType; +import spoon.compiler.builder.EncodingProvider; import spoon.support.modelobs.FineModelChangeListener; import spoon.processing.FileGenerator; import spoon.processing.ProblemFixer; @@ -34,7 +35,6 @@ import java.io.File; import java.nio.charset.Charset; -import java.util.function.Function; import java.util.function.Supplier; /** @@ -380,9 +380,9 @@ void report(Processor processor, Level level, Charset getEncoding(); /** - * Get callback, which is used to detect encoding for each file separately + * Get encoding provider, which is used to detect encoding for each file separately */ - Function getEncodingDetectionCallback(); + EncodingProvider getEncodingProvider(); /** * Set the encoding to use for parsing source code @@ -390,9 +390,9 @@ void report(Processor processor, Level level, void setEncoding(Charset encoding); /** - * Set callback, which is used to detect encoding for each file separately + * Set encoding provider, which is used to detect encoding for each file separately */ - void setEncodingDetectionCallback(Function callback); + void setEncodingProvider(EncodingProvider encodingProvider); /** * Set the output type used for processing files diff --git a/src/main/java/spoon/compiler/SpoonFile.java b/src/main/java/spoon/compiler/SpoonFile.java index 7ca3fcb714d..75b1c4767ac 100644 --- a/src/main/java/spoon/compiler/SpoonFile.java +++ b/src/main/java/spoon/compiler/SpoonFile.java @@ -16,7 +16,11 @@ */ package spoon.compiler; +import spoon.SpoonException; + +import java.io.IOException; import java.io.InputStream; +import java.nio.charset.Charset; /** * This interface represents files that can be used as resources for the Spoon @@ -41,4 +45,25 @@ public interface SpoonFile extends SpoonResource { * @return */ boolean isActualFile(); + + /** + * Gets the file content as a char array, considering encoding or encoding + * provider. + */ + default char[] getContentChars(Environment env) { + byte[] bytes; + try { + InputStream contentStream = getContent(); + bytes = new byte[contentStream.available()]; + contentStream.read(bytes); + } catch (IOException e) { + throw new SpoonException(e); + } + if (env.getEncodingProvider() == null) { + return new String(bytes, env.getEncoding()).toCharArray(); + } else { + Charset encoding = env.getEncodingProvider().detectEncoding(this, bytes); + return new String(bytes, encoding).toCharArray(); + } + } } diff --git a/src/main/java/spoon/compiler/builder/EncodingProvider.java b/src/main/java/spoon/compiler/builder/EncodingProvider.java new file mode 100644 index 00000000000..0965bdbe13b --- /dev/null +++ b/src/main/java/spoon/compiler/builder/EncodingProvider.java @@ -0,0 +1,28 @@ +/** + * Copyright (C) 2006-2018 INRIA and contributors + * Spoon - http://spoon.gforge.inria.fr/ + * + * This software is governed by the CeCILL-C License under French law and + * abiding by the rules of distribution of free software. You can use, modify + * and/or redistribute the software under the terms of the CeCILL-C license as + * circulated by CEA, CNRS and INRIA at http://www.cecill.info. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the CeCILL-C License for more details. + * + * The fact that you are presently reading this means that you have had + * knowledge of the CeCILL-C license and that you accept its terms. + */ +package spoon.compiler.builder; + +import spoon.compiler.SpoonFile; +import java.nio.charset.Charset; + +public interface EncodingProvider { + + /** + * User-defined function, which is used to detect encoding for each file + */ + Charset detectEncoding(SpoonFile file, byte[] fileBytes); +} diff --git a/src/main/java/spoon/support/StandardEnvironment.java b/src/main/java/spoon/support/StandardEnvironment.java index 88bb602518f..7b0bb031273 100644 --- a/src/main/java/spoon/support/StandardEnvironment.java +++ b/src/main/java/spoon/support/StandardEnvironment.java @@ -26,6 +26,7 @@ import spoon.compiler.InvalidClassPathException; import spoon.compiler.SpoonFile; import spoon.compiler.SpoonFolder; +import spoon.compiler.builder.EncodingProvider; import spoon.processing.FileGenerator; import spoon.processing.ProblemFixer; import spoon.processing.ProcessingManager; @@ -55,7 +56,6 @@ import java.util.List; import java.util.Map; import java.util.TreeMap; -import java.util.function.Function; import java.util.function.Supplier; @@ -101,7 +101,7 @@ public class StandardEnvironment implements Serializable, Environment { private transient Charset encoding = Charset.defaultCharset(); - private transient Function encodingDetectionCallback; + private transient EncodingProvider encodingProvider; private int complianceLevel = DEFAULT_CODE_COMPLIANCE_LEVEL; @@ -586,8 +586,8 @@ public Charset getEncoding() { } @Override - public Function getEncodingDetectionCallback() { - return encodingDetectionCallback; + public EncodingProvider getEncodingProvider() { + return encodingProvider; } @Override @@ -596,8 +596,8 @@ public void setEncoding(Charset encoding) { } @Override - public void setEncodingDetectionCallback(Function encodingDetectionCallback) { - this.encodingDetectionCallback = encodingDetectionCallback; + public void setEncodingProvider(EncodingProvider encodingProvider) { + this.encodingProvider = encodingProvider; } @Override diff --git a/src/main/java/spoon/support/compiler/VirtualFile.java b/src/main/java/spoon/support/compiler/VirtualFile.java index 556362fbc6a..80c23b6cfcb 100644 --- a/src/main/java/spoon/support/compiler/VirtualFile.java +++ b/src/main/java/spoon/support/compiler/VirtualFile.java @@ -16,12 +16,14 @@ */ package spoon.support.compiler; +import spoon.compiler.Environment; import spoon.compiler.SpoonFile; import spoon.compiler.SpoonFolder; import java.io.ByteArrayInputStream; import java.io.File; import java.io.InputStream; +import java.nio.charset.Charset; public class VirtualFile implements SpoonFile { public static final String VIRTUAL_FILE_NAME = "virtual_file"; @@ -44,6 +46,17 @@ public InputStream getContent() { return new ByteArrayInputStream(content.getBytes()); } + @Override + public char[] getContentChars(Environment env) { + byte[] bytes = content.getBytes(); + if (env.getEncodingProvider() == null) { + return new String(bytes, env.getEncoding()).toCharArray(); + } else { + Charset encoding = env.getEncodingProvider().detectEncoding(this, bytes); + return new String(bytes, encoding).toCharArray(); + } + } + @Override public boolean isJava() { return true; diff --git a/src/main/java/spoon/support/compiler/jdt/FileCompilerConfig.java b/src/main/java/spoon/support/compiler/jdt/FileCompilerConfig.java index e77c44a3257..09e883148c3 100644 --- a/src/main/java/spoon/support/compiler/jdt/FileCompilerConfig.java +++ b/src/main/java/spoon/support/compiler/jdt/FileCompilerConfig.java @@ -16,16 +16,13 @@ */ package spoon.support.compiler.jdt; -import java.io.InputStream; -import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; -import spoon.SpoonException; import spoon.SpoonModelBuilder; +import spoon.compiler.Environment; import spoon.compiler.SpoonFile; -import org.apache.commons.io.IOUtils; import org.eclipse.jdt.internal.compiler.batch.CompilationUnit; public class FileCompilerConfig implements SpoonModelBuilder.InputType { @@ -54,31 +51,16 @@ public FileCompilerConfig(List files) { public void initializeCompiler(JDTBatchCompiler compiler) { JDTBasedSpoonCompiler jdtCompiler = compiler.getJdtCompiler(); List cuList = new ArrayList<>(); - InputStream inputStream = null; - try { - for (SpoonFile f : getFiles(compiler)) { + for (SpoonFile f : getFiles(compiler)) { - if (compiler.filesToBeIgnored.contains(f.getPath())) { - continue; - } - - String fName = f.isActualFile() ? f.getPath() : f.getName(); - inputStream = f.getContent(); - if (jdtCompiler.getEnvironment().getEncodingDetectionCallback() == null) { - char[] content = IOUtils.toCharArray(inputStream, jdtCompiler.getEnvironment().getEncoding()); - cuList.add(new CompilationUnit(content, fName, jdtCompiler.getEnvironment().getEncoding().displayName())); - } else { - byte[] bytes = IOUtils.toByteArray(inputStream); - Charset encoding = jdtCompiler.getEnvironment().getEncodingDetectionCallback().apply(bytes); - char[] content = new String(bytes, encoding).toCharArray(); - cuList.add(new CompilationUnit(content, fName, encoding.displayName())); - } - IOUtils.closeQuietly(inputStream); + if (compiler.filesToBeIgnored.contains(f.getPath())) { + continue; } - } catch (Exception e) { - IOUtils.closeQuietly(inputStream); - throw new SpoonException(e); + + String fName = f.isActualFile() ? f.getPath() : f.getName(); + Environment env = jdtCompiler.getEnvironment(); + cuList.add(new CompilationUnit(f.getContentChars(env), fName, env.getEncoding().displayName())); } compiler.setCompilationUnits(cuList.toArray(new CompilationUnit[0])); diff --git a/src/test/java/spoon/test/api/FileSystemFolderTest.java b/src/test/java/spoon/test/api/FileSystemFolderTest.java index b061dc2303c..7dff1cc3cbb 100644 --- a/src/test/java/spoon/test/api/FileSystemFolderTest.java +++ b/src/test/java/spoon/test/api/FileSystemFolderTest.java @@ -46,7 +46,7 @@ public void testLauncherWithWrongPathAsInput() { try { spoon.buildModel(); } catch (SpoonException spe) { - Throwable containedException = spe.getCause().getCause(); + Throwable containedException = spe.getCause(); assertTrue(containedException instanceof FileNotFoundException); } } diff --git a/src/test/java/spoon/test/compilationunit/TestCompilationUnit.java b/src/test/java/spoon/test/compilationunit/TestCompilationUnit.java index 8d95f52cc31..350c9d32072 100644 --- a/src/test/java/spoon/test/compilationunit/TestCompilationUnit.java +++ b/src/test/java/spoon/test/compilationunit/TestCompilationUnit.java @@ -20,6 +20,7 @@ import org.junit.Test; import spoon.Launcher; import spoon.SpoonException; +import spoon.compiler.SpoonFile; import spoon.reflect.CtModel; import spoon.reflect.cu.CompilationUnit; import spoon.reflect.cu.SourcePosition; @@ -272,7 +273,7 @@ public void visitCtCompilationUnit(CtCompilationUnit compilationUnit) { }.scan(type.getFactory().getModel().getRootPackage()); } - private Charset detectEncodingDummy(byte[] fileBytes) { + private Charset detectEncodingDummy(SpoonFile unused, byte[] fileBytes) { if (fileBytes.length == 76) { return Charset.forName("Cp1251"); } else if (fileBytes.length == 86) { @@ -287,7 +288,7 @@ public void testDifferentEncodings() throws Exception { final Launcher launcher = new Launcher(); launcher.addInputResource("./src/test/resources/encodings/Cp1251.java"); launcher.addInputResource("./src/test/resources/encodings/Utf8.java"); - launcher.getEnvironment().setEncodingDetectionCallback(this::detectEncodingDummy); + launcher.getEnvironment().setEncodingProvider(this::detectEncodingDummy); CtModel model = launcher.buildModel(); CtType utf8Type = model.getAllTypes() From 6eb5039adbfbf06f4e4a39befc768f805d13efde Mon Sep 17 00:00:00 2001 From: Egor18 Date: Thu, 29 Nov 2018 12:03:24 +0300 Subject: [PATCH 3/4] fix --- src/main/java/spoon/compiler/SpoonFile.java | 7 +++++-- src/main/java/spoon/support/compiler/VirtualFile.java | 9 +-------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/main/java/spoon/compiler/SpoonFile.java b/src/main/java/spoon/compiler/SpoonFile.java index 75b1c4767ac..b4f49d51a91 100644 --- a/src/main/java/spoon/compiler/SpoonFile.java +++ b/src/main/java/spoon/compiler/SpoonFile.java @@ -16,8 +16,10 @@ */ package spoon.compiler; +import org.apache.commons.io.IOUtils; import spoon.SpoonException; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; @@ -54,8 +56,9 @@ default char[] getContentChars(Environment env) { byte[] bytes; try { InputStream contentStream = getContent(); - bytes = new byte[contentStream.available()]; - contentStream.read(bytes); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + IOUtils.copy(contentStream, outputStream); + bytes = outputStream.toByteArray(); } catch (IOException e) { throw new SpoonException(e); } diff --git a/src/main/java/spoon/support/compiler/VirtualFile.java b/src/main/java/spoon/support/compiler/VirtualFile.java index 80c23b6cfcb..9696b9bf376 100644 --- a/src/main/java/spoon/support/compiler/VirtualFile.java +++ b/src/main/java/spoon/support/compiler/VirtualFile.java @@ -23,7 +23,6 @@ import java.io.ByteArrayInputStream; import java.io.File; import java.io.InputStream; -import java.nio.charset.Charset; public class VirtualFile implements SpoonFile { public static final String VIRTUAL_FILE_NAME = "virtual_file"; @@ -48,13 +47,7 @@ public InputStream getContent() { @Override public char[] getContentChars(Environment env) { - byte[] bytes = content.getBytes(); - if (env.getEncodingProvider() == null) { - return new String(bytes, env.getEncoding()).toCharArray(); - } else { - Charset encoding = env.getEncodingProvider().detectEncoding(this, bytes); - return new String(bytes, encoding).toCharArray(); - } + return content.toCharArray(); } @Override From 7b417f386ec125c9baa4afba15a28b90455ae2dc Mon Sep 17 00:00:00 2001 From: Egor18 Date: Thu, 29 Nov 2018 12:21:32 +0300 Subject: [PATCH 4/4] close contentStream --- src/main/java/spoon/compiler/SpoonFile.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/spoon/compiler/SpoonFile.java b/src/main/java/spoon/compiler/SpoonFile.java index b4f49d51a91..8b2eb86d4ba 100644 --- a/src/main/java/spoon/compiler/SpoonFile.java +++ b/src/main/java/spoon/compiler/SpoonFile.java @@ -54,8 +54,7 @@ public interface SpoonFile extends SpoonResource { */ default char[] getContentChars(Environment env) { byte[] bytes; - try { - InputStream contentStream = getContent(); + try (InputStream contentStream = getContent()) { ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); IOUtils.copy(contentStream, outputStream); bytes = outputStream.toByteArray();