From 3ef37f92ef1b794e75628ede283ab519ad543626 Mon Sep 17 00:00:00 2001 From: Viraj Rane Date: Sun, 29 Nov 2020 17:57:42 +0530 Subject: [PATCH] Bugfix: non-english characters encoding (fixed support for UTF-8) (#3) --- core/pom.xml | 6 + .../core/servlets/ExcelToJSONServlet.java | 428 ++++++++++-------- .../main/content/META-INF/vault/filter.xml | 2 +- .../content/jcr_root/content/.content.xml | 25 - .../jcr_root/content/dam/json/.content.xml | 8 + .../content/{ => dam}/json/_rep_policy.xml | 0 .../jcr_root/content/json/.content.xml | 4 - 7 files changed, 254 insertions(+), 219 deletions(-) delete mode 100644 ui.apps/src/main/content/jcr_root/content/.content.xml create mode 100644 ui.apps/src/main/content/jcr_root/content/dam/json/.content.xml rename ui.apps/src/main/content/jcr_root/content/{ => dam}/json/_rep_policy.xml (100%) delete mode 100644 ui.apps/src/main/content/jcr_root/content/json/.content.xml diff --git a/core/pom.xml b/core/pom.xml index ff64e45..14fb507 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -102,6 +102,12 @@ poi-ooxml 4.1.0 + + + org.apache.commons + commons-lang3 + 3.6 + org.json diff --git a/core/src/main/java/com/excel/json/core/servlets/ExcelToJSONServlet.java b/core/src/main/java/com/excel/json/core/servlets/ExcelToJSONServlet.java index c498258..4329a16 100644 --- a/core/src/main/java/com/excel/json/core/servlets/ExcelToJSONServlet.java +++ b/core/src/main/java/com/excel/json/core/servlets/ExcelToJSONServlet.java @@ -1,207 +1,257 @@ package com.excel.json.core.servlets; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import org.json.JSONArray; - -import javax.jcr.Node; -import javax.jcr.NodeIterator; -import javax.jcr.PathNotFoundException; -import javax.jcr.RepositoryException; -import javax.jcr.Session; -import javax.servlet.Servlet; -import javax.servlet.ServletException; - -import org.apache.poi.ss.usermodel.Workbook; +import com.day.cq.dam.api.Asset; +import com.day.cq.dam.api.AssetManager; +import org.apache.commons.fileupload.servlet.ServletFileUpload; +import org.apache.commons.io.FilenameUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.poi.hssf.usermodel.HSSFWorkbook; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; -import org.apache.poi.ss.usermodel.Sheet; -import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.sling.api.SlingHttpServletRequest; import org.apache.sling.api.SlingHttpServletResponse; -import org.apache.sling.api.servlets.HttpConstants; -import org.apache.sling.api.servlets.SlingAllMethodsServlet; -import org.apache.commons.fileupload.servlet.ServletFileUpload; -import org.apache.commons.io.FilenameUtils; import org.apache.sling.api.request.RequestParameter; import org.apache.sling.api.resource.LoginException; import org.apache.sling.api.resource.ResourceResolver; import org.apache.sling.api.resource.ResourceResolverFactory; +import org.apache.sling.api.servlets.HttpConstants; +import org.apache.sling.api.servlets.SlingAllMethodsServlet; +import org.json.JSONArray; import org.osgi.framework.Constants; import org.osgi.service.component.annotations.Component; import org.osgi.service.component.annotations.Reference; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -@Component(service = Servlet.class, property = { Constants.SERVICE_DESCRIPTION + "=Excel to JSON Converter Servlet", - "sling.servlet.methods=" + HttpConstants.METHOD_POST, "sling.servlet.paths=" + "/apps/get/json/from/xls" }) +import javax.servlet.Servlet; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Objects; + +/** + * @author https://github.com/vsr061 + * This servlet parses an Excel file table stored in a first sheet to JSON array + * where each object in the array represents a row in the Excel file and object + * member keys are the table header values. + * ___________________________________________ + * | first name | last name | profile avatar | + * ------------------------------------------- + * | Viraj | Rane | pacman | + * ------------------------------------------- + * | Json | Jose | ninja | + * ------------------------------------------- + * + * will get parsed to + * + * { + * [ + * { + * "first name": "Viraj", + * "last name": "Rane", + * "profile avatar": "pacman" + * }, + * { + * "first name": "Json", + * "last name": "Jose", + * "profile avatar": "ninja" + * } + * ] + * } + */ +@Component( + service = Servlet.class, + property = + { + Constants.SERVICE_DESCRIPTION + "=Excel to JSON Converter Servlet", + "sling.servlet.methods=" + HttpConstants.METHOD_POST, + "sling.servlet.paths=" + "/apps/get/json/from/xls" + } +) public class ExcelToJSONServlet extends SlingAllMethodsServlet { - protected static final String XLSX = "xlsx"; - protected static final String XLS = "xls"; - protected static final String DEFAULT_FILE_EXTENSION = "json"; - protected static final String DEFAULT_FILE_NAME = "renderedJSON." + DEFAULT_FILE_EXTENSION; - - private static final long serialVersionUID = 1L; - - private static final Logger LOGGER = LoggerFactory.getLogger(ExcelToJSONServlet.class); - - // Inject a Sling ResourceResolverFactory - @Reference - private ResourceResolverFactory resolverFactory; - - @Override - protected void doPost(SlingHttpServletRequest request, SlingHttpServletResponse response) - throws ServletException, IOException { - // TODO Auto-generated method stub - try { - final boolean isMultipart = ServletFileUpload.isMultipartContent(request); - - if (isMultipart) { - - final Map params = request.getRequestParameterMap(); - - Iterator> parmsIterator = params.entrySet().iterator(); - - Map.Entry pairs = parmsIterator.next(); - RequestParameter[] parameterArray = pairs.getValue(); - RequestParameter param = parameterArray[0]; - - final InputStream stream = param.getInputStream(); - - String mimeType = FilenameUtils.getExtension(param.getFileName()); - Workbook workbook = null; - - if (mimeType.equals(XLSX)) { - workbook = new XSSFWorkbook(stream); - } - - if (mimeType.equals(XLS)) { - workbook = new HSSFWorkbook(stream); - } - - // The InputStream represents the excel file - ArrayList> excelData = readExcel(workbook); - - stream.close(); - workbook.close(); - - InputStream jsonStream = null; - if (excelData != null) { - JSONArray parsedJSON = new JSONArray(excelData); - jsonStream = new ByteArrayInputStream(parsedJSON.toString().getBytes()); - } - - pairs = parmsIterator.next(); - parameterArray = pairs.getValue(); - param = parameterArray[0]; - - String destPath = param.getString(); - - storeFileinJCR(destPath, DEFAULT_FILE_NAME, DEFAULT_FILE_EXTENSION, jsonStream); - jsonStream.close(); - - response.setStatus(200); - response.getWriter().print("Successfully parsed the file!"); - - } - } - - catch (Exception e) { - e.printStackTrace(); - response.setStatus(403); - response.getWriter().print("Please check the inputs!"); - } - } - - // Read Excel File - private ArrayList> readExcel(Workbook workbook) throws IOException { - - ArrayList> resultList = new ArrayList>(); - String[] tableHeader = getTableHeader(workbook); - - if (tableHeader != null) { - Sheet sheet = workbook.getSheetAt(0); - Iterator rowIterator = sheet.iterator(); - while (rowIterator.hasNext()) { - Row row = rowIterator.next(); - - if (row.getRowNum() == 0) { - continue; - } - - HashMap rowData = new HashMap(); - Iterator cellIterator = row.cellIterator(); - - while (cellIterator.hasNext()) { - Cell cell = cellIterator.next(); - rowData.put(tableHeader[cell.getColumnIndex()], cell.toString()); - } - - resultList.add(rowData); - } - } - - return resultList; - - } - - // Get table's header / first row - private String[] getTableHeader(Workbook workbook) throws IOException { - - Sheet sheet = workbook.getSheetAt(0); - Iterator rowIterator = sheet.iterator(); - - Row row = rowIterator.next(); - Iterator cellIterator = row.cellIterator(); - String[] tableHeader = new String[row.getLastCellNum()]; - - int index = 0; - while (cellIterator.hasNext()) { - Cell cell = cellIterator.next(); - tableHeader[index] = cell.toString(); - index++; - } - - return tableHeader; - - } - - // Store file in JCR - private void storeFileinJCR(String destPath, String fileName, String mimetype, InputStream jsonStream) - throws LoginException, PathNotFoundException, RepositoryException { - Map serviceNameParam = new HashMap(); - serviceNameParam.put(ResourceResolverFactory.SUBSERVICE, "excel-to-json"); - ResourceResolver resolver = resolverFactory.getServiceResourceResolver(serviceNameParam); - Session session = resolver.adaptTo(Session.class); - - Node node = session.getNode(destPath); - NodeIterator childNodes = node.getNodes(fileName); - - if (childNodes.hasNext()) { - childNodes.nextNode().remove(); - } - - javax.jcr.ValueFactory valueFactory = session.getValueFactory(); - javax.jcr.Binary contentValue = valueFactory.createBinary(jsonStream); - Node fileNode = node.addNode(fileName, "nt:file"); - fileNode.addMixin("mix:referenceable"); - Node resNode = fileNode.addNode("jcr:content", "nt:resource"); - resNode.setProperty("jcr:mimeType", mimetype); - resNode.setProperty("jcr:data", contentValue); - Calendar lastModified = Calendar.getInstance(); - lastModified.setTimeInMillis(lastModified.getTimeInMillis()); - resNode.setProperty("jcr:lastModified", lastModified); - session.save(); - session.logout(); - - } - -} \ No newline at end of file + private static final String XLSX = "xlsx"; + private static final String XLS = "xls"; + private static final String JSON_MIME_TYPE = "application/json"; + private static final String JSON_FILE_EXTENSION = ".json"; + private static final String SUB_SERVICE_NAME = "excel-to-json"; + + private static final Logger LOGGER = LoggerFactory.getLogger(ExcelToJSONServlet.class); + + @Reference + protected ResourceResolverFactory resolverFactory; + + @Override + protected void doPost(SlingHttpServletRequest request, SlingHttpServletResponse response) + throws IOException { + + final boolean isMultipart = ServletFileUpload.isMultipartContent(request); + final Map params = request.getRequestParameterMap(); + Iterator> paramsIterator = params.entrySet().iterator(); + + Map.Entry pairs = paramsIterator.next(); + RequestParameter[] parameterArray = pairs.getValue(); + + //Uploaded file + RequestParameter file = parameterArray[0]; + + //File's mime type + String mimeType = FilenameUtils.getExtension(file.getFileName()); + + pairs = paramsIterator.next(); + parameterArray = pairs.getValue(); + RequestParameter path = parameterArray[0]; + + //JCR path to store the parsed JSON as an asset + String destinationPath = path.getString(); + + if (isMultipart + && StringUtils.isNotEmpty(mimeType) + && mimeType.contains(XLS) + && StringUtils.isNotBlank(destinationPath) + ) { + Workbook workbook = null; + InputStream jsonStream = null; + try (final InputStream stream = file.getInputStream()) { + + if (Objects.nonNull(stream)) { + + if (mimeType.equals(XLSX)) { + workbook = new XSSFWorkbook(stream); + } else if (mimeType.equals(XLS)) { + workbook = new HSSFWorkbook(stream); + } else { + LOGGER.error("{}: Unsupported file type!", mimeType); + sendStatus( + response, + SlingHttpServletResponse.SC_BAD_REQUEST, + mimeType.concat(" Unsupported file type!") + ); + return; + } + + //Get list of map objects + ArrayList> excelData = readExcel(workbook); + + //Parse list of map objects to JSON array + JSONArray parsedJSON = new JSONArray(excelData); + + /* + Convert JSON array to stream + ** Keep encoding as UTF-8 ** + */ + jsonStream = new ByteArrayInputStream(parsedJSON.toString().getBytes(StandardCharsets.UTF_8)); + + //store stream in JCR as AEM asset + Asset fileInJCR = + storeFileInJCR( + destinationPath, + FilenameUtils.getBaseName(file.getFileName()).concat(JSON_FILE_EXTENSION), + jsonStream); + + int statusCode = Objects.nonNull(fileInJCR) + ? SlingHttpServletResponse.SC_OK + : SlingHttpServletResponse.SC_INTERNAL_SERVER_ERROR; + String message = Objects.nonNull(fileInJCR) + ? "Successfully parsed the file!" + : "Error occurred while saving the file! Check system user permissions"; + sendStatus(response, statusCode, message); + + } else { + sendStatus( + response, + SlingHttpServletResponse.SC_BAD_REQUEST, + "Empty file!" + ); + } + } catch (LoginException e) { + LOGGER.error("Error Occurred: {}", e.getMessage()); + sendStatus( + response, + SlingHttpServletResponse.SC_INTERNAL_SERVER_ERROR, + "Error occurred while saving the file!" + ); + } finally { + if (Objects.nonNull(workbook) && Objects.nonNull(jsonStream)) { + workbook.close(); + jsonStream.close(); + } + } + } else { + sendStatus( + response, + SlingHttpServletResponse.SC_BAD_REQUEST, + "Unsupported request payload!" + ); + } + } + + private void sendStatus(SlingHttpServletResponse response, int status, String message) throws IOException { + response.setCharacterEncoding(StandardCharsets.UTF_8.displayName()); + response.setContentType("text/plain"); + response.setStatus(status); + response.getWriter().print(message); + } + + // Read Excel File + private ArrayList> readExcel(Workbook workbook) { + ArrayList> resultList = new ArrayList<>(); + String[] tableHeader = getTableHeader(workbook); + Sheet sheet = workbook.getSheetAt(0); + for (Row row : sheet) { + if (row.getRowNum() == 0) { + continue; + } + HashMap rowData = new HashMap<>(); + Iterator cellIterator = row.cellIterator(); + while (cellIterator.hasNext()) { + Cell cell = cellIterator.next(); + rowData.put(tableHeader[cell.getColumnIndex()], cell.getRichStringCellValue().getString()); + } + if (!rowData.isEmpty()) { + resultList.add(rowData); + } + } + return resultList; + } + + // Get table's header / first row + private String[] getTableHeader(Workbook workbook) { + Sheet sheet = workbook.getSheetAt(0); + Iterator rowIterator = sheet.iterator(); + Row row = rowIterator.next(); + Iterator cellIterator = row.cellIterator(); + String[] tableHeader = new String[row.getLastCellNum()]; + for (int index = 0; cellIterator.hasNext(); index++) { + Cell cell = cellIterator.next(); + tableHeader[index] = cell.getRichStringCellValue().getString(); + } + return tableHeader; + } + + // Store file in JCR + private Asset storeFileInJCR(String destinationPath, String fileName, InputStream jsonStream) + throws LoginException { + Map serviceNameParam = new HashMap<>(); + serviceNameParam.put(ResourceResolverFactory.SUBSERVICE, SUB_SERVICE_NAME); + ResourceResolver resolver = resolverFactory.getServiceResourceResolver(serviceNameParam); + AssetManager assetManager = resolver.adaptTo(AssetManager.class); + if (Objects.nonNull(assetManager)) { + return assetManager.createAsset( + destinationPath.concat("/".concat(fileName)), + jsonStream, + JSON_MIME_TYPE, + true); + } else { + return null; + } + } +} diff --git a/ui.apps/src/main/content/META-INF/vault/filter.xml b/ui.apps/src/main/content/META-INF/vault/filter.xml index 054969e..ecc05e9 100644 --- a/ui.apps/src/main/content/META-INF/vault/filter.xml +++ b/ui.apps/src/main/content/META-INF/vault/filter.xml @@ -2,6 +2,6 @@ - + diff --git a/ui.apps/src/main/content/jcr_root/content/.content.xml b/ui.apps/src/main/content/jcr_root/content/.content.xml deleted file mode 100644 index ffb59b9..0000000 --- a/ui.apps/src/main/content/jcr_root/content/.content.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - diff --git a/ui.apps/src/main/content/jcr_root/content/dam/json/.content.xml b/ui.apps/src/main/content/jcr_root/content/dam/json/.content.xml new file mode 100644 index 0000000..f945d0b --- /dev/null +++ b/ui.apps/src/main/content/jcr_root/content/dam/json/.content.xml @@ -0,0 +1,8 @@ + + + + diff --git a/ui.apps/src/main/content/jcr_root/content/json/_rep_policy.xml b/ui.apps/src/main/content/jcr_root/content/dam/json/_rep_policy.xml similarity index 100% rename from ui.apps/src/main/content/jcr_root/content/json/_rep_policy.xml rename to ui.apps/src/main/content/jcr_root/content/dam/json/_rep_policy.xml diff --git a/ui.apps/src/main/content/jcr_root/content/json/.content.xml b/ui.apps/src/main/content/jcr_root/content/json/.content.xml deleted file mode 100644 index 792d2ea..0000000 --- a/ui.apps/src/main/content/jcr_root/content/json/.content.xml +++ /dev/null @@ -1,4 +0,0 @@ - -