From d51df3408b9ce5f8d0f73d9038b46834e32f3c29 Mon Sep 17 00:00:00 2001 From: Gleb Gadyatskiy Date: Thu, 1 Jun 2017 14:59:41 -0400 Subject: [PATCH] Added support for UTF-8 characters less 32 or above 126 codes (out of ASCII range) --- src/main/java/com/jeldoclet/XMLNode.java | 99 +++++++++++++++++++----- 1 file changed, 79 insertions(+), 20 deletions(-) diff --git a/src/main/java/com/jeldoclet/XMLNode.java b/src/main/java/com/jeldoclet/XMLNode.java index 491169d..6c6a56b 100644 --- a/src/main/java/com/jeldoclet/XMLNode.java +++ b/src/main/java/com/jeldoclet/XMLNode.java @@ -6,7 +6,6 @@ import java.util.HashMap; import java.util.Iterator; import java.util.Vector; -import java.util.regex.Pattern; /** * Represents an XML node @@ -178,7 +177,7 @@ public void save(String dir, String fileName, boolean includeNamespace, String o */ public String toString(String tabs) { - StringBuffer out = new StringBuffer(); + StringBuilder out = new StringBuilder(); out.append( tabs + "<" + _namespacePrefix + _type ); Iterator attrIterator = _attributes.keySet().iterator(); @@ -218,28 +217,88 @@ public String toString(String tabs) return out.toString(); } - /** - * Encodes strings as XML. Check for <, >, ', ", &. +// /** +// * Encodes strings as XML. Check for <, >, ', ", &. +// * +// * @param in The input string +// * @return The encoded string. +// */ +// static protected String encode( String in ) +// { +// Pattern ampPat = Pattern.compile( "&" ); +// Pattern ltPat = Pattern.compile( "<" ); +// Pattern gtPat = Pattern.compile( ">" ); +// Pattern aposPat = Pattern.compile( "\'" ); +// Pattern quotPat = Pattern.compile( "\"" ); +// +// String out = new String( in ); +// +// out = (ampPat.matcher(out)).replaceAll("&"); +// out = (ltPat.matcher(out)).replaceAll("<"); +// out = (gtPat.matcher(out)).replaceAll(">"); +// out = (aposPat.matcher(out)).replaceAll("'"); +// out = (quotPat.matcher(out)).replaceAll("""); +// +// return out; +// } + + /** + * Returns the string where all non-ascii and <, &, > are encoded as numeric entities. I.e. "<A & B >" + * .... (insert result here). The result is safe to include anywhere in a text field in an XML-string. If there was + * no characters to protect, the original string is returned. * - * @param in The input string - * @return The encoded string. + * @param originalUnprotectedString + * original string which may contain characters either reserved in XML or with different representation + * in different encodings (like 8859-1 and UFT-8) + * @see https://stackoverflow.com/questions/439298/best-way-to-encode-text-data-for-xml-in-java + * @return */ - static protected String encode( String in ) - { - Pattern ampPat = Pattern.compile( "&" ); - Pattern ltPat = Pattern.compile( "<" ); - Pattern gtPat = Pattern.compile( ">" ); - Pattern aposPat = Pattern.compile( "\'" ); - Pattern quotPat = Pattern.compile( "\"" ); + static String encode(String originalUnprotectedString) { + if (originalUnprotectedString == null) { + return null; + } + boolean anyCharactersProtected = false; - String out = new String( in ); + StringBuilder stringBuffer = new StringBuilder(originalUnprotectedString.length()); + for (int i = 0; i < originalUnprotectedString.length(); i++) { + char ch = originalUnprotectedString.charAt(i); - out = (ampPat.matcher(out)).replaceAll("&"); - out = (ltPat.matcher(out)).replaceAll("<"); - out = (gtPat.matcher(out)).replaceAll(">"); - out = (aposPat.matcher(out)).replaceAll("'"); - out = (quotPat.matcher(out)).replaceAll("""); + if (ch<32 || ch>126) { + // control characters or unicode but not Ascii + stringBuffer.append("&#" + (int) ch + ";"); + anyCharactersProtected = true; + } else + switch (ch) + { + case '<': + stringBuffer.append("<"); + anyCharactersProtected = true; + break; + case '>': + stringBuffer.append(">"); + anyCharactersProtected = true; + break; + case '&': + stringBuffer.append("&"); + anyCharactersProtected = true; + break; + case '\'': + stringBuffer.append("'"); + anyCharactersProtected = true; + break; + case '"': + stringBuffer.append("""); + anyCharactersProtected = true; + break; + default: + stringBuffer.append(ch); + anyCharactersProtected = true; + } + } + if (anyCharactersProtected == false) { + return originalUnprotectedString; + } - return out; + return stringBuffer.toString(); } }