Skip to content

Commit 291c94d

Browse files
authored
Merge pull request #1 from cnsgithub/kml-encoding-20
resolves OWASP#20 - HTML encoding for KML
2 parents 81b9fdc + d1c1bed commit 291c94d

File tree

6 files changed

+224
-5
lines changed

6 files changed

+224
-5
lines changed

core/src/main/java/org/owasp/encoder/Encode.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,30 @@ public static void forXmlComment(Writer out, String input)
861861
encode(Encoders.XML_COMMENT_ENCODER, out, input);
862862
}
863863

864+
/**
865+
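* Encodes input for KML. Works like the XML encoder, except that {@code &}, {@code <} and {@code >} are written as numeric character references ({@code &#38;}, {@code &#60;}, {@code &#62;}) instead of named entity references.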
866+
*
867+
* @param input the input to encode
868+
* @return the encoded result
869+
*/
870+
public static String forKml(String input) {
871+
return encode(Encoders.KML_ENCODER, input);
872+
}
873+
874+
/**
875+
* See {@link #forKml(String)} for description of encoding. This
876+
* version writes directly to a Writer without an intervening string.
877+
*
878+
* @param out where to write encoded output
879+
* @param input the input string to encode
880+
* @throws IOException if thrown by writer
881+
*/
882+
public static void forKml(Writer out, String input)
883+
throws IOException
884+
{
885+
encode(Encoders.KML_ENCODER, out, input);
886+
}
887+
864888
/**
865889
* Encodes data for an XML CDATA section. On the chance that the input
866890
* contains a terminating {@code "]]>"}, it will be replaced by

core/src/main/java/org/owasp/encoder/Encoders.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ public final class Encoders {
8888
* Name of {@linkplain Encode#forXmlComment(String) XML comment} context.
8989
*/
9090
public static final String XML_COMMENT = "xml-comment";
91+
/**
92+
* Name of {@linkplain Encode#forKml(String) KML} context.
93+
*/
94+
public static final String KML = "kml";
9195
/**
9296
* Name of {@linkplain Encode#forCDATA(String) CDATA} context.
9397
*/
@@ -160,6 +164,11 @@ public final class Encoders {
160164
*/
161165
static final XMLCommentEncoder XML_COMMENT_ENCODER
162166
= map(XML_COMMENT, new XMLCommentEncoder());
167+
/**
168+
* Encoder for KML contexts.
169+
*/
170+
static final KMLEncoder KML_ENCODER
171+
= map(KML, new KMLEncoder());
163172
/**
164173
* Encoder for CDATA contexts.
165174
*/
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// Copyright (c) 2012 Jeff Ichnowski
2+
// All rights reserved.
3+
//
4+
// Redistribution and use in source and binary forms, with or without
5+
// modification, are permitted provided that the following conditions
6+
// are met:
7+
//
8+
// * Redistributions of source code must retain the above
9+
// copyright notice, this list of conditions and the following
10+
// disclaimer.
11+
//
12+
// * Redistributions in binary form must reproduce the above
13+
// copyright notice, this list of conditions and the following
14+
// disclaimer in the documentation and/or other materials
15+
// provided with the distribution.
16+
//
17+
// * Neither the name of the OWASP nor the names of its
18+
// contributors may be used to endorse or promote products
19+
// derived from this software without specific prior written
20+
// permission.
21+
//
22+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25+
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26+
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
27+
// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28+
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
29+
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30+
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31+
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
33+
// OF THE POSSIBILITY OF SUCH DAMAGE.
34+
package org.owasp.encoder;
35+
36+
import java.nio.CharBuffer;
37+
import java.nio.charset.CoderResult;
38+
39+
/**
40+
* KMLEncoder -- Special case of XML encoding using numeric character entities (e.g. {@code &#60;}) instead of entity references (e.g. {@code &lt;}).
41+
* This encoder should be used instead of {@link XMLEncoder} to address some shortcomings in the KML specification and the way Google Earth (at least the desktop version) interprets HTML.
42+
*
43+
* @see <a href="http://kml4earth.appspot.com/kmlErrata.html?#encoding">KML Reference Errata</a>
44+
* @see <a href="https://github.com/OWASP/owasp-java-encoder/issues/20">OWASP Issue</a>
45+
*
46+
* @author cnsgithub
47+
*/
48+
class KMLEncoder extends XMLEncoder {
49+
50+
@Override
51+
protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean endOfInput) {
52+
return super.encodeArrays(input, output, endOfInput, true);
53+
}
54+
55+
}

core/src/main/java/org/owasp/encoder/XMLEncoder.java

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,15 +75,15 @@ class XMLEncoder extends Encoder {
7575
/**
7676
* The encoded length of an ampersand.
7777
*/
78-
static final int AMP_LENGTH = 5;
78+
static final int AMP_LENGTH = 5, AMP_NUMERIC_LENGTH = 5;
7979
/**
8080
* The encoded length of a less-than sign.
8181
*/
82-
static final int LT_LENGTH = 4;
82+
static final int LT_LENGTH = 4, LT_NUMERIC_LENGTH = 5;
8383
/**
8484
* The encoded length of a greater-than sign.
8585
*/
86-
static final int GT_LENGTH = 4;
86+
static final int GT_LENGTH = 4, GT_NUMERIC_LENGTH = 5;
8787
/**
8888
* The encoded length of an apostrophe.
8989
*/
@@ -245,6 +245,10 @@ public int firstEncodedOffset(String input, int off, int len) {
245245
* {@inheritDoc}
246246
*/
247247
protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean endOfInput) {
248+
return encodeArrays(input, output, endOfInput, false);
249+
}
250+
251+
protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean endOfInput, boolean avoidEntityReferences) {
248252
final char[] in = input.array();
249253
final char[] out = output.array();
250254
int i = input.arrayOffset() + input.position();
@@ -264,6 +268,17 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
264268
} else {
265269
switch (ch) {
266270
case '&':
271+
if (avoidEntityReferences) {
272+
if (j + AMP_NUMERIC_LENGTH > m) {
273+
return overflow(input, i, output, j);
274+
}
275+
out[j++] = '&';
276+
out[j++] = '#';
277+
out[j++] = '3';
278+
out[j++] = '8';
279+
out[j++] = ';';
280+
break;
281+
}
267282
if (j + AMP_LENGTH > m) {
268283
return overflow(input, i, output, j);
269284
}
@@ -274,6 +289,17 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
274289
out[j++] = ';';
275290
break;
276291
case '<':
292+
if (avoidEntityReferences) {
293+
if (j + LT_NUMERIC_LENGTH > m) {
294+
return overflow(input, i, output, j);
295+
}
296+
out[j++] = '&';
297+
out[j++] = '#';
298+
out[j++] = '6';
299+
out[j++] = '0';
300+
out[j++] = ';';
301+
break;
302+
}
277303
if (j + LT_LENGTH > m) {
278304
return overflow(input, i, output, j);
279305
}
@@ -283,6 +309,18 @@ protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean
283309
out[j++] = ';';
284310
break;
285311
case '>':
312+
if (avoidEntityReferences) {
313+
if (j + GT_NUMERIC_LENGTH > m) {
314+
return overflow(input, i, output, j);
315+
}
316+
out[j++] = '&';
317+
out[j++] = '#';
318+
out[j++] = '6';
319+
out[j++] = '2';
320+
out[j++] = ';';
321+
break;
322+
323+
}
286324
if (j + GT_LENGTH > m) {
287325
return overflow(input, i, output, j);
288326
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Copyright (c) 2012 Jeff Ichnowski
2+
// All rights reserved.
3+
//
4+
// Redistribution and use in source and binary forms, with or without
5+
// modification, are permitted provided that the following conditions
6+
// are met:
7+
//
8+
// * Redistributions of source code must retain the above
9+
// copyright notice, this list of conditions and the following
10+
// disclaimer.
11+
//
12+
// * Redistributions in binary form must reproduce the above
13+
// copyright notice, this list of conditions and the following
14+
// disclaimer in the documentation and/or other materials
15+
// provided with the distribution.
16+
//
17+
// * Neither the name of the OWASP nor the names of its
18+
// contributors may be used to endorse or promote products
19+
// derived from this software without specific prior written
20+
// permission.
21+
//
22+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25+
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26+
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
27+
// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28+
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
29+
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30+
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31+
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
33+
// OF THE POSSIBILITY OF SUCH DAMAGE.
34+
35+
package org.owasp.encoder;
36+
37+
import junit.framework.Test;
38+
import junit.framework.TestCase;
39+
import junit.framework.TestSuite;
40+
41+
/**
42+
* KMLEncoderTest -- test suite for the KMLEncoder.
43+
*
44+
* @author cnsgithub
45+
*/
46+
public class KMLEncoderTest extends TestCase {
47+
48+
public static Test suite() {
49+
TestSuite suite = new TestSuite();
50+
EncoderTestSuiteBuilder builder = new EncoderTestSuiteBuilder(new KMLEncoder(), "-safe-", "-&-")
51+
.encode("&#60;strike&#62;foo &#38; bar&#60;/strike&#62;", "<strike>foo & bar</strike>")
52+
.encode("invalid-control-characters", " b ", "\0b\26")
53+
.encode("valid-surrogate-pair", "\ud800\udc00", "\ud800\udc00")
54+
.encode("missing-low-surrogate", " ", "\ud800")
55+
.encode("missing-high-surrogate", " ", "\udc00")
56+
.encode("valid-upper-char", "\ufffd", "\ufffd")
57+
.encode("invalid-upper-char", " ", "\uffff")
58+
.invalid(0, 0x1f)
59+
.valid("\t\r\n")
60+
.valid(' ', Character.MAX_CODE_POINT)
61+
.invalid(0x7f, 0x9f)
62+
.valid("\u0085")
63+
.invalid(Character.MIN_SURROGATE, Character.MAX_SURROGATE)
64+
.invalid(0xfdd0, 0xfdef)
65+
.invalid(0xfffe, 0xffff)
66+
.invalid(0x1fffe, 0x1ffff)
67+
.invalid(0x2fffe, 0x2ffff)
68+
.invalid(0x3fffe, 0x3ffff)
69+
.invalid(0x4fffe, 0x4ffff)
70+
.invalid(0x5fffe, 0x5ffff)
71+
.invalid(0x6fffe, 0x6ffff)
72+
.invalid(0x7fffe, 0x7ffff)
73+
.invalid(0x8fffe, 0x8ffff)
74+
.invalid(0x9fffe, 0x9ffff)
75+
.invalid(0xafffe, 0xaffff)
76+
.invalid(0xbfffe, 0xbffff)
77+
.invalid(0xcfffe, 0xcffff)
78+
.invalid(0xdfffe, 0xdffff)
79+
.invalid(0xefffe, 0xeffff)
80+
.invalid(0xffffe, 0xfffff)
81+
.invalid(0x10fffe, 0x10ffff);
82+
83+
builder.encoded("&><\'\"")
84+
.encode("&#39;", "\'")
85+
.encode("&#34;", "\"")
86+
.encode("safe", "safe");
87+
88+
suite.addTest(builder.validSuite().invalidSuite(XMLEncoder.INVALID_CHARACTER_REPLACEMENT).encodedSuite().build());
89+
return suite;
90+
}
91+
92+
}
93+

pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,8 @@
261261
<groupId>org.apache.maven.plugins</groupId>
262262
<artifactId>maven-compiler-plugin</artifactId>
263263
<configuration>
264-
<source>1.5</source>
265-
<target>1.5</target>
264+
<source>1.6</source>
265+
<target>1.6</target>
266266
</configuration>
267267
</plugin>
268268
<plugin>

0 commit comments

Comments
 (0)