logo       

[HtmlUnit] CVS Commit: src/xdocs: Encode path part of urls containing ille: msg#00024

java.htmlunit.devel

Subject: [HtmlUnit] CVS Commit: src/xdocs: Encode path part of urls containing illegal characters

Log Message:
-----------
Encode path part of urls containing illegal characters (fix for bug 1437068)

Modified Files:
--------------
htmlunit/src/test/java/com/gargoylesoftware/htmlunit:
WebClientTest.java

(http://cvs.sourceforge.net/viewcvs.py/htmlunit/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/WebClientTest.java)
htmlunit/src/java/com/gargoylesoftware/htmlunit:
WebClient.java

(http://cvs.sourceforge.net/viewcvs.py/htmlunit/htmlunit/src/java/com/gargoylesoftware/htmlunit/WebClient.java)
htmlunit/src/xdocs:
changes.xml

(http://cvs.sourceforge.net/viewcvs.py/htmlunit/htmlunit/src/xdocs/changes.xml)

Revision Data
-------------
Index: WebClient.java
===================================================================
RCS file:
/cvsroot/htmlunit/htmlunit/src/java/com/gargoylesoftware/htmlunit/WebClient.java,v
retrieving revision 1.118
retrieving revision 1.119
diff -Lsrc/java/com/gargoylesoftware/htmlunit/WebClient.java
-Lsrc/java/com/gargoylesoftware/htmlunit/WebClient.java -u -d -r1.118 -r1.119
--- src/java/com/gargoylesoftware/htmlunit/WebClient.java
+++ src/java/com/gargoylesoftware/htmlunit/WebClient.java
@@ -1392,49 +1392,66 @@
}

/**
- * Encodes illegal parameter in query string (if any) as done by browsers.
- * Example: change "http://first?a=b c" to "http://first?a=b%20c"
+ * Encodes illegal parameter in path or query string (if any) as done by
browsers.
+ * Example: changes "http://first?a=b c" to "http://first?a=b%20c"
* @param url the url to encode
* @return the provided url if no change needed, the fixed url else
* @throws MalformedURLException if the new URL could note be instantiated
* @throws URIException if the default protocol charset is not supported
*/
protected URL encodeUrl(final URL url) throws MalformedURLException,
URIException {
- // just look at urls with query string (better test?)
- final String str = url.toExternalForm();
- final int queryStart = url.toExternalForm().indexOf('?');
- if (queryStart != -1) {
- // extract query string: browsers seem not to encode everything,
for instance not "#"
- final String query;
- final int anchorStart = str.indexOf('#');
- if (anchorStart < queryStart) {
- query = str.substring(queryStart);
+ final String path = url.getPath();
+ final String fixedPath = encode(path, URI.allowed_abs_path);
+ final String query = url.getQuery();
+ final String fixedQuery = encode(query, URI.allowed_query);
+
+ if (!StringUtils.equals(path, fixedPath) || !StringUtils.equals(query,
fixedQuery)) {
+ final StringBuffer newUrl = new StringBuffer();
+ newUrl.append(url.getProtocol());
+ newUrl.append("://");
+ newUrl.append(url.getHost());
+ if (url.getPort() != -1) {
+ newUrl.append(":");
+ newUrl.append(url.getPort());
}
- else {
- query = str.substring(queryStart, anchorStart);
+ newUrl.append(fixedPath);
+ if (url.getUserInfo() != null) {
+ newUrl.append(url.getUserInfo());
}
-
- // url may be partially encoded like "http://first?a=b%20c&d=e f"
-// // don't re-encode the %'s from already encoded items
- final BitSet partiallyEncodedQuery = new BitSet(256);
- partiallyEncodedQuery.set('%');
- partiallyEncodedQuery.or(URI.allowed_query);
- final String fixedQuery = URIUtil.encode(query,
partiallyEncodedQuery);
- if (query.equals(fixedQuery)) {
- return url;
+ if (fixedQuery != null) {
+ newUrl.append("?");
+ newUrl.append(fixedQuery);
}
- else {
- final StringBuffer newUrl = new StringBuffer(str);
- newUrl.replace(queryStart, queryStart + query.length(),
fixedQuery);
- return new URL(newUrl.toString());
+ if (url.getRef() != null) {
+ newUrl.append("#");
+ newUrl.append(url.getRef());
}
+
+ return new URL(newUrl.toString());
}
else {
return url;
}
+ }

+ /**
+ * Encodes unallowed characters in a string
+ * @param str the string to encode
+ * @param allowed the allowed characters
+ * @return the encoded string
+ * @throws URIException if encoding fails
+ */
+ private String encode(final String str, final BitSet allowed) throws
URIException {
+ if (str == null) {
+ return null;
+ }
+ final BitSet bits = new BitSet(str.length());
+ bits.set('%');
+ bits.or(allowed);
+ return URIUtil.encode(str, bits);
}

+
/**
* Remove the focus to the specified component. This will trigger any
relevant javascript
* event handlers.
Index: WebClientTest.java
===================================================================
RCS file:
/cvsroot/htmlunit/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/WebClientTest.java,v
retrieving revision 1.67
retrieving revision 1.68
diff -Lsrc/test/java/com/gargoylesoftware/htmlunit/WebClientTest.java
-Lsrc/test/java/com/gargoylesoftware/htmlunit/WebClientTest.java -u -d -r1.67
-r1.68
--- src/test/java/com/gargoylesoftware/htmlunit/WebClientTest.java
+++ src/test/java/com/gargoylesoftware/htmlunit/WebClientTest.java
@@ -626,10 +626,10 @@
}

/**
- * Test that the query string is encoded to be valid.
+ * Test that the path and query string are encoded to be valid.
* @throws Exception If something goes wrong.
*/
- public void testLoadPage_EncodeQueryString() throws Exception {
+ public void testLoadPage_EncodeRequest() throws Exception {
final String htmlContent
= "<html><head><title>foo</title></head><body>"
+ "</body></html>";
@@ -642,34 +642,31 @@

// with query string not encoded
HtmlPage page = (HtmlPage) client.getPage(new URL("http://first?a=b
c&d=" + ((char) 0xE9) + ((char) 0xE8)));
- assertEquals(
- "http://first?a=b%20c&d=%C3%A9%C3%A8",
- page.getWebResponse().getUrl());
-
+ assertEquals("http://first?a=b%20c&d=%C3%A9%C3%A8",
page.getWebResponse().getUrl());

// with query string already encoded
page = (HtmlPage) client.getPage(new
URL("http://first?a=b%20c&d=%C3%A9%C3%A8"));
- assertEquals(
- "http://first?a=b%20c&d=%C3%A9%C3%A8",
- page.getWebResponse().getUrl().toExternalForm());
+ assertEquals("http://first?a=b%20c&d=%C3%A9%C3%A8",
page.getWebResponse().getUrl());

// with query string partially encoded
page = (HtmlPage) client.getPage(new URL("http://first?a=b%20c&d=e
f"));
- assertEquals(
- "http://first?a=b%20c&d=e%20f",
- page.getWebResponse().getUrl().toExternalForm());
+ assertEquals("http://first?a=b%20c&d=e%20f",
page.getWebResponse().getUrl());

// with anchor
page = (HtmlPage) client.getPage(new URL("http://first?a=b
c#myAnchor"));
- assertEquals(
- "http://first?a=b%20c#myAnchor",
- page.getWebResponse().getUrl().toExternalForm());
+ assertEquals("http://first?a=b%20c#myAnchor",
page.getWebResponse().getUrl());

// with query string containing encoded "&", "=", "+", ",", and "$"
page = (HtmlPage) client.getPage(new
URL("http://first?a=%26%3D%20%2C%24"));
- assertEquals(
- "http://first?a=%26%3D%20%2C%24",
- page.getWebResponse().getUrl().toExternalForm());
+ assertEquals("http://first?a=%26%3D%20%2C%24",
page.getWebResponse().getUrl());
+
+ // with character to encode in path
+ page = (HtmlPage) client.getPage(new URL("http://first/page 1.html"));
+ assertEquals("http://first/page%201.html",
page.getWebResponse().getUrl());
+
+ // with character to encode in path
+ page = (HtmlPage) client.getPage(new URL("http://first/page 1.html"));
+ assertEquals("http://first/page%201.html",
page.getWebResponse().getUrl());
}

/**
Index: changes.xml
===================================================================
RCS file: /cvsroot/htmlunit/htmlunit/src/xdocs/changes.xml,v
retrieving revision 1.492
retrieving revision 1.493
diff -Lsrc/xdocs/changes.xml -Lsrc/xdocs/changes.xml -u -d -r1.492 -r1.493
--- src/xdocs/changes.xml
+++ src/xdocs/changes.xml
@@ -7,6 +7,9 @@

<body>
<release version="next">
+ <action type="update" dev="mguillem" id="1437068">
+ Encode path part of urls containing illegal
characters.
+ </action>
<action type="update" dev="mguillem" id="1436102" due-to="Brad
Murray">
Added support for firing onkeydown events.
</action>


-------------------------------------------------------
This SF.Net email is sponsored by xPML, a groundbreaking scripting language
that extends applications into web and mobile media. Attend the live webcast
and join the prime developer group breaking into this new coding territory!
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=110944&bid=241720&dat=121642


<Prev in Thread] Current Thread [Next in Thread>
Google Custom Search

News | FAQ | advertise