|
[HtmlUnit] CVS Commit: src/xdocs: Encode path part of urls containing illegal characters: msg#00024 java.htmlunit.devel
Log Message: ----------- Encode path part of urls containing illegal characters (fix for bug 1437068) Modified Files: -------------- htmlunit/src/test/java/com/gargoylesoftware/htmlunit: WebClientTest.java (http://cvs.sourceforge.net/viewcvs.py/htmlunit/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/WebClientTest.java) htmlunit/src/java/com/gargoylesoftware/htmlunit: WebClient.java (http://cvs.sourceforge.net/viewcvs.py/htmlunit/htmlunit/src/java/com/gargoylesoftware/htmlunit/WebClient.java) htmlunit/src/xdocs: changes.xml (http://cvs.sourceforge.net/viewcvs.py/htmlunit/htmlunit/src/xdocs/changes.xml) Revision Data ------------- Index: WebClient.java =================================================================== RCS file: /cvsroot/htmlunit/htmlunit/src/java/com/gargoylesoftware/htmlunit/WebClient.java,v retrieving revision 1.118 retrieving revision 1.119 diff -Lsrc/java/com/gargoylesoftware/htmlunit/WebClient.java -Lsrc/java/com/gargoylesoftware/htmlunit/WebClient.java -u -d -r1.118 -r1.119 --- src/java/com/gargoylesoftware/htmlunit/WebClient.java +++ src/java/com/gargoylesoftware/htmlunit/WebClient.java @@ -1392,49 +1392,66 @@ } /** - * Encodes illegal parameter in query string (if any) as done by browsers. - * Example: change "http://first?a=b c" to "http://first?a=b%20c" + * Encodes illegal parameter in path or query string (if any) as done by browsers. + * Example: changes "http://first?a=b c" to "http://first?a=b%20c" * @param url the url to encode * @return the provided url if no change needed, the fixed url else * @throws MalformedURLException if the new URL could note be instantiated * @throws URIException if the default protocol charset is not supported */ protected URL encodeUrl(final URL url) throws MalformedURLException, URIException { - // just look at urls with query string (better test?) 
- final String str = url.toExternalForm(); - final int queryStart = url.toExternalForm().indexOf('?'); - if (queryStart != -1) { - // extract query string: browsers seem not to encode everything, for instance not "#" - final String query; - final int anchorStart = str.indexOf('#'); - if (anchorStart < queryStart) { - query = str.substring(queryStart); + final String path = url.getPath(); + final String fixedPath = encode(path, URI.allowed_abs_path); + final String query = url.getQuery(); + final String fixedQuery = encode(query, URI.allowed_query); + + if (!StringUtils.equals(path, fixedPath) || !StringUtils.equals(query, fixedQuery)) { + final StringBuffer newUrl = new StringBuffer(); + newUrl.append(url.getProtocol()); + newUrl.append("://"); + newUrl.append(url.getHost()); + if (url.getPort() != -1) { + newUrl.append(":"); + newUrl.append(url.getPort()); } - else { - query = str.substring(queryStart, anchorStart); + newUrl.append(fixedPath); + if (url.getUserInfo() != null) { + newUrl.append(url.getUserInfo()); } - - // url may be partially encoded like "http://first?a=b%20c&d=e f" -// // don't re-encode the %'s from already encoded items - final BitSet partiallyEncodedQuery = new BitSet(256); - partiallyEncodedQuery.set('%'); - partiallyEncodedQuery.or(URI.allowed_query); - final String fixedQuery = URIUtil.encode(query, partiallyEncodedQuery); - if (query.equals(fixedQuery)) { - return url; + if (fixedQuery != null) { + newUrl.append("?"); + newUrl.append(fixedQuery); } - else { - final StringBuffer newUrl = new StringBuffer(str); - newUrl.replace(queryStart, queryStart + query.length(), fixedQuery); - return new URL(newUrl.toString()); + if (url.getRef() != null) { + newUrl.append("#"); + newUrl.append(url.getRef()); } + + return new URL(newUrl.toString()); } else { return url; } + } + /** + * Encodes unallowed characters in a string + * @param str the string to encode + * @param allowed the allowed characters + * @return the encoded string + * @throws 
URIException if encoding fails + */ + private String encode(final String str, final BitSet allowed) throws URIException { + if (str == null) { + return null; + } + final BitSet bits = new BitSet(str.length()); + bits.set('%'); + bits.or(allowed); + return URIUtil.encode(str, bits); } + /** * Remove the focus to the specified component. This will trigger any relevant javascript * event handlers. Index: WebClientTest.java =================================================================== RCS file: /cvsroot/htmlunit/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/WebClientTest.java,v retrieving revision 1.67 retrieving revision 1.68 diff -Lsrc/test/java/com/gargoylesoftware/htmlunit/WebClientTest.java -Lsrc/test/java/com/gargoylesoftware/htmlunit/WebClientTest.java -u -d -r1.67 -r1.68 --- src/test/java/com/gargoylesoftware/htmlunit/WebClientTest.java +++ src/test/java/com/gargoylesoftware/htmlunit/WebClientTest.java @@ -626,10 +626,10 @@ } /** - * Test that the query string is encoded to be valid. + * Test that the path and query string are encoded to be valid. * @throws Exception If something goes wrong. 
*/ - public void testLoadPage_EncodeQueryString() throws Exception { + public void testLoadPage_EncodeRequest() throws Exception { final String htmlContent = "<html><head><title>foo</title></head><body>" + "</body></html>"; @@ -642,34 +642,31 @@ // with query string not encoded HtmlPage page = (HtmlPage) client.getPage(new URL("http://first?a=b c&d=" + ((char) 0xE9) + ((char) 0xE8))); - assertEquals( - "http://first?a=b%20c&d=%C3%A9%C3%A8", - page.getWebResponse().getUrl()); - + assertEquals("http://first?a=b%20c&d=%C3%A9%C3%A8", page.getWebResponse().getUrl()); // with query string already encoded page = (HtmlPage) client.getPage(new URL("http://first?a=b%20c&d=%C3%A9%C3%A8")); - assertEquals( - "http://first?a=b%20c&d=%C3%A9%C3%A8", - page.getWebResponse().getUrl().toExternalForm()); + assertEquals("http://first?a=b%20c&d=%C3%A9%C3%A8", page.getWebResponse().getUrl()); // with query string partially encoded page = (HtmlPage) client.getPage(new URL("http://first?a=b%20c&d=e f")); - assertEquals( - "http://first?a=b%20c&d=e%20f", - page.getWebResponse().getUrl().toExternalForm()); + assertEquals("http://first?a=b%20c&d=e%20f", page.getWebResponse().getUrl()); // with anchor page = (HtmlPage) client.getPage(new URL("http://first?a=b c#myAnchor")); - assertEquals( - "http://first?a=b%20c#myAnchor", - page.getWebResponse().getUrl().toExternalForm()); + assertEquals("http://first?a=b%20c#myAnchor", page.getWebResponse().getUrl()); // with query string containing encoded "&", "=", "+", ",", and "$" page = (HtmlPage) client.getPage(new URL("http://first?a=%26%3D%20%2C%24")); - assertEquals( - "http://first?a=%26%3D%20%2C%24", - page.getWebResponse().getUrl().toExternalForm()); + assertEquals("http://first?a=%26%3D%20%2C%24", page.getWebResponse().getUrl()); + + // with character to encode in path + page = (HtmlPage) client.getPage(new URL("http://first/page 1.html")); + assertEquals("http://first/page%201.html", page.getWebResponse().getUrl()); + + // with character to 
encode in path + page = (HtmlPage) client.getPage(new URL("http://first/page 1.html")); + assertEquals("http://first/page%201.html", page.getWebResponse().getUrl()); } /** Index: changes.xml =================================================================== RCS file: /cvsroot/htmlunit/htmlunit/src/xdocs/changes.xml,v retrieving revision 1.492 retrieving revision 1.493 diff -Lsrc/xdocs/changes.xml -Lsrc/xdocs/changes.xml -u -d -r1.492 -r1.493 --- src/xdocs/changes.xml +++ src/xdocs/changes.xml @@ -7,6 +7,9 @@ <body> <release version="next"> + <action type="update" dev="mguillem" id="1437068"> + Encode path part of urls containing illegal characters. + </action> <action type="update" dev="mguillem" id="1436102" due-to="Brad Murray"> Added support for firing onkeydown events. </action> ------------------------------------------------------- This SF.Net email is sponsored by xPML, a groundbreaking scripting language that extends applications into web and mobile media. Attend the live webcast and join the prime developer group breaking into this new coding territory! http://sel.as-us.falkag.net/sel?cmd=lnk&kid=110944&bid=241720&dat=121642
|
|
| <Prev in Thread] | Current Thread | [Next in Thread> |
|---|---|---|
| Previous by Date: | Re: [HtmlUnit] Too many files in htmlunit-1.8-src?, Brad Clarke |
|---|---|
| Next by Date: | [HtmlUnit] CVS Commit: /htmlunit: upgraded dom4j from 1.5 to 1.6.1, mguillem |
| Previous by Thread: | [HtmlUnit] Too many files in htmlunit-1.8-src?, Marc Guillemot |
| Next by Thread: | [HtmlUnit] CVS Commit: /htmlunit: upgraded dom4j from 1.5 to 1.6.1, mguillem |
| Indexes: | [Date] [Thread] [Top] [All Lists] |
| News | FAQ | advertise |