由于 &language 参数,ESAPI 验证 URL 失败
ESAPI validate URL fails due to &language parameter
我正在尝试使用 ESAPI 验证器来验证 URL,但由于 URL 中的 &language 参数,我的验证失败了。如果我删除 language 参数,则验证成功。请检查我的 URL 模式并告诉我此模式失败的原因。
// Sample URL from the question; its query string ends with the "&language=hindi" parameter.
String url="http://google.com:000/menu.jsp?userid=test&age=22&language=hindi";
// Ask ESAPI to validate the URL. NOTE(review): the "URL" arguments presumably name the context and the
// Validator.URL rule, followed by the maximum length and the allowNull flag - confirm against the ESAPI API.
ESAPI.validator().getValidInput("URL", url,"URL",100000,false);
正则表达式
Validator.URL=^(ht|f)tp(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:(0-9)*)*(\/?)([a-zA-Z0-9\-\.\?\,\:\'\/\\\+=&%\&language\$#_]*)?$
这是设计使然。这里发生的事情是您的输入被 ESAPI 的规范化方法捕获,该方法正在检测 URI 是否同时包含 URI 编码和 HTML 实体编码。
ESAPI 的下一个版本将包括一个方便的方法,但是它需要一些步骤。
便捷方法:
/**
 * {@inheritDoc}
 *
 * <p>Implementation notes: the input is converted to a {@link URI} via {@code getRfcCompliantURI},
 * canonicalized segment by segment via {@code getCanonicalizedURI}, and the canonical form is then
 * matched against the "URL" pattern from the ESAPI security configuration. The regular validator API
 * is deliberately bypassed because it would canonicalize the input a second time.
 *
 * @param context   descriptive name of the value being validated; not used by this implementation
 * @param input     the candidate URI string
 * @param allowNull when {@code true}, a {@code null} or empty {@code input} is reported as valid
 * @return {@code true} if the input is acceptable; {@code false} if it is not RFC compliant, if
 *         canonicalization raises an {@link IntrusionException}, if no "URL" pattern is configured,
 *         or if the canonical form does not match that pattern
 */
public boolean isValidURI(String context, String input, boolean allowNull) {
    // Honour allowNull, which the previous version accepted but never consulted.
    boolean inputIsNullOrEmpty = input == null || input.isEmpty();
    if (inputIsNullOrEmpty && allowNull) {
        return true;
    }
    if (input == null) {
        return false;
    }

    boolean isValid = false;
    URI compliantURI = this.getRfcCompliantURI(input);
    try {
        if (null != compliantURI) {
            String canonicalizedURI = getCanonicalizedURI(compliantURI);
            // If getCanonicalizedURI doesn't throw an IntrusionException, then the URI contains no
            // mixed or double-encoding attacks.
            logger.info(Logger.SECURITY_SUCCESS, "We did not detect any mixed or multiple encoding in the uri:[" + input + "]");
            // This part uses the regex from validation.properties. That regex should be super-simple
            // and used mainly to restrict certain parts of a URL.
            Pattern p = ESAPI.securityConfiguration().getValidationPattern( "URL" );
            if (p == null) {
                // Fail closed instead of hitting a NullPointerException when the rule is missing.
                logger.error(Logger.SECURITY_FAILURE, "No validation pattern is configured for [URL]; rejecting the uri:[" + input + "]");
            } else {
                // We match directly instead of using the normal validator API, because that API would
                // canonicalize the input again; a query that happens to look like an HTML entity
                // (such as &para, the pilcrow) would then stop conforming to the URL regex.
                isValid = p.matcher(canonicalizedURI).matches();
            }
        }
    } catch (IntrusionException e) {
        logger.error(Logger.SECURITY_FAILURE, e.getMessage());
        isValid = false;
    }
    return isValid;
}
/**
 * Splits a URI into its segments, canonicalizes each segment, and hands the canonicalized segments
 * to {@code buildUrl} to produce a single string that is safe to run a validation regex against.
 *
 * <p>The query segment is additionally split into key/value pairs via {@link #splitQuery(URI)} and
 * rebuilt as {@code key=value} pairs joined by {@code &}. Every value of a repeated key is emitted,
 * so no part of the query escapes the later regex check, and a missing value is written as an
 * empty string rather than the text "null".
 *
 * @param dirtyUri the parsed, not yet canonicalized URI
 * @return the string built by {@code buildUrl} from the canonicalized segments
 * @throws IntrusionException propagated from canonicalization; callers treat it as evidence of a
 *         mixed or multiple encoding attack
 */
public String getCanonicalizedURI(URI dirtyUri) throws IntrusionException {
    // From RFC-3986 section 3
    //    URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
    //
    //    hier-part   = "//" authority path-abempty
    //                / path-absolute
    //                / path-rootless
    //                / path-empty
    // The following are two example URIs and their component parts:
    //
    //   foo://example.com:8042/over/there?name=ferret#nose
    //   \_/   \______________/\_________/ \_________/ \__/
    //    |           |            |            |        |
    // scheme     authority       path        query   fragment
    //    |   _____________________|__
    //   / \ /                        \
    //   urn:example:animal:ferret:nose
    Map<UriSegment, String> parseMap = new EnumMap<UriSegment, String>(UriSegment.class);
    parseMap.put(UriSegment.SCHEME, dirtyUri.getScheme());
    // authority = [ userinfo "@" ] host [ ":" port ]
    parseMap.put(UriSegment.AUTHORITY, dirtyUri.getRawAuthority());
    parseMap.put(UriSegment.SCHEMSPECIFICPART, dirtyUri.getRawSchemeSpecificPart());
    parseMap.put(UriSegment.HOST, dirtyUri.getHost());
    // URI.getPort() returns -1 when the port is undefined; store an empty string in that case so the
    // port is omitted from the output.
    int port = dirtyUri.getPort();
    parseMap.put(UriSegment.PORT, port == -1 ? "" : Integer.toString(port));
    parseMap.put(UriSegment.PATH, dirtyUri.getRawPath());
    parseMap.put(UriSegment.QUERY, dirtyUri.getRawQuery());
    parseMap.put(UriSegment.FRAGMENT, dirtyUri.getRawFragment());

    SecurityConfiguration sg = ESAPI.securityConfiguration();
    boolean allowMixed = sg.getAllowMixedEncoding();
    boolean allowMultiple = sg.getAllowMultipleEncoding();

    // Replace every item in the map with its canonicalized version. Overwriting the value of an
    // existing key does not disturb the EnumMap iteration.
    for (UriSegment seg : parseMap.keySet()) {
        String rawValue = parseMap.get(seg);
        String value = encoder.canonicalize(rawValue, allowMultiple, allowMixed);
        value = value == null ? "" : value;

        // In the case of a uri query, we need to break up and canonicalize the internal parts of the query.
        if (seg == UriSegment.QUERY && null != rawValue) {
            StringBuilder qBuilder = new StringBuilder();
            try {
                Map<String, List<String>> canonicalizedMap = this.splitQuery(dirtyUri);
                for (Entry<String, List<String>> e : canonicalizedMap.entrySet()) {
                    String key = e.getKey();
                    Iterator<String> values = e.getValue().iterator();
                    // do/while so a key with an empty value list is still written once as "key=".
                    do {
                        String qVal = values.hasNext() ? values.next() : null;
                        if (qBuilder.length() > 0) {
                            qBuilder.append("&");
                        }
                        qBuilder.append(key)
                                .append("=")
                                .append(qVal == null ? "" : qVal);
                    } while (values.hasNext());
                }
                value = qBuilder.toString();
            } catch (UnsupportedEncodingException e) {
                // Best effort: keep the whole-query canonicalization computed above.
                logger.debug(Logger.EVENT_FAILURE, "decoding error when parsing [" + dirtyUri.toString() + "]");
            }
        }
        parseMap.put(seg, value);
    }
    return buildUrl(parseMap);
}
/**
 * Splits the query of a URI into its parameters. The core of this method was adapted from a
 * Stack Overflow answer and modified to return canonicalized keys and values.
 *
 * <p>The <em>raw</em> query is split on {@code &} before any decoding, so an escaped ampersand
 * ({@code %26}) inside a value is not mistaken for a separator, and each piece is URL-decoded exactly
 * once. Decoding the already-decoded {@code getQuery()} result a second time made
 * {@link URLDecoder#decode(String, String)} throw {@link IllegalArgumentException} for values
 * containing an escaped percent sign, e.g. {@code a=100%25}.
 *
 * @param uri the URI whose query should be split
 * @return an insertion-ordered map from parameter name to the list of its values, in order of
 *         appearance; empty when the URI has no query. A parameter without a value contributes
 *         whatever {@code encoder.canonicalize(null)} returns (presumably {@code null}).
 * @throws UnsupportedEncodingException if the "UTF-8" charset is not supported by the runtime
 */
public Map<String, List<String>> splitQuery(URI uri) throws UnsupportedEncodingException {
    final Map<String, List<String>> query_pairs = new LinkedHashMap<String, List<String>>();
    final String rawQuery = uri.getRawQuery();
    if (rawQuery == null || rawQuery.isEmpty()) {
        // No query component (or a bare "?"): nothing to split.
        return query_pairs;
    }
    for (String pair : rawQuery.split("&")) {
        if (pair.isEmpty()) {
            // Skip empty pieces produced by "&&" or a leading "&".
            continue;
        }
        final int idx = pair.indexOf("=");
        final boolean hasKey = idx > 0;
        final String decodedKey = URLDecoder.decode(hasKey ? pair.substring(0, idx) : pair, "UTF-8");
        // As before, only a key that is followed by '=' is run through the canonicalizer.
        final String key = hasKey ? encoder.canonicalize(decodedKey) : decodedKey;
        if (!query_pairs.containsKey(key)) {
            query_pairs.put(key, new LinkedList<String>());
        }
        final String value = hasKey && pair.length() > idx + 1
                ? URLDecoder.decode(pair.substring(idx + 1), "UTF-8")
                : null;
        query_pairs.get(key).add(encoder.canonicalize(value));
    }
    return query_pairs;
}
/**
 * Names of the URI pieces used as keys when a URI is taken apart for canonicalization.
 *
 * <p>The declaration order is kept as-is: getCanonicalizedURI stores these keys in an
 * {@code EnumMap}, which iterates in declaration order.
 */
public enum UriSegment {
    /** Raw authority component: [ userinfo "@" ] host [ ":" port ]. */
    AUTHORITY,
    /** Scheme, e.g. "http". */
    SCHEME,
    /** Raw scheme-specific part (the constant keeps its historical spelling). */
    SCHEMSPECIFICPART,
    /** User-info component. */
    USERINFO,
    /** Host component. */
    HOST,
    /** Port component. */
    PORT,
    /** Raw path component. */
    PATH,
    /** Raw query component. */
    QUERY,
    /** Raw fragment component. */
    FRAGMENT
}
参考代码可以在这里(here)找到。
请注意,这部分内容将被重构到 DefaultEncoder class,因为规范化方法属于那里。
这段代码在 ESAPI 项目中经过了大量测试,但我在这里可能漏贴了一两个方法。您可以按原样(as-is)克隆并编译 ESAPI 项目,但某些组织不允许使用尚未正式发布的最新(bleeding-edge)二进制文件。
我正在尝试使用 ESAPI 验证器来验证 URL,但由于 URL 中的 &language 参数,我的验证失败了。如果我删除 language 参数,则验证成功。请检查我的 URL 模式并告诉我此模式失败的原因。
// Sample URL from the question; its query string ends with the "&language=hindi" parameter.
String url="http://google.com:000/menu.jsp?userid=test&age=22&language=hindi";
// Ask ESAPI to validate the URL. NOTE(review): the "URL" arguments presumably name the context and the
// Validator.URL rule, followed by the maximum length and the allowNull flag - confirm against the ESAPI API.
ESAPI.validator().getValidInput("URL", url,"URL",100000,false);
正则表达式
Validator.URL=^(ht|f)tp(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:(0-9)*)*(\/?)([a-zA-Z0-9\-\.\?\,\:\'\/\\\+=&%\&language\$#_]*)?$
这是设计使然。这里发生的事情是您的输入被 ESAPI 的规范化方法捕获,该方法正在检测 URI 是否同时包含 URI 编码和 HTML 实体编码。
ESAPI 的下一个版本将包括一个方便的方法,但是它需要一些步骤。
便捷方法:
/**
 * {@inheritDoc}
 *
 * <p>Implementation notes: the input is converted to a {@link URI} via {@code getRfcCompliantURI},
 * canonicalized segment by segment via {@code getCanonicalizedURI}, and the canonical form is then
 * matched against the "URL" pattern from the ESAPI security configuration. The regular validator API
 * is deliberately bypassed because it would canonicalize the input a second time.
 *
 * @param context   descriptive name of the value being validated; not used by this implementation
 * @param input     the candidate URI string
 * @param allowNull when {@code true}, a {@code null} or empty {@code input} is reported as valid
 * @return {@code true} if the input is acceptable; {@code false} if it is not RFC compliant, if
 *         canonicalization raises an {@link IntrusionException}, if no "URL" pattern is configured,
 *         or if the canonical form does not match that pattern
 */
public boolean isValidURI(String context, String input, boolean allowNull) {
    // Honour allowNull, which the previous version accepted but never consulted.
    boolean inputIsNullOrEmpty = input == null || input.isEmpty();
    if (inputIsNullOrEmpty && allowNull) {
        return true;
    }
    if (input == null) {
        return false;
    }

    boolean isValid = false;
    URI compliantURI = this.getRfcCompliantURI(input);
    try {
        if (null != compliantURI) {
            String canonicalizedURI = getCanonicalizedURI(compliantURI);
            // If getCanonicalizedURI doesn't throw an IntrusionException, then the URI contains no
            // mixed or double-encoding attacks.
            logger.info(Logger.SECURITY_SUCCESS, "We did not detect any mixed or multiple encoding in the uri:[" + input + "]");
            // This part uses the regex from validation.properties. That regex should be super-simple
            // and used mainly to restrict certain parts of a URL.
            Pattern p = ESAPI.securityConfiguration().getValidationPattern( "URL" );
            if (p == null) {
                // Fail closed instead of hitting a NullPointerException when the rule is missing.
                logger.error(Logger.SECURITY_FAILURE, "No validation pattern is configured for [URL]; rejecting the uri:[" + input + "]");
            } else {
                // We match directly instead of using the normal validator API, because that API would
                // canonicalize the input again; a query that happens to look like an HTML entity
                // (such as &para, the pilcrow) would then stop conforming to the URL regex.
                isValid = p.matcher(canonicalizedURI).matches();
            }
        }
    } catch (IntrusionException e) {
        logger.error(Logger.SECURITY_FAILURE, e.getMessage());
        isValid = false;
    }
    return isValid;
}
/**
 * Splits a URI into its segments, canonicalizes each segment, and hands the canonicalized segments
 * to {@code buildUrl} to produce a single string that is safe to run a validation regex against.
 *
 * <p>The query segment is additionally split into key/value pairs via {@link #splitQuery(URI)} and
 * rebuilt as {@code key=value} pairs joined by {@code &}. Every value of a repeated key is emitted,
 * so no part of the query escapes the later regex check, and a missing value is written as an
 * empty string rather than the text "null".
 *
 * @param dirtyUri the parsed, not yet canonicalized URI
 * @return the string built by {@code buildUrl} from the canonicalized segments
 * @throws IntrusionException propagated from canonicalization; callers treat it as evidence of a
 *         mixed or multiple encoding attack
 */
public String getCanonicalizedURI(URI dirtyUri) throws IntrusionException {
    // From RFC-3986 section 3
    //    URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
    //
    //    hier-part   = "//" authority path-abempty
    //                / path-absolute
    //                / path-rootless
    //                / path-empty
    // The following are two example URIs and their component parts:
    //
    //   foo://example.com:8042/over/there?name=ferret#nose
    //   \_/   \______________/\_________/ \_________/ \__/
    //    |           |            |            |        |
    // scheme     authority       path        query   fragment
    //    |   _____________________|__
    //   / \ /                        \
    //   urn:example:animal:ferret:nose
    Map<UriSegment, String> parseMap = new EnumMap<UriSegment, String>(UriSegment.class);
    parseMap.put(UriSegment.SCHEME, dirtyUri.getScheme());
    // authority = [ userinfo "@" ] host [ ":" port ]
    parseMap.put(UriSegment.AUTHORITY, dirtyUri.getRawAuthority());
    parseMap.put(UriSegment.SCHEMSPECIFICPART, dirtyUri.getRawSchemeSpecificPart());
    parseMap.put(UriSegment.HOST, dirtyUri.getHost());
    // URI.getPort() returns -1 when the port is undefined; store an empty string in that case so the
    // port is omitted from the output.
    int port = dirtyUri.getPort();
    parseMap.put(UriSegment.PORT, port == -1 ? "" : Integer.toString(port));
    parseMap.put(UriSegment.PATH, dirtyUri.getRawPath());
    parseMap.put(UriSegment.QUERY, dirtyUri.getRawQuery());
    parseMap.put(UriSegment.FRAGMENT, dirtyUri.getRawFragment());

    SecurityConfiguration sg = ESAPI.securityConfiguration();
    boolean allowMixed = sg.getAllowMixedEncoding();
    boolean allowMultiple = sg.getAllowMultipleEncoding();

    // Replace every item in the map with its canonicalized version. Overwriting the value of an
    // existing key does not disturb the EnumMap iteration.
    for (UriSegment seg : parseMap.keySet()) {
        String rawValue = parseMap.get(seg);
        String value = encoder.canonicalize(rawValue, allowMultiple, allowMixed);
        value = value == null ? "" : value;

        // In the case of a uri query, we need to break up and canonicalize the internal parts of the query.
        if (seg == UriSegment.QUERY && null != rawValue) {
            StringBuilder qBuilder = new StringBuilder();
            try {
                Map<String, List<String>> canonicalizedMap = this.splitQuery(dirtyUri);
                for (Entry<String, List<String>> e : canonicalizedMap.entrySet()) {
                    String key = e.getKey();
                    Iterator<String> values = e.getValue().iterator();
                    // do/while so a key with an empty value list is still written once as "key=".
                    do {
                        String qVal = values.hasNext() ? values.next() : null;
                        if (qBuilder.length() > 0) {
                            qBuilder.append("&");
                        }
                        qBuilder.append(key)
                                .append("=")
                                .append(qVal == null ? "" : qVal);
                    } while (values.hasNext());
                }
                value = qBuilder.toString();
            } catch (UnsupportedEncodingException e) {
                // Best effort: keep the whole-query canonicalization computed above.
                logger.debug(Logger.EVENT_FAILURE, "decoding error when parsing [" + dirtyUri.toString() + "]");
            }
        }
        parseMap.put(seg, value);
    }
    return buildUrl(parseMap);
}
/**
 * Splits the query of a URI into its parameters. The core of this method was adapted from a
 * Stack Overflow answer and modified to return canonicalized keys and values.
 *
 * <p>The <em>raw</em> query is split on {@code &} before any decoding, so an escaped ampersand
 * ({@code %26}) inside a value is not mistaken for a separator, and each piece is URL-decoded exactly
 * once. Decoding the already-decoded {@code getQuery()} result a second time made
 * {@link URLDecoder#decode(String, String)} throw {@link IllegalArgumentException} for values
 * containing an escaped percent sign, e.g. {@code a=100%25}.
 *
 * @param uri the URI whose query should be split
 * @return an insertion-ordered map from parameter name to the list of its values, in order of
 *         appearance; empty when the URI has no query. A parameter without a value contributes
 *         whatever {@code encoder.canonicalize(null)} returns (presumably {@code null}).
 * @throws UnsupportedEncodingException if the "UTF-8" charset is not supported by the runtime
 */
public Map<String, List<String>> splitQuery(URI uri) throws UnsupportedEncodingException {
    final Map<String, List<String>> query_pairs = new LinkedHashMap<String, List<String>>();
    final String rawQuery = uri.getRawQuery();
    if (rawQuery == null || rawQuery.isEmpty()) {
        // No query component (or a bare "?"): nothing to split.
        return query_pairs;
    }
    for (String pair : rawQuery.split("&")) {
        if (pair.isEmpty()) {
            // Skip empty pieces produced by "&&" or a leading "&".
            continue;
        }
        final int idx = pair.indexOf("=");
        final boolean hasKey = idx > 0;
        final String decodedKey = URLDecoder.decode(hasKey ? pair.substring(0, idx) : pair, "UTF-8");
        // As before, only a key that is followed by '=' is run through the canonicalizer.
        final String key = hasKey ? encoder.canonicalize(decodedKey) : decodedKey;
        if (!query_pairs.containsKey(key)) {
            query_pairs.put(key, new LinkedList<String>());
        }
        final String value = hasKey && pair.length() > idx + 1
                ? URLDecoder.decode(pair.substring(idx + 1), "UTF-8")
                : null;
        query_pairs.get(key).add(encoder.canonicalize(value));
    }
    return query_pairs;
}
/**
 * Names of the URI pieces used as keys when a URI is taken apart for canonicalization.
 *
 * <p>The declaration order is kept as-is: getCanonicalizedURI stores these keys in an
 * {@code EnumMap}, which iterates in declaration order.
 */
public enum UriSegment {
    /** Raw authority component: [ userinfo "@" ] host [ ":" port ]. */
    AUTHORITY,
    /** Scheme, e.g. "http". */
    SCHEME,
    /** Raw scheme-specific part (the constant keeps its historical spelling). */
    SCHEMSPECIFICPART,
    /** User-info component. */
    USERINFO,
    /** Host component. */
    HOST,
    /** Port component. */
    PORT,
    /** Raw path component. */
    PATH,
    /** Raw query component. */
    QUERY,
    /** Raw fragment component. */
    FRAGMENT
}
参考代码可以在这里(here)找到。
请注意,这部分内容将被重构到 DefaultEncoder class,因为规范化方法属于那里。
这段代码在 ESAPI 项目中经过了大量测试,但我在这里可能漏贴了一两个方法。您可以按原样(as-is)克隆并编译 ESAPI 项目,但某些组织不允许使用尚未正式发布的最新(bleeding-edge)二进制文件。