转载

因为 Java 和 PHP 获取客户端 cookie 的方式不同而引发的 bug

Java

请求信息

GET / HTTP/1.1
Host: localhost:7003
Connection: keep-alive
Pragma: no-cache
Cache-Control: no-cache
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7,ja;q=0.6,pt;q=0.5
Cookie: test2=ab+cd; test1=ab%2Bcd

服务端

/**
 * Demo controller: logs every cookie that arrives with a GET request to "/".
 */
@Controller
@Slf4j
public class MainController {

    @Autowired
    private HttpServletRequest request;

    /**
     * Logs each cookie of the current request as {@code name=value}, using the
     * values exactly as {@code Cookie.getValue()} returns them, and returns the
     * string "index". Nothing is logged when {@code getCookies()} returns null.
     */
    @GetMapping("/")
    @ResponseBody
    public String index() {
        final Cookie[] received = request.getCookies();
        if (received == null) {
            return "index";
        }
        for (int i = 0; i < received.length; i++) {
            final Cookie current = received[i];
            log.info(current.getName() + "=" + current.getValue());
        }
        return "index";
    }
}

控制台输出

2019-05-16 18:03:32.770  INFO 10114 --- [nio-7003-exec-1] net.mengkang.demo.MainController         : test2=ab+cd
2019-05-16 18:03:32.770  INFO 10114 --- [nio-7003-exec-1] net.mengkang.demo.MainController         : test1=ab%2Bcd

Php

GET / HTTP/1.1
Host: localhost:8084
Connection: keep-alive
Pragma: no-cache
Cache-Control: no-cache
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7,ja;q=0.6,pt;q=0.5
Cookie: test2=ab+cd; test1=ab%2Bcd

服务端

// Dump the cookies as PHP parsed them into $_COOKIE.
var_export($_COOKIE);
array (
  'test2' => 'ab cd',
  'test1' => 'ab+cd',
)

结果对比

发现 Java 不会对 cookie 数据做任何处理,而 PHP 默认会进行一次 urldecode 操作,这就导致两边系统获取同一个 cookie 时结果不一致。

类似的问题:PHP 在解析外部变量时的一个 BUG

Php 源码分析

主要查看两处源码

main/php_variables.c
ext/standard/url.c
/*
 * Excerpt of php_default_treat_data(); the "..." line marks code that the
 * article elided (declarations and input setup are not shown here).
 *
 * Splits the raw input string `res` into tokens on `separator`, splits each
 * token at the first '=' into a name and a value, URL-decodes both in place
 * with php_url_decode(), and registers the pair into `array` if the SAPI
 * input filter accepts it.
 *
 * Cookie input (PARSE_COOKIE) goes through the same php_url_decode() calls as
 * GET input, which is why cookie values arrive in PHP already URL-decoded.
 */
SAPI_API SAPI_TREAT_DATA_FUNC(php_default_treat_data)
{
...
    switch (arg) {
        case PARSE_GET:
        case PARSE_STRING:
            separator = PG(arg_separator).input;
            break;
        case PARSE_COOKIE:
            /* Cookies in the request's Cookie header are separated by ';'. */
            separator = ";\0";
            break;
    }

    var = php_strtok_r(res, separator, &strtok_buf);

    while (var) {
        /* Locate the name/value boundary; NULL means the token has no value. */
        val = strchr(var, '=');

        if (arg == PARSE_COOKIE) {
            /* Remove leading spaces from cookie names, needed for multi-cookie header where ; can be followed by a space */
            while (isspace(*var)) {
                var++;
            }
            /* Skip tokens with an empty name ("=value") or nothing left at all. */
            if (var == val || *var == '\0') {
                goto next_cookie;
            }
        }

        if (++count > PG(max_input_vars)) {
            php_error_docref(NULL, E_WARNING, "Input variables exceeded " ZEND_LONG_FMT ". To increase the limit change max_input_vars in php.ini.", PG(max_input_vars));
            break;
        }

        if (val) { /* have a value */
            size_t val_len;
            size_t new_val_len;

            /* Terminate the name at '=' and advance val to the value text. */
            *val++ = '\0';
            php_url_decode(var, strlen(var));
            /* The value is URL-decoded here, for cookies as well as GET data. */
            val_len = php_url_decode(val, strlen(val));
            val = estrndup(val, val_len);
            if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len)) {
                php_register_variable_safe(var, val, new_val_len, &array);
            }
            efree(val);
        } else {
            size_t val_len;
            size_t new_val_len;

            /* No '=' in the token: register the name with an empty value. */
            php_url_decode(var, strlen(var));
            val_len = 0;
            val = estrndup("", val_len);
            if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len)) {
                php_register_variable_safe(var, val, new_val_len, &array);
            }
            efree(val);
        }
next_cookie:
        var = php_strtok_r(NULL, separator, &strtok_buf);
    }

    if (free_buffer) {
        efree(res);
    }
}

我们看到cookie的值会被执行 php_url_decode 操作,下面附带其源码,且加上一段测试代码

#include <stdio.h>
#include <ctype.h>
#include <memory.h>

/*
 * Map a single hexadecimal digit character to its numeric value.
 * Uppercase letters are lowercased first; anything that is not '0'..'9'
 * is converted as (c - 'a' + 10).
 */
static int php_htoi_digit(int c) {
    if (isupper(c)) {
        c = tolower(c);
    }
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    return c - 'a' + 10;
}

/*
 * Convert the two hexadecimal digits at s[0] and s[1] into an integer:
 * s[0] is the high nibble, s[1] the low nibble. The bytes are read as
 * unsigned char before classification.
 */
static int php_htoi(char *s) {
    const unsigned char *digits = (const unsigned char *) s;
    int high = php_htoi_digit(digits[0]);
    int low = php_htoi_digit(digits[1]);

    return high * 16 + low;
}

/*
 * URL-decode the first `len` bytes of `str` in place.
 *
 *   '+'  -> ' '
 *   %XX  -> the byte with hexadecimal value XX (only when both X are hex
 *           digits and at least two bytes follow the '%')
 *   anything else is copied unchanged.
 *
 * The decoded text is never longer than the input. A NUL terminator is
 * written right after the decoded bytes, so `str` must have room for
 * len + 1 bytes.
 *
 * Returns the length of the decoded string, excluding the terminator.
 */
size_t php_url_decode(char *str, size_t len) {
    char *dest = str;
    char *data = str;

    while (len--) {
        if (*data == '+') {
            *dest = ' ';
        } else if (*data == '%' && len >= 2
                   /* <ctype.h> functions require an unsigned char value. */
                   && isxdigit((unsigned char) data[1])
                   && isxdigit((unsigned char) data[2])) {
            *dest = (char) php_htoi(data + 1);
            /* Skip the two hex digits consumed by this escape. */
            data += 2;
            len -= 2;
        } else {
            *dest = *data;
        }
        data++;
        dest++;
    }
    *dest = '\0';
    return (size_t) (dest - str);
}

/*
 * Demo driver: URL-decodes the cookie value "ab+cd" in place and prints it.
 * php_url_decode() maps '+' to a space, so the intended output is "ab cd".
 */
int main(void) {
    char a[] = "ab+cd";

    php_url_decode(a, strlen(a));
    printf("%s\n", a);

    return 0;
}

上面的 php_url_decode 用到了 php_htoi。这是因为 urlencode 按照 RFC 1738,把字符串中除了 -_. 之外的所有非字母数字字符都替换成百分号(%)后跟两位十六进制数。htoi 的作用就是把十六进制数字转换为整数(Converting Hexadecimal Digits Into Integers),然后把计算出来的整型转换为 char,存回处理之后的字符数组里。

扩展讨论

rawurlencode 和 urlencode 的区别是什么?

手册上的解释是:

urlencode 返回字符串,此字符串中除了 -_. 之外的所有非字母数字字符都将被替换成百分号(%)后跟两位十六进制数,空格则编码为加号(+)。此编码与 WWW 表单 POST 数据的编码方式是一样的,同时与 application/x-www-form-urlencoded 的媒体类型编码方式一样。由于历史原因,此编码在将空格编码为加号(+)方面与 RFC 3986 编码(参见 rawurlencode())不同。

/*
 * Raw URL-decode the first `len` bytes of `str` in place.
 *
 * Every %XX sequence (both X hex digits, at least two bytes following the
 * '%') is replaced by the byte with hexadecimal value XX; every other byte,
 * including '+', is copied unchanged.
 *
 * A NUL terminator is written right after the decoded bytes, so `str` must
 * have room for len + 1 bytes. Returns the decoded length, excluding the
 * terminator.
 */
PHPAPI size_t php_raw_url_decode(char *str, size_t len)
{
    char *dest = str;
    char *data = str;

    while (len--) {
        /* <ctype.h> functions require an unsigned char value. */
        if (*data == '%' && len >= 2 && isxdigit((unsigned char) data[1])
            && isxdigit((unsigned char) data[2])) {
#ifndef CHARSET_EBCDIC
            *dest = (char) php_htoi(data + 1);
#else
            /* NOTE(review): indexing with a (char) may go negative for values >= 0x80 where char is signed — confirm against os_toebcdic's definition. */
            *dest = os_toebcdic[(char) php_htoi(data + 1)];
#endif
            /* Skip the two hex digits consumed by this escape. */
            data += 2;
            len -= 2;
        } else {
            *dest = *data;
        }
        data++;
        dest++;
    }
    *dest = '\0';
    return (size_t) (dest - str);
}

通过源码可以看到,区别就在于 php_raw_url_decode 不再对 + 做处理(不会把它转换成空格)。

总结

不管是 $_GET、$_POST 还是 $_COOKIE,拿到的都是经过 urldecode 处理的“二手数据”,这就导致和 Java 那边获取的 cookie 值不一样了。

原文  https://segmentfault.com/a/1190000019229559
正文到此结束
Loading...