diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 6f1aa41acaaac..563eaa4e87658 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2554,14 +2554,14 @@ my_mb_wc_filename(CHARSET_INFO *cs __attribute__((unused)), } } - if (s + 4 > e) - return MY_CS_TOOSMALL4; + if (s + 5 > e) + return MY_CS_TOOSMALL5; if ((byte1= hexlo(byte1)) >= 0 && (byte2= hexlo(byte2)) >= 0) { int byte3= hexlo(s[3]); - int byte4= hexlo(s[3] ? s[4] : 0); + int byte4= hexlo(s[4]); if (byte3 >=0 && byte4 >=0) { *pwc= (byte1 << 12) + (byte2 << 8) + (byte3 << 4) + byte4; diff --git a/unittest/strings/strings-t.c b/unittest/strings/strings-t.c index d4bf7a0ccda0e..10f7a6d2e2396 100644 --- a/unittest/strings/strings-t.c +++ b/unittest/strings/strings-t.c @@ -1704,13 +1704,29 @@ test_strnncollsp_char() } +/* + my_mb_wc_filename() decodes a 5-byte '@HHHH' escape, but it used to check + for only 4 available bytes before reading the 4th hex digit at s[4], one + byte past the supplied end pointer. "@000" is such a truncated escape; its + "00" pair is expected to miss the 3-byte table and reach the hex branch. + buf[4] exists but lies beyond end= buf + 4, so MY_CS_TOOSMALL5 is required. +*/ +static int test_mb_wc_filename_truncated() +{ + CHARSET_INFO *cs= &my_charset_filename; + uchar buf[5]= {'@', '0', '0', '0', '1'}; + my_wc_t wc= 0; + return my_ci_mb_wc(cs, &wc, buf, buf + 4); /* end excludes buf[4] */ +} + + int main(int ac, char **av) { size_t i, failed= 0; MY_INIT(av[0]); - plan(4); + plan(5); diag("Testing my_like_range_xxx() functions"); for (i= 0; i < array_elements(charset_list); i++) @@ -1736,6 +1752,10 @@ int main(int ac, char **av) failed= test_strnncollsp_char(); ok(failed == 0, "Testing cs->coll->strnncollsp_char()"); + diag("Testing my_charset_filename mb_wc end-pointer bounds"); + ok(test_mb_wc_filename_truncated() == MY_CS_TOOSMALL5, + "filename decoder does not read past the end pointer"); + my_end(0); return exit_status();