From libc-alpha-return-36620-listarch-libc-alpha=sources dot redhat dot com at sourceware dot org Tue Jan 29 16:13:50 2013 Return-Path: Delivered-To: listarch-libc-alpha at sources dot redhat dot com Received: (qmail 32082 invoked by alias); 29 Jan 2013 16:13:48 -0000 Received: (qmail 32049 invoked by uid 22791); 29 Jan 2013 16:13:43 -0000 X-SWARE-Spam-Status: No, hits=-5.2 required=5.0 tests=AWL,BAYES_00,KHOP_RCVD_UNTRUST,RCVD_IN_DNSWL_HI,RP_MATCHES_RCVD,TW_BK X-Spam-Check-By: sourceware.org From: Andreas Schwab To: libc-alpha at sourceware dot org Subject: [PATCH] Fix buffer overrun in regexp matcher X-Yow: Are you selling NYLON OIL WELLS?? If so, we can use TWO DOZEN!! Date: Tue, 29 Jan 2013 17:13:35 +0100 Message-ID: User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/24.2.92 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Mailing-List: contact libc-alpha-help at sourceware dot org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner at sourceware dot org Delivered-To: mailing list libc-alpha at sourceware dot org When extending regex buffers, make sure we allocate enough room for the state log. Merely doubling the space may not be enough if the current node has accepted a long run of characters. This part of the code only triggers with multibyte characters. Andreas. [BZ #15078] * posix/regexec.c (extend_buffers): Add parameter min_len. (check_matching): Pass minimum needed length. (clean_state_log_if_needed): Likewise. (get_subexp): Likewise. * posix/Makefile (tests): Add bug-regex34. (bug-regex34-ENV): Define. * posix/bug-regex34.c: New file. 
diff --git a/posix/Makefile b/posix/Makefile index 57672d8..6ceb440 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -86,7 +86,7 @@ tests := tstgetopt testfnm runtests runptests \ tst-rfc3484-3 \ tst-getaddrinfo3 tst-fnmatch2 tst-cpucount tst-cpuset \ bug-getopt1 bug-getopt2 bug-getopt3 bug-getopt4 \ - bug-getopt5 tst-getopt_long1 + bug-getopt5 tst-getopt_long1 bug-regex34 xtests := bug-ga2 ifeq (yes,$(build-shared)) test-srcs := globtest @@ -199,6 +199,7 @@ bug-regex26-ENV = LOCPATH=$(common-objpfx)localedata bug-regex30-ENV = LOCPATH=$(common-objpfx)localedata bug-regex32-ENV = LOCPATH=$(common-objpfx)localedata bug-regex33-ENV = LOCPATH=$(common-objpfx)localedata +bug-regex34-ENV = LOCPATH=$(common-objpfx)localedata tst-rxspencer-ARGS = --utf8 rxspencer/tests tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata tst-pcre-ARGS = PCRE.tests diff --git a/posix/bug-regex34.c b/posix/bug-regex34.c new file mode 100644 index 0000000..bb3b613 --- /dev/null +++ b/posix/bug-regex34.c @@ -0,0 +1,46 @@ +/* Test re_search with multi-byte characters in UTF-8. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define _GNU_SOURCE 1 +#include <locale.h> +#include <regex.h> +#include <stdio.h> +#include <string.h> + +static int +do_test (void) +{ + struct re_pattern_buffer r; + /* ကျွန်ုပ်x */ + const char *s = "\xe1\x80\x80\xe1\x80\xbb\xe1\x80\xbd\xe1\x80\x94\xe1\x80\xba\xe1\x80\xaf\xe1\x80\x95\xe1\x80\xbax"; + + if (setlocale (LC_ALL, "en_US.UTF-8") == NULL) + { + puts ("setlocale failed"); + return 1; + } + memset (&r, 0, sizeof (r)); + + re_compile_pattern ("[^x]x", 5, &r); + /* This was triggering a buffer overflow. */ + re_search (&r, s, strlen (s), 0, strlen (s), 0); + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/posix/regexec.c b/posix/regexec.c index 7f2de85..5ca2bf6 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -197,7 +197,7 @@ static int group_nodes_into_DFAstates (const re_dfa_t *dfa, static int check_node_accept (const re_match_context_t *mctx, const re_token_t *node, int idx) internal_function; -static reg_errcode_t extend_buffers (re_match_context_t *mctx) +static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len) internal_function; /* Entry point for POSIX code. 
*/ @@ -1160,7 +1160,7 @@ check_matching (re_match_context_t *mctx, int fl_longest_match, || (BE (next_char_idx >= mctx->input.valid_len, 0) && mctx->input.valid_len < mctx->input.len)) { - err = extend_buffers (mctx); + err = extend_buffers (mctx, next_char_idx + 1); if (BE (err != REG_NOERROR, 0)) { assert (err == REG_ESPACE); @@ -1738,7 +1738,7 @@ clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) && mctx->input.valid_len < mctx->input.len)) { reg_errcode_t err; - err = extend_buffers (mctx); + err = extend_buffers (mctx, next_state_log_idx + 1); if (BE (err != REG_NOERROR, 0)) return err; } @@ -2792,7 +2792,7 @@ get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx) if (bkref_str_off >= mctx->input.len) break; - err = extend_buffers (mctx); + err = extend_buffers (mctx, bkref_str_off + 1); if (BE (err != REG_NOERROR, 0)) return err; @@ -4102,7 +4102,7 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, static reg_errcode_t internal_function __attribute_warn_unused_result__ -extend_buffers (re_match_context_t *mctx) +extend_buffers (re_match_context_t *mctx, int min_len) { reg_errcode_t ret; re_string_t *pstr = &mctx->input; @@ -4111,8 +4111,10 @@ extend_buffers (re_match_context_t *mctx) if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0)) return REG_ESPACE; - /* Double the lengthes of the buffers. */ - ret = re_string_realloc_buffers (pstr, MIN (pstr->len, pstr->bufs_len * 2)); + /* Double the lengthes of the buffers, but allocate at least MIN_LEN. */ + ret = re_string_realloc_buffers (pstr, + MAX (min_len, + MIN (pstr->len, pstr->bufs_len * 2))); if (BE (ret != REG_NOERROR, 0)) return ret; -- 1.8.1.2 -- Andreas Schwab, SUSE Labs, schwab@suse.de GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7 "And now for something completely different."