diff --git a/enc/unicode.c b/enc/unicode.c index cbfc6cdf589681..07497cdbe46731 100644 --- a/enc/unicode.c +++ b/enc/unicode.c @@ -682,15 +682,13 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP, *pp += codepoint_length; if (code <= 'z') { /* ASCII comes first */ - if (code >= 'a' && code <= 'z') { + if (code >= 'a' /*&& code <= 'z'*/) { if (flags & ONIGENC_CASE_UPCASE) { MODIFIED; if (flags & ONIGENC_CASE_FOLD_TURKISH_AZERI && code == 'i') code = I_WITH_DOT_ABOVE; - else { - code -= 'a'; - code += 'A'; - } + else + code -= 'a' - 'A'; } } else if (code >= 'A' && code <= 'Z') { diff --git a/regcomp.c b/regcomp.c index e389e6f1209af5..0ecf162556b777 100644 --- a/regcomp.c +++ b/regcomp.c @@ -2803,14 +2803,11 @@ get_head_value_node(Node* node, int exact, regex_t* reg) case NT_STR: { StrNode* sn = NSTR(node); - if (sn->end <= sn->s) break; - if (exact != 0 && - !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { - } - else { + if (exact == 0 || + NSTRING_IS_RAW(node) || !IS_IGNORECASE(reg->options)) { n = node; } } @@ -3301,6 +3298,14 @@ setup_subexp_call(Node* node, ScanEnv* env) } #endif +#define IN_ALT (1<<0) +#define IN_NOT (1<<1) +#define IN_REPEAT (1<<2) +#define IN_VAR_REPEAT (1<<3) +#define IN_CALL (1<<4) +#define IN_RECCALL (1<<5) +#define IN_LOOK_BEHIND (1<<6) + /* divide different length alternatives in look-behind. (?<=A|B) ==> (?<=A)|(?<=B) (? (?s; end = sn->end; if (start >= end) return 0; + is_in_look_behind = (state & IN_LOOK_BEHIND) != 0; + r = 0; top_root = root = prev_node = snode = NULL_NODE; alt_num = 1; @@ -3630,7 +3640,7 @@ expand_case_fold_string(Node* node, regex_t* reg) len = enclen(reg->enc, p, end); varlen = is_case_fold_variable_len(n, items, len); - if (n == 0 || varlen == 0) { + if (n == 0 || varlen == 0 || is_in_look_behind) { if (IS_NULL(snode)) { if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { onig_node_free(top_root); @@ -3889,13 +3899,6 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) } #endif -#define IN_ALT (1<<0) -#define IN_NOT (1<<1) -#define IN_REPEAT (1<<2) -#define IN_VAR_REPEAT (1<<3) -#define IN_CALL (1<<4) -#define IN_RECCALL (1<<5) - /* setup_tree does the following work. 1. check empty loop. (set qn->target_empty_info) 2. expand ignore-case in char class. @@ -3937,7 +3940,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) case NT_STR: if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { - r = expand_case_fold_string(node, reg); + r = expand_case_fold_string(node, reg, state); } break; @@ -4180,7 +4183,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (r < 0) return r; if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; if (NTYPE(node) != NT_ANCHOR) goto restart; - r = setup_tree(an->target, reg, state, env); + r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env); if (r != 0) return r; r = setup_look_behind(node, reg, env); } @@ -4193,7 +4196,8 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (r < 0) return r; if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; if (NTYPE(node) != NT_ANCHOR) goto restart; - r = setup_tree(an->target, reg, (state | IN_NOT), env); + r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND), + env); if (r != 0) return r; r = setup_look_behind(node, reg, env); } @@ -4209,104 +4213,49 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) return r; } -#ifndef USE_SUNDAY_QUICK_SEARCH -/* set skip map for Boyer-Moore search */ +/* set skip map for Sunday's quick search */ static int set_bm_skip(UChar* s, UChar* end, regex_t* reg, UChar skip[], int** int_skip, int ignore_case) { OnigDistance i, len; int clen, flen, n, j, k; - UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; OnigEncoding enc = reg->enc; len = end - s; if (len < ONIG_CHAR_TABLE_SIZE) { - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len; - - n = 0; - for (i = 0; i < len - 1; i += clen) { - p = s + i; - if (ignore_case) - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); - clen = enclen(enc, p, end); - if (p + clen > end) - clen = (int )(end - p); - - for (j = 0; j < n; j++) { - if ((items[j].code_len != 1) || (items[j].byte_len != clen)) - return 1; /* different length isn't supported. */ - flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); - if (flen != clen) - return 1; /* different length isn't supported. */ - } - for (j = 0; j < clen; j++) { - skip[s[i + j]] = (UChar )(len - 1 - i - j); - for (k = 0; k < n; k++) { - skip[buf[k][j]] = (UChar )(len - 1 - i - j); - } - } - } - } - else { -# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE - /* This should not happen. */ - return ONIGERR_TYPE_BUG; -# else - if (IS_NULL(*int_skip)) { - *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; - } - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len; - - n = 0; - for (i = 0; i < len - 1; i += clen) { - p = s + i; - if (ignore_case) + if (ignore_case) { + for (i = 0; i < len; i += clen) { + p = s + i; n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); - clen = enclen(enc, p, end); - if (p + clen > end) - clen = (int )(end - p); - - for (j = 0; j < n; j++) { - if ((items[j].code_len != 1) || (items[j].byte_len != clen)) - return 1; /* different length isn't supported. */ - flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); - if (flen != clen) - return 1; /* different length isn't supported. */ - } - for (j = 0; j < clen; j++) { - (*int_skip)[s[i + j]] = (int )(len - 1 - i - j); - for (k = 0; k < n; k++) { - (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j); + p, end, items); + clen = enclen(enc, p, end); + if (p + clen > end) + clen = (int )(end - p); + + for (j = 0; j < n; j++) { + if ((items[j].code_len != 1) || (items[j].byte_len != clen)) { + /* Different length isn't supported. Stop optimization at here. */ + end = p; + goto endcheck; + } + flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf); + if (flen != clen) { + /* Different length isn't supported. Stop optimization at here. */ + end = p; + goto endcheck; + } } } +endcheck: + ; } -# endif - } - return 0; -} - -#else /* USE_SUNDAY_QUICK_SEARCH */ - -/* set skip map for Sunday's quick search */ -static int -set_bm_skip(UChar* s, UChar* end, regex_t* reg, - UChar skip[], int** int_skip, int ignore_case) -{ - OnigDistance i, len; - int clen, flen, n, j, k; - UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN]; - OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; - OnigEncoding enc = reg->enc; - - len = end - s; - if (len < ONIG_CHAR_TABLE_SIZE) { - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1); + len = end - s; + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + skip[i] = (UChar )(len + 1); n = 0; for (i = 0; i < len; i += clen) { p = s + i; @@ -4317,17 +4266,11 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, if (p + clen > end) clen = (int )(end - p); - for (j = 0; j < n; j++) { - if ((items[j].code_len != 1) || (items[j].byte_len != clen)) - return 1; /* different length isn't supported. */ - flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); - if (flen != clen) - return 1; /* different length isn't supported. */ - } for (j = 0; j < clen; j++) { skip[s[i + j]] = (UChar )(len - i - j); for (k = 0; k < n; k++) { - skip[buf[k][j]] = (UChar )(len - i - j); + ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf); + skip[buf[j]] = (UChar )(len - i - j); } } } @@ -4369,9 +4312,8 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, } # endif } - return 0; + return (int)len; } -#endif /* USE_SUNDAY_QUICK_SEARCH */ typedef struct { OnigDistance min; /* min byte length */ @@ -5049,7 +4991,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { int n = onigenc_strlen(env->enc, sn->s, sn->end); - max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n; + max = (OnigDistance )ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n; } else { concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, @@ -5232,7 +5174,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) r = optimize_node_left(qn->target, &nopt, env); if (r) break; - if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) { + if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { if (env->mmd.max == 0 && NTYPE(qn->target) == NT_CANY && qn->greedy) { if (IS_MULTILINE(env->options)) @@ -5342,7 +5284,6 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) static int set_optimize_exact_info(regex_t* reg, OptExactInfo* e) { - int r; int allow_reverse; if (e->len == 0) return 0; @@ -5357,15 +5298,18 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) if (e->ignore_case > 0) { if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { - r = set_bm_skip(reg->exact, reg->exact_end, reg, + e->len = set_bm_skip(reg->exact, reg->exact_end, reg, reg->map, &(reg->int_map), 1); - if (r == 0) { + reg->exact_end = reg->exact + e->len; + if (e->len >= 3) { reg->optimize = (allow_reverse != 0 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC); } - else { + else if (e->len > 0) { reg->optimize = ONIG_OPTIMIZE_EXACT_IC; } + else + return 0; } else { reg->optimize = ONIG_OPTIMIZE_EXACT_IC; @@ -5373,15 +5317,10 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) } else { if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { - r = set_bm_skip(reg->exact, reg->exact_end, reg, - reg->map, &(reg->int_map), 0); - if (r == 0) { - reg->optimize = (allow_reverse != 0 - ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); - } - else { - reg->optimize = ONIG_OPTIMIZE_EXACT; - } + set_bm_skip(reg->exact, reg->exact_end, reg, + reg->map, &(reg->int_map), 0); + reg->optimize = (allow_reverse != 0 + ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); } else { reg->optimize = ONIG_OPTIMIZE_EXACT; diff --git a/regenc.c b/regenc.c index 0afdf22cb7bdc1..823aacc28e615b 100644 --- a/regenc.c +++ b/regenc.c @@ -984,7 +984,7 @@ onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - code += 'A' - 'a'; + code -= 'a' - 'A'; } else if (code >= 'A' && code <= 'Z' && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { @@ -1013,7 +1013,7 @@ onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - code += 'A' - 'a'; + code -= 'a' - 'A'; } else if (code >= 'A' && code <= 'Z' && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { diff --git a/regexec.c b/regexec.c index b8d174ec8ed472..eec3e236631805 100644 --- a/regexec.c +++ b/regexec.c @@ -2742,7 +2742,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, /* default behavior: return first-matching result. */ goto finish; - NEXT; CASE(OP_EXACT1) MOP_IN(OP_EXACT1); DATA_ENSURE(1); @@ -3316,40 +3315,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { MOP_OUT; JUMP; - } + } } goto fail; - NEXT; CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN); if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) { if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) { MOP_OUT; JUMP; - } + } } goto fail; - NEXT; CASE(OP_WORD_END) MOP_IN(OP_WORD_END); if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { MOP_OUT; JUMP; - } + } } goto fail; - NEXT; CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END); if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) { if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) { MOP_OUT; JUMP; - } + } } goto fail; - NEXT; #endif CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF); @@ -3379,10 +3374,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif && !ON_STR_END(s)) { MOP_OUT; - JUMP; + JUMP; } goto fail; - NEXT; CASE(OP_END_LINE) MOP_IN(OP_END_LINE); if (ON_STR_END(s)) { @@ -3398,10 +3392,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) { MOP_OUT; - JUMP; + JUMP; } goto fail; - NEXT; CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF); if (ON_STR_END(s)) { @@ -3433,7 +3426,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif } goto fail; - NEXT; CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION); if (s != msa->gpos) @@ -3499,12 +3491,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1); mem = 1; goto backref; - NEXT; CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2); mem = 2; goto backref; - NEXT; CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN); GET_MEMNUM_INC(mem, p); @@ -3934,7 +3924,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc; - NEXT; CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ @@ -3970,7 +3959,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc_ng; - NEXT; CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS); STACK_PUSH_POS(s, sprev, pkeep); @@ -3995,7 +3983,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS); STACK_POP_TIL_POS_NOT; goto fail; - NEXT; CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT); STACK_PUSH_STOP_BT; @@ -4036,7 +4023,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT); STACK_POP_TIL_LOOK_BEHIND_NOT; goto fail; - NEXT; CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS); /* Save the absent-start-pos and the original end-pos. */ @@ -4098,7 +4084,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif STACK_POP_TIL_ABSENT; goto fail; - NEXT; #ifdef USE_SUBEXP_CALL CASE(OP_CALL) MOP_IN(OP_CALL); @@ -4128,7 +4113,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_FINISH) goto finish; - NEXT; CASE(OP_FAIL) if (0) { @@ -4393,219 +4377,6 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, return (UChar* )NULL; } -#ifndef USE_SUNDAY_QUICK_SEARCH -/* Boyer-Moore-Horspool search applied to a multibyte string */ -static UChar* -bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) -{ - const UChar *s, *se, *t, *p, *end; - const UChar *tail; - ptrdiff_t skip, tlen1; - -# ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", - (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); -# endif - - tail = target_end - 1; - tlen1 = tail - target; - end = text_range; - if (end + tlen1 > text_end) - end = text_end - tlen1; - - s = text; - - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - skip = reg->map[*se]; - t = s; - do { - s += enclen(reg->enc, s, end); - } while ((s - t) < skip && s < end); - } - } - else { -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - skip = reg->int_map[*se]; - t = s; - do { - s += enclen(reg->enc, s, end); - } while ((s - t) < skip && s < end); - } -# endif - } - - return (UChar* )NULL; -} - -/* Boyer-Moore-Horspool search */ -static UChar* -bm_search(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) -{ - const UChar *s, *t, *p, *end; - const UChar *tail; - -# ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", - (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); -# endif - - end = text_range + (target_end - target) - 1; - if (end > text_end) - end = text_end; - - tail = target_end - 1; - s = text + (target_end - target) - 1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s; - t = tail; -# ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n", - (intptr_t )(s - text), s); -# endif - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - s += reg->map[*s]; - } - } - else { /* see int_map[] */ -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - s += reg->int_map[*s]; - } -# endif - } - return (UChar* )NULL; -} - -/* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */ -static UChar* -bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) -{ - const UChar *s, *se, *t, *end; - const UChar *tail; - ptrdiff_t skip, tlen1; - OnigEncoding enc = reg->enc; - int case_fold_flag = reg->case_fold_flag; - -# ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n", - (int )text, text, (int )text_end, text_end, (int )text_range, text_range); -# endif - - tail = target_end - 1; - tlen1 = tail - target; - end = text_range; - if (end + tlen1 > text_end) - end = text_end - tlen1; - - s = text; - - if (IS_NULL(reg->int_map)) { - while (s < end) { - se = s + tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; - skip = reg->map[*se]; - t = s; - do { - s += enclen(reg->enc, s, end); - } while ((s - t) < skip && s < end); - } - } - else { -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - se = s + tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; - skip = reg->int_map[*se]; - t = s; - do { - s += enclen(reg->enc, s, end); - } while ((s - t) < skip && s < end); - } -# endif - } - - return (UChar* )NULL; -} - -/* Boyer-Moore-Horspool search (ignore case) */ -static UChar* -bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) -{ - const UChar *s, *p, *end; - const UChar *tail; - OnigEncoding enc = reg->enc; - int case_fold_flag = reg->case_fold_flag; - -# ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n", - (int )text, text, (int )text_end, text_end, (int )text_range, text_range); -# endif - - end = text_range + (target_end - target) - 1; - if (end > text_end) - end = text_end; - - tail = target_end - 1; - s = text + (target_end - target) - 1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s - (target_end - target) + 1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; - s += reg->map[*s]; - } - } - else { /* see int_map[] */ -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = s - (target_end - target) + 1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; - s += reg->int_map[*s]; - } -# endif - } - return (UChar* )NULL; -} - -#else /* USE_SUNDAY_QUICK_SEARCH */ - /* Sunday's quick search applied to a multibyte string */ static UChar* bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, @@ -4824,7 +4595,6 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, } return (UChar* )NULL; } -#endif /* USE_SUNDAY_QUICK_SEARCH */ #ifdef USE_INT_MAP_BACKWARD static int diff --git a/regint.h b/regint.h index 75abfba235790c..9924e5f62ab5f3 100644 --- a/regint.h +++ b/regint.h @@ -86,7 +86,6 @@ /* #define USE_OP_PUSH_OR_JUMP_EXACT */ #define USE_QTFR_PEEK_NEXT #define USE_ST_LIBRARY -#define USE_SUNDAY_QUICK_SEARCH #define INIT_MATCH_STACK_SIZE 160 #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ diff --git a/spec/ruby/language/regexp_spec.rb b/spec/ruby/language/regexp_spec.rb index 0cd9584549b801..dbf341b19ea526 100644 --- a/spec/ruby/language/regexp_spec.rb +++ b/spec/ruby/language/regexp_spec.rb @@ -112,7 +112,7 @@ /foo.(?<=\d)/.match("fooA foo1").to_a.should == ["foo1"] end - ruby_bug "#13671", ""..."3.6" do # https://bugs.ruby-lang.org/issues/13671 + ruby_bug "#13671", ""..."3.5" do # https://bugs.ruby-lang.org/issues/13671 it "handles a lookbehind with ss characters" do r = Regexp.new("(?