12#include "ruby/internal/config.h"
19#include "internal/encoding.h"
20#include "internal/hash.h"
21#include "internal/imemo.h"
22#include "internal/re.h"
23#include "internal/string.h"
24#include "internal/object.h"
25#include "internal/ractor.h"
26#include "internal/variable.h"
/* Fixed-size char buffer, ONIG_MAX_ERROR_MESSAGE_LEN bytes long, used to
 * carry error message text between the functions in this file. */
34typedef char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN];
/* Copy msg into the error buffer err with strlcpy, bounded by
 * ONIG_MAX_ERROR_MESSAGE_LEN. */
35#define errcpy(err, msg) strlcpy((err), (msg), ONIG_MAX_ERROR_MESSAGE_LEN)
/* Shorthand for the beg[] / end[] entry at index `no`.  Both macros expand
 * to an access through a variable named `regs`, so a `regs` pointer must be
 * in scope wherever they are used. */
37#define BEG(no) (regs->beg[(no)])
38#define END(no) (regs->end[(no)])
/*
 * Byte translation table indexed by byte value (entries for 0-0377 follow).
 * Every entry equals its own index except octal 101-132 (ASCII 'A'-'Z'),
 * which hold octal 141-172 (ASCII 'a'-'z').  rb_memcicmp subtracts two
 * lookups from this table to compare bytes without regard to ASCII case.
 */
41static const char casetable[] = {
42 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
43 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
44 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
45 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
47 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
49 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
51 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
53 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
55 '\100',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
57 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
59 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
61 '\170',
'\171',
'\172',
'\133',
'\134',
'\135',
'\136',
'\137',
63 '\140',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
65 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
67 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
69 '\170',
'\171',
'\172',
'\173',
'\174',
'\175',
'\176',
'\177',
70 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
71 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
72 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
73 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
74 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
75 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
76 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
77 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
78 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
79 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
80 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\327',
81 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\337',
82 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
83 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
84 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\367',
85 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\377',
88# error >>> "You lose. You will need a translation table for your character set." <<<
92rb_hrtime_t rb_reg_match_time_limit = 0;
/*
 * Compares the buffers x and y byte by byte after mapping each byte through
 * casetable, so ASCII 'A'-'Z' compare equal to 'a'-'z'.
 *
 * x, y: buffers to compare, read as unsigned char.
 * len:  byte count supplied by the caller.
 *
 * NOTE(review): the loop header and the return statements are not visible
 * in this excerpt; presumably a non-zero `tmp` is returned as the result --
 * confirm against the full file.
 */
95rb_memcicmp(
const void *x,
const void *y,
long len)
97 const unsigned char *p1 = x, *p2 = y;
/* tmp is the difference of the two case-mapped bytes; both cursors advance. */
101 if ((tmp = casetable[(
unsigned)*p1++] - casetable[(
unsigned)*p2++]))
109rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
111 const unsigned char *y;
113 if ((y = memmem(ys, n, xs, m)) != NULL)
120rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
122 const unsigned char *x = xs, *xe = xs + m;
123 const unsigned char *y = ys, *ye = ys + n;
124#define VALUE_MAX ((VALUE)~(VALUE)0)
128 rb_bug(
"!!too long pattern string!!");
130 if (!(y = memchr(y, *x, n - m + 1)))
134 for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
/*
 * Searches the n-byte buffer ys for the m-byte pattern xs using a 256-entry
 * skip table indexed by byte value.
 *
 * NOTE(review): the initial value stored in each qstable slot, the header of
 * the loop that fills the table from the pattern, and the return statements
 * are not visible in this excerpt.
 */
154rb_memsearch_qs(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
156 const unsigned char *x = xs, *xe = xs + m;
157 const unsigned char *y = ys;
158 VALUE i, qstable[256];
/* Visit every table slot once (the assigned value is not shown here). */
161 for (i = 0; i < 256; ++i)
/* Record, for the pattern byte at x, its distance to the end of the pattern. */
164 qstable[*x] = xe - x;
/*
 * Slide an m-byte window over ys.  The step is the table entry for y[m],
 * the byte immediately after the current window.
 * NOTE(review): when the window sits at the last position (y + m == ys + n)
 * the increment expression still reads ys[n]; confirm callers guarantee
 * that byte is readable.
 */
166 for (; y + m <= ys + n; y += *(qstable + y[m])) {
/* Cheap first-byte test before the full memcmp of the window. */
167 if (*xs == *y && memcmp(xs, y, m) == 0)
173static inline unsigned int
174rb_memsearch_qs_utf8_hash(
const unsigned char *x)
176 register const unsigned int mix = 8353;
177 register unsigned int h = *x;
202 return (
unsigned char)h;
206rb_memsearch_qs_utf8(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
208 const unsigned char *x = xs, *xe = xs + m;
209 const unsigned char *y = ys;
210 VALUE i, qstable[512];
213 for (i = 0; i < 512; ++i) {
216 for (; x < xe; ++x) {
217 qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
220 for (; y + m <= ys + n; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) {
221 if (*xs == *y && memcmp(xs, y, m) == 0)
/*
 * Searches the n-byte buffer ys for the m-byte pattern xs, testing only
 * positions that are multiples of char_size bytes from the start of ys.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
228rb_memsearch_with_char_size(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n,
int char_size)
230 const unsigned char *x = xs, x0 = *xs, *y = ys;
/* n becomes the count of bytes remaining after the window; stop once the
 * pattern no longer fits.  y advances one char_size step per iteration. */
232 for (n -= m; n >= 0; n -= char_size, y += char_size) {
/* Compare the first byte directly, then the remaining m-1 bytes. */
233 if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
/*
 * Thin wrapper: forwards to rb_memsearch_with_char_size with a char_size
 * of 2, so candidate positions are tested every 2 bytes of ys.
 */
240rb_memsearch_wchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
242 return rb_memsearch_with_char_size(xs, m, ys, n, 2);
/*
 * Thin wrapper: forwards to rb_memsearch_with_char_size with a char_size
 * of 4, so candidate positions are tested every 4 bytes of ys.
 */
246rb_memsearch_qchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
248 return rb_memsearch_with_char_size(xs, m, ys, n, 4);
252rb_memsearch(
const void *x0,
long m,
const void *y0,
long n, rb_encoding *enc)
254 const unsigned char *x = x0, *y = y0;
256 if (m > n)
return -1;
258 return memcmp(x0, y0, m) == 0 ? 0 : -1;
264 const unsigned char *ys = memchr(y, *x, n);
273 return rb_memsearch_ss(x0, m, y0, n);
275 else if (enc == rb_utf8_encoding()){
276 return rb_memsearch_qs_utf8(x0, m, y0, n);
280 return rb_memsearch_wchar(x0, m, y0, n);
283 return rb_memsearch_qchar(x0, m, y0, n);
285 return rb_memsearch_qs(x0, m, y0, n);
/* Object flag bit (FL_USER6); tested against RBASIC(re)->flags in this file. */
288#define REG_ENCODING_NONE FL_USER6
/* Object flag bit (FL_USER4); rb_reg_fixed_encoding_p tests it with FL_TEST. */
290#define KCODE_FIXED FL_USER4
/* The three Onigmo option bits (ignorecase, multiline, extend); the caller of
 * make_regexp masks `options` with this before passing them on. */
292#define ARG_REG_OPTION_MASK \
293 (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
/* Extra bits carried in the same `options` int alongside the Onigmo option
 * bits; tested with `options & ARG_ENCODING_FIXED` / `ARG_ENCODING_NONE`. */
294#define ARG_ENCODING_FIXED 16
295#define ARG_ENCODING_NONE 32
304 val = ONIG_OPTION_IGNORECASE;
307 val = ONIG_OPTION_EXTEND;
310 val = ONIG_OPTION_MULTILINE;
/* Size of the char array taken by option_to_str; callers declare buffers of
 * OPTBUF_SIZE or OPTBUF_SIZE + 1 bytes.  Presumably the three option letters
 * plus a terminating NUL -- confirm against option_to_str's full body. */
319enum { OPTBUF_SIZE = 4 };
/*
 * Writes one letter into str for each option bit set in `options`, in the
 * fixed order 'm' (ONIG_OPTION_MULTILINE), 'i' (ONIG_OPTION_IGNORECASE),
 * 'x' (ONIG_OPTION_EXTEND).
 *
 * NOTE(review): the initialisation of the cursor `p`, any terminator write
 * and the return statement are not visible in this excerpt.  Callers
 * dereference the result (`*option_to_str(...)`), so it returns a char
 * pointer -- presumably str itself; confirm.
 */
322option_to_str(
char str[OPTBUF_SIZE],
int options)
325 if (options & ONIG_OPTION_MULTILINE) *p++ =
'm';
326 if (options & ONIG_OPTION_IGNORECASE) *p++ =
'i';
327 if (options & ONIG_OPTION_EXTEND) *p++ =
'x';
333rb_char_to_option_kcode(
int c,
int *option,
int *kcode)
339 *kcode = rb_ascii8bit_encindex();
340 return (*option = ARG_ENCODING_NONE);
342 *kcode = ENCINDEX_EUC_JP;
345 *kcode = ENCINDEX_Windows_31J;
348 *kcode = rb_utf8_encindex();
352 return (*option = char_to_option(c));
354 *option = ARG_ENCODING_FIXED;
359rb_reg_check(
VALUE re)
367rb_reg_expr_str(
VALUE str,
const char *s,
long len,
368 rb_encoding *enc, rb_encoding *resenc,
int term)
370 const char *p, *pend;
375 p = s; pend = p +
len;
379 c = rb_enc_ascget(p, pend, &clen, enc);
382 p += mbclen(p, pend, enc);
406 int unicode_p = rb_enc_unicode_p(enc);
409 c = rb_enc_ascget(p, pend, &clen, enc);
410 if (c ==
'\\' && p+clen < pend) {
411 int n = clen + mbclen(p+clen, pend, enc);
417 clen = rb_enc_precise_mbclen(p, pend, enc);
419 c = (
unsigned char)*p;
425 rb_str_buf_cat_escaped_char(str, c, unicode_p);
432 else if (c == term) {
440 else if (!rb_enc_isspace(c, enc)) {
444 snprintf(b,
sizeof(b),
"\\x%02X", c);
458 rb_encoding *enc = rb_enc_get(re);
460 rb_encoding *resenc = rb_default_internal_encoding();
461 if (resenc == NULL) resenc = rb_default_external_encoding();
463 if (re && rb_enc_asciicompat(enc)) {
464 rb_enc_copy(str, re);
467 rb_enc_associate(str, rb_usascii_encoding());
471 rb_reg_expr_str(str, RSTRING_PTR(src_str), RSTRING_LEN(src_str), enc, resenc,
'/');
476 char opts[OPTBUF_SIZE];
478 if (*option_to_str(opts,
RREGEXP_PTR(re)->options))
480 if (
RBASIC(re)->flags & REG_ENCODING_NONE)
506rb_reg_source(
VALUE re)
527rb_reg_inspect(
VALUE re)
532 return rb_reg_desc(re);
535static VALUE rb_reg_str_with_term(
VALUE re,
int term);
567 return rb_reg_str_with_term(re,
'/');
571rb_reg_str_with_term(
VALUE re,
int term)
574 const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
576 char optbuf[OPTBUF_SIZE + 1];
577 rb_encoding *enc = rb_enc_get(re);
581 rb_enc_copy(str, re);
584 const UChar *ptr = (UChar *)RSTRING_PTR(src_str);
585 long len = RSTRING_LEN(src_str);
587 if (
len >= 4 && ptr[0] ==
'(' && ptr[1] ==
'?') {
590 if ((
len -= 2) > 0) {
592 opt = char_to_option((
int )*ptr);
602 if (
len > 1 && *ptr ==
'-') {
606 opt = char_to_option((
int )*ptr);
621 if (*ptr ==
':' && ptr[
len-1] ==
')') {
628 err = onig_new(&rp, ptr, ptr +
len, options,
629 enc, OnigDefaultSyntax, NULL);
642 if ((options & embeddable) != embeddable) {
644 option_to_str(optbuf + 1, ~options);
649 if (rb_enc_asciicompat(enc)) {
650 rb_reg_expr_str(str, (
char*)ptr,
len, enc, NULL, term);
658 rb_enc_associate(str, rb_usascii_encoding());
662 s = RSTRING_PTR(str);
663 e = RSTRING_END(str);
668 rb_str_resize(str, RSTRING_LEN(str) - n);
670 rb_reg_expr_str(str, (
char*)ptr,
len, enc, NULL, term);
673 rb_enc_copy(str, re);
680NORETURN(
static void rb_reg_raise(
const char *err,
VALUE re));
683rb_reg_raise(
const char *err,
VALUE re)
685 VALUE desc = rb_reg_desc(re);
691rb_enc_reg_error_desc(
const char *s,
long len, rb_encoding *enc,
int options,
const char *err)
693 char opts[OPTBUF_SIZE + 1];
695 rb_encoding *resenc = rb_default_internal_encoding();
696 if (resenc == NULL) resenc = rb_default_external_encoding();
698 rb_enc_associate(desc, enc);
700 rb_reg_expr_str(desc, s,
len, enc, resenc,
'/');
702 option_to_str(opts + 1, options);
/* Forward declaration; NORETURN marks that the function does not return. */
707NORETURN(
static void rb_enc_reg_raise(
const char *s,
long len, rb_encoding *enc,
int options,
const char *err));
/*
 * Builds an error object with rb_enc_reg_error_desc from the pattern text
 * (s, len, enc), the option bits and the message err, then raises it with
 * rb_exc_raise.
 */
710rb_enc_reg_raise(
const char *s,
long len, rb_encoding *enc,
int options,
const char *err)
712 rb_exc_raise(rb_enc_reg_error_desc(s,
len, enc, options, err));
/*
 * String-object front end for rb_enc_reg_error_desc: unpacks str into its
 * byte pointer, byte length and encoding (RSTRING_PTR, RSTRING_LEN,
 * rb_enc_get) and returns whatever rb_enc_reg_error_desc returns for them
 * together with options and err.
 */
716rb_reg_error_desc(
VALUE str,
int options,
const char *err)
718 return rb_enc_reg_error_desc(RSTRING_PTR(str), RSTRING_LEN(str),
719 rb_enc_get(str), options, err);
/* Forward declaration; NORETURN marks that the function does not return. */
722NORETURN(
static void rb_reg_raise_str(
VALUE str,
int options,
const char *err));
/*
 * Raises, via rb_exc_raise, the error object that rb_reg_error_desc builds
 * from the pattern string str, the option bits and the message err.
 */
725rb_reg_raise_str(
VALUE str,
int options,
const char *err)
727 rb_exc_raise(rb_reg_error_desc(str, options, err));
/*
 * Returns, as an RBOOL value, whether the ONIG_OPTION_IGNORECASE bit is set
 * in the options of the compiled pattern reached through RREGEXP_PTR(re).
 * NOTE(review): any statements preceding the return are not visible in this
 * excerpt.
 */
745rb_reg_casefold_p(
VALUE re)
748 return RBOOL(
RREGEXP_PTR(re)->options & ONIG_OPTION_IGNORECASE);
790rb_reg_options_m(
VALUE re)
797reg_names_iter(
const OnigUChar *name,
const OnigUChar *name_end,
798 int back_num,
int *back_refs, OnigRegex regex,
void *arg)
801 rb_ary_push(ary, rb_enc_str_new((
const char *)name, name_end-name, regex->enc));
819rb_reg_names(
VALUE re)
823 ary = rb_ary_new_capa(onig_number_of_names(
RREGEXP_PTR(re)));
824 onig_foreach_name(
RREGEXP_PTR(re), reg_names_iter, (
void*)ary);
829reg_named_captures_iter(
const OnigUChar *name,
const OnigUChar *name_end,
830 int back_num,
int *back_refs, OnigRegex regex,
void *arg)
836 for (i = 0; i < back_num; i++)
837 rb_ary_store(ary, i,
INT2NUM(back_refs[i]));
839 rb_hash_aset(hash,
rb_str_new((
const char*)name, name_end-name),ary);
863rb_reg_named_captures(
VALUE re)
865 regex_t *reg = (rb_reg_check(re),
RREGEXP_PTR(re));
866 VALUE hash = rb_hash_new_with_size(onig_number_of_names(reg));
867 onig_foreach_name(reg, reg_named_captures_iter, (
void*)hash);
/*
 * Allocates a regex_t with malloc, stores it through *reg, initialises it
 * with onig_reg_init and compiles the pattern [pattern, pattern_end) with
 * onig_compile_ruby, passing along sourcefile and sourceline.
 *
 * Returns ONIGERR_MEMORY when the allocation fails.
 * NOTE(review): how `r` is checked after each call, the cleanup path on
 * failure and the final return are not visible in this excerpt.
 */
872onig_new_with_source(regex_t** reg,
const UChar* pattern,
const UChar* pattern_end,
873 OnigOptionType option, OnigEncoding enc,
const OnigSyntaxType* syntax,
874 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
878 *reg = (regex_t* )malloc(
sizeof(regex_t));
879 if (IS_NULL(*reg))
return ONIGERR_MEMORY;
/* Case folding is fixed to ONIGENC_CASE_FOLD_DEFAULT. */
881 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
884 r = onig_compile_ruby(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
894make_regexp(
const char *s,
long len, rb_encoding *enc,
int flags, onig_errmsg_buffer err,
895 const char *sourcefile,
int sourceline)
908 r = onig_new_with_source(&rp, (UChar*)s, (UChar*)(s +
len), flags,
909 enc, OnigDefaultSyntax, &einfo, sourcefile, sourceline);
911 onig_error_code_to_str((UChar*)err, r, &einfo);
/*
 * Allocation function taking the class `klass`.
 * NOTE(review): the allocation call itself, the declaration of `match` and
 * the return statement are not visible in this excerpt.
 */
970match_alloc(
VALUE klass)
/* Requested size is struct RMatch plus one rb_matchext_t; presumably the
 * extension area lives in the same allocation -- confirm. */
972 size_t alloc_size =
sizeof(
struct RMatch) + sizeof(rb_matchext_t);
/* Zero-fill the rb_matchext_t reached through RMATCH_EXT(match). */
978 memset(RMATCH_EXT(match), 0,
sizeof(rb_matchext_t));
986 onig_region_copy(to, (OnigRegion *)from);
987 if (to->allocated)
return 0;
989 onig_region_copy(to, (OnigRegion *)from);
990 if (to->allocated)
return 0;
991 return ONIGERR_MEMORY;
/*
 * Comparison callback over pair_t elements, ordering them by byte_pos.
 * It is passed to both qsort and bsearch in update_char_offset.
 *
 * NOTE(review): only the SIZEOF_LONG > SIZEOF_INT branch is visible in this
 * excerpt; the alternative branch is not shown.
 */
1000pair_byte_cmp(
const void *pair1,
const void *pair2)
1002 long diff = ((
pair_t*)pair1)->byte_pos - ((
pair_t*)pair2)->byte_pos;
1003#if SIZEOF_LONG > SIZEOF_INT
/* Reduce the long difference to -1, 0 or 1 so it fits the int result. */
1004 return diff ? diff > 0 ? 1 : -1 : 0;
1011update_char_offset(
VALUE match)
1013 rb_matchext_t *rm = RMATCH_EXT(match);
1015 int i, num_regs, num_pos;
1025 num_regs = rm->
regs.num_regs;
1032 enc = rb_enc_get(
RMATCH(match)->str);
1034 for (i = 0; i < num_regs; i++) {
1043 for (i = 0; i < num_regs; i++) {
1046 pairs[num_pos++].byte_pos = BEG(i);
1047 pairs[num_pos++].byte_pos = END(i);
1049 qsort(pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1051 s = p = RSTRING_PTR(
RMATCH(match)->str);
1053 for (i = 0; i < num_pos; i++) {
1054 q = s + pairs[i].byte_pos;
1056 pairs[i].char_pos = c;
1060 for (i = 0; i < num_regs; i++) {
1068 key.byte_pos = BEG(i);
1069 found = bsearch(&key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1072 key.byte_pos = END(i);
1073 found = bsearch(&key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1079match_check(
VALUE match)
1081 if (!
RMATCH(match)->regexp) {
1098 rm = RMATCH_EXT(obj);
1102 if (RMATCH_EXT(orig)->char_offset_num_allocated) {
1128match_regexp(
VALUE match)
1132 regexp =
RMATCH(match)->regexp;
1133 if (
NIL_P(regexp)) {
1163match_names(
VALUE match)
1167 return rb_ary_new_capa(0);
1168 return rb_reg_names(
RMATCH(match)->regexp);
1184match_size(
VALUE match)
1190static int name_to_backref_number(
struct re_registers *,
VALUE,
const char*,
const char*);
1191NORETURN(
static void name_to_backref_error(
VALUE name));
1194name_to_backref_error(
VALUE name)
1196 rb_raise(
rb_eIndexError,
"undefined group name reference: % "PRIsVALUE,
1203 if (i < 0 || regs->num_regs <= i)
1208match_backref_number(
VALUE match,
VALUE backref)
1220 else if (!RB_TYPE_P(backref,
T_STRING)) {
1225 num = name_to_backref_number(regs, regexp, name, name + RSTRING_LEN(backref));
1228 name_to_backref_error(backref);
1237 return match_backref_number(match, backref);
1252 int i = match_backref_number(match, n);
1256 backref_number_check(regs, i);
1261 update_char_offset(match);
1262 return rb_assoc_new(
LONG2NUM(RMATCH_EXT(match)->char_offset[i].beg),
1263 LONG2NUM(RMATCH_EXT(match)->char_offset[i].end));
1287 int i = match_backref_number(match, n);
1291 backref_number_check(regs, i);
1311 int i = match_backref_number(match, n);
1315 backref_number_check(regs, i);
1320 update_char_offset(match);
1321 return LONG2NUM(RMATCH_EXT(match)->char_offset[i].beg);
1337 int i = match_backref_number(match, n);
1341 backref_number_check(regs, i);
1346 update_char_offset(match);
1347 return LONG2NUM(RMATCH_EXT(match)->char_offset[i].end);
1379 int i = match_backref_number(match, n);
1382 backref_number_check(regs, i);
1384 long start = BEG(i), end = END(i);
1388 return rb_str_subseq(
RMATCH(match)->str, start, end - start);
1423 int i = match_backref_number(match, n);
1427 backref_number_check(regs, i);
1432 update_char_offset(match);
1434 &RMATCH_EXT(match)->char_offset[i];
1438#define MATCH_BUSY FL_USER2
1443 FL_SET(match, MATCH_BUSY);
1447rb_match_unbusy(
VALUE match)
/*
 * Returns regs->num_regs for the given match, or -1 when match is nil or
 * when `regs` is NULL.
 * NOTE(review): the statement that obtains `regs` from match is not visible
 * in this excerpt.
 */
1453rb_match_count(
VALUE match)
1456 if (
NIL_P(match))
return -1;
1458 if (!regs)
return -1;
1459 return regs->num_regs;
1466 rb_matchext_t *rmatch = RMATCH_EXT(match);
1470 int err = onig_region_resize(&rmatch->
regs, 1);
1471 if (err) rb_memerror();
1472 rmatch->
regs.beg[0] = pos;
1473 rmatch->
regs.end[0] = pos +
len;
1477rb_backref_set_string(
VALUE string,
long pos,
long len)
1483 match_set_string(match,
string, pos,
len);
/*
 * Returns, as an RBOOL value, whether the KCODE_FIXED flag is set on re
 * (tested with FL_TEST).
 */
1517rb_reg_fixed_encoding_p(
VALUE re)
1519 return RBOOL(
FL_TEST(re, KCODE_FIXED));
1523rb_reg_preprocess(
const char *p,
const char *end, rb_encoding *enc,
1524 rb_encoding **fixed_enc, onig_errmsg_buffer err,
int options);
1532 "incompatible encoding regexp match (%s regexp with %s string)",
1533 rb_enc_name(rb_enc_get(re)),
1534 rb_enc_name(rb_enc_get(
str)));
1542 cr = rb_enc_str_coderange(
str);
1550 rb_encoding *enc = 0;
1551 int cr = str_coderange(
str);
1554 rb_raise(rb_eArgError,
1555 "invalid byte sequence in %s",
1556 rb_enc_name(rb_enc_get(
str)));
1560 enc = rb_enc_get(
str);
1567 else if (!rb_enc_asciicompat(enc)) {
1568 reg_enc_error(re,
str);
1570 else if (rb_reg_fixed_encoding_p(re)) {
1573 reg_enc_error(re,
str);
1577 else if (warn && (
RBASIC(re)->flags & REG_ENCODING_NONE) &&
1578 enc != rb_ascii8bit_encoding() &&
1580 rb_warn(
"historical binary regexp match /.../n against %s string",
1592 rb_encoding *fixed_enc = 0;
1593 rb_encoding *enc = rb_reg_prepare_enc(re,
str, 1);
1596 if (reg->enc == enc)
return reg;
1601 const char *pattern = RSTRING_PTR(src_str);
1603 onig_errmsg_buffer err =
"";
1604 unescaped = rb_reg_preprocess(
1605 pattern, pattern + RSTRING_LEN(src_str), enc,
1606 &fixed_enc, err, 0);
1608 if (
NIL_P(unescaped)) {
1609 rb_raise(rb_eArgError,
"regexp preprocess failed: %s", err);
1613 rb_hrtime_t timelimit = reg->timelimit;
1620 if (
RREGEXP(re)->usecnt == 0) {
1622 r = onig_new_without_alloc(&tmp_reg, (UChar *)ptr, (UChar *)(ptr +
len),
1624 OnigDefaultSyntax, &einfo);
1628 onig_free_body(&tmp_reg);
1631 onig_free_body(reg);
1637 r = onig_new(&reg, (UChar *)ptr, (UChar *)(ptr +
len),
1639 OnigDefaultSyntax, &einfo);
1643 onig_error_code_to_str((UChar*)err, r, &einfo);
1644 rb_reg_raise(err, re);
1647 reg->timelimit = timelimit;
1662 if (!tmpreg)
RREGEXP(re)->usecnt++;
1664 OnigPosition result = match(reg,
str, regs, args);
1666 if (!tmpreg)
RREGEXP(re)->usecnt--;
1672 onig_region_free(regs, 0);
1677 case ONIGERR_TIMEOUT:
1678 rb_raise(rb_eRegexpTimeoutError,
"regexp match timeout");
1680 onig_errmsg_buffer err =
"";
1681 onig_error_code_to_str((UChar*)err, (
int)result);
1682 rb_reg_raise(err, re);
1697 enc = rb_reg_prepare_enc(re,
str, 0);
1703 range = RSTRING_LEN(
str) - pos;
1706 if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos < RSTRING_LEN(
str)) {
1707 string = (UChar*)RSTRING_PTR(
str);
1710 p = onigenc_get_right_adjust_char_head(enc,
string,
string + pos,
string + RSTRING_LEN(
str));
1713 p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,
string,
string + pos,
string + RSTRING_LEN(
str));
1727reg_onig_search(regex_t *reg,
VALUE str,
struct re_registers *regs,
void *args_ptr)
1737 (UChar *)(ptr +
len),
1738 (UChar *)(ptr + args->pos),
1739 (UChar *)(ptr + args->range),
1746rb_reg_search_set_match(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str,
VALUE *set_match)
1748 long len = RSTRING_LEN(str);
1749 if (pos >
len || pos < 0) {
1756 .range = reverse ? 0 :
len,
1760 OnigPosition result =
rb_reg_onig_match(re, str, reg_onig_search, &args, &regs);
1762 if (result == ONIG_MISMATCH) {
1764 return ONIG_MISMATCH;
1768 rb_matchext_t *rm = RMATCH_EXT(match);
1771 if (set_backref_str) {
1784 if (set_match) *set_match = match;
/*
 * Convenience form of rb_reg_search_set_match for callers that do not need
 * the match object handed back: forwards every argument unchanged and
 * passes NULL for the set_match out-parameter.
 */
1790rb_reg_search0(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str)
1792 return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str, NULL);
1798 return rb_reg_search0(re, str, pos, reverse, 1);
1811 (UChar *)(ptr +
len),
1846 if (nth >= regs->num_regs) {
1850 nth += regs->num_regs;
1851 if (nth <= 0)
return Qnil;
1853 return RBOOL(BEG(nth) != -1);
1860 long start, end,
len;
1866 if (nth >= regs->num_regs) {
1870 nth += regs->num_regs;
1871 if (nth <= 0)
return Qnil;
1874 if (start == -1)
return Qnil;
1877 str = rb_str_subseq(
RMATCH(match)->str, start,
len);
1914 if (BEG(0) == -1)
return Qnil;
1915 str = rb_str_subseq(
RMATCH(match)->str, 0, BEG(0));
1948 if (BEG(0) == -1)
return Qnil;
1949 str =
RMATCH(match)->str;
1951 str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
1956match_last_index(
VALUE match)
1961 if (
NIL_P(match))
return -1;
1964 if (BEG(0) == -1)
return -1;
1966 for (i=regs->num_regs-1; BEG(i) == -1 && i > 0; i--)
1974 int i = match_last_index(match);
1975 if (i <= 0)
return Qnil;
1977 return rb_str_subseq(
RMATCH(match)->str, BEG(i), END(i) - BEG(i));
1981rb_reg_last_defined(
VALUE match)
1983 int i = match_last_index(match);
1984 if (i < 0)
return Qnil;
1989last_match_getter(
ID _x,
VALUE *_y)
1995prematch_getter(
ID _x,
VALUE *_y)
2001postmatch_getter(
ID _x,
VALUE *_y)
2007last_paren_match_getter(
ID _x,
VALUE *_y)
2013match_array(
VALUE match,
int start)
2023 target =
RMATCH(match)->str;
2025 for (i=start; i<regs->num_regs; i++) {
2026 if (regs->beg[i] == -1) {
2027 rb_ary_push(ary,
Qnil);
2030 VALUE str = rb_str_subseq(target, regs->beg[i], regs->end[i]-regs->beg[i]);
2031 rb_ary_push(ary, str);
/*
 * Returns match_array(match, 0): the array built from every register
 * starting at index 0, so the entry for register 0 is included.
 */
2053match_to_a(
VALUE match)
2055 return match_array(match, 0);
/*
 * Returns match_array(match, 1): the array built from the registers
 * starting at index 1, so register 0 is left out.
 */
2075match_captures(
VALUE match)
2077 return match_array(match, 1);
/*
 * Resolves the group name in [name, name_end) to a back-reference number.
 *
 * Returns -1 when regexp is nil; otherwise returns the result of
 * onig_name_to_backref_number for the compiled pattern of regexp, the name
 * bytes (cast to unsigned char) and regs.
 */
2081name_to_backref_number(
struct re_registers *regs,
VALUE regexp,
const char* name,
const char* name_end)
2083 if (
NIL_P(regexp))
return -1;
2084 return onig_name_to_backref_number(
RREGEXP_PTR(regexp),
2085 (
const unsigned char *)name, (
const unsigned char *)name_end, regs);
2088#define NAME_TO_NUMBER(regs, re, name, name_ptr, name_end) \
2090 !rb_enc_compatible(RREGEXP_SRC(re), (name)) ? 0 : \
2091 name_to_backref_number((regs), (re), (name_ptr), (name_end)))
2101 else if (!RB_TYPE_P(name,
T_STRING)) {
2104 num = NAME_TO_NUMBER(regs, re, name,
2105 RSTRING_PTR(name), RSTRING_END(name));
2107 name_to_backref_error(name);
2113match_ary_subseq(
VALUE match,
long beg,
long len,
VALUE result)
2116 long j, end = olen < beg+
len ? olen : beg+
len;
2117 if (
NIL_P(result)) result = rb_ary_new_capa(
len);
2118 if (
len == 0)
return result;
2120 for (j = beg; j < end; j++) {
2123 if (beg +
len > j) {
2124 rb_ary_resize(result,
RARRAY_LEN(result) + (beg +
len) - j);
2144 return match_ary_subseq(match, beg,
len, result);
2187match_aref(
int argc,
VALUE *argv,
VALUE match)
2194 if (
NIL_P(length)) {
2199 int num = namev_to_backref_number(
RMATCH_REGS(match),
RMATCH(match)->regexp, idx);
2204 return match_ary_aref(match, idx,
Qnil);
2217 if (beg < 0)
return Qnil;
2219 else if (beg > num_regs) {
2222 if (beg+
len > num_regs) {
2223 len = num_regs - beg;
2225 return match_ary_subseq(match, beg,
len,
Qnil);
2256match_values_at(
int argc,
VALUE *argv,
VALUE match)
2264 for (i=0; i<argc; i++) {
2269 int num = namev_to_backref_number(
RMATCH_REGS(match),
RMATCH(match)->regexp, argv[i]);
2274 match_ary_aref(match, argv[i], result);
2301match_to_s(
VALUE match)
2310match_named_captures_iter(
const OnigUChar *name,
const OnigUChar *name_end,
2311 int back_num,
int *back_refs, OnigRegex regex,
void *arg)
2313 struct MEMO *memo = MEMO_CAST(arg);
2314 VALUE hash = memo->v1;
2315 VALUE match = memo->v2;
2316 long symbolize = memo->u3.state;
2318 VALUE key = rb_enc_str_new((
const char *)name, name_end-name, regex->enc);
2320 if (symbolize > 0) {
2321 key = rb_str_intern(key);
2329 for (i = 0; i < back_num; i++) {
2332 rb_hash_aset(hash, key, value);
2338 rb_hash_aset(hash, key,
Qnil);
2377match_named_captures(
int argc,
VALUE *argv,
VALUE match)
2384 return rb_hash_new();
2387 VALUE symbolize_names = 0;
2392 static ID keyword_ids[1];
2394 VALUE symbolize_names_val;
2396 if (!keyword_ids[0]) {
2399 rb_get_kwargs(opt, keyword_ids, 0, 1, &symbolize_names_val);
2400 if (!UNDEF_P(symbolize_names_val) &&
RTEST(symbolize_names_val)) {
2401 symbolize_names = 1;
2405 hash = rb_hash_new();
2406 memo = MEMO_NEW(hash, match, symbolize_names);
2408 onig_foreach_name(
RREGEXP(
RMATCH(match)->regexp)->ptr, match_named_captures_iter, (
void*)memo);
2430match_deconstruct_keys(
VALUE match,
VALUE keys)
2438 return rb_hash_new_with_size(0);
2442 h = rb_hash_new_with_size(onig_number_of_names(
RREGEXP_PTR(
RMATCH(match)->regexp)));
2445 memo = MEMO_NEW(h, match, 1);
2447 onig_foreach_name(
RREGEXP_PTR(
RMATCH(match)->regexp), match_named_captures_iter, (
void*)memo);
2455 return rb_hash_new_with_size(0);
2469 RSTRING_PTR(name), RSTRING_END(name));
/*
 * Returns the string object stored in the match's str field, without
 * copying it.
 * NOTE(review): any statements preceding the return are not visible in this
 * excerpt.
 */
2496match_string(
VALUE match)
2499 return RMATCH(match)->str;
2508match_inspect_name_iter(
const OnigUChar *name,
const OnigUChar *name_end,
2509 int back_num,
int *back_refs, OnigRegex regex,
void *arg0)
2514 for (i = 0; i < back_num; i++) {
2515 arg[back_refs[i]].name = name;
2516 arg[back_refs[i]].len = name_end - name;
2543match_inspect(
VALUE match)
2549 int num_regs = regs->num_regs;
2554 return rb_sprintf(
"#<%"PRIsVALUE
":%p>", cname, (
void*)match);
2556 else if (
NIL_P(regexp)) {
2557 return rb_sprintf(
"#<%"PRIsVALUE
": %"PRIsVALUE
">",
2565 match_inspect_name_iter, names);
2570 for (i = 0; i < num_regs; i++) {
2577 rb_str_catf(str,
"%d", i);
2595read_escaped_byte(
const char **pp,
const char *end, onig_errmsg_buffer err)
2597 const char *p = *pp;
2599 int meta_prefix = 0, ctrl_prefix = 0;
2602 if (p == end || *p++ !=
'\\') {
2603 errcpy(err,
"too short escaped multibyte character");
2609 errcpy(err,
"too short escape sequence");
2613 case '\\': code =
'\\';
break;
2614 case 'n': code =
'\n';
break;
2615 case 't': code =
'\t';
break;
2616 case 'r': code =
'\r';
break;
2617 case 'f': code =
'\f';
break;
2618 case 'v': code =
'\013';
break;
2619 case 'a': code =
'\007';
break;
2620 case 'e': code =
'\033';
break;
2623 case '0':
case '1':
case '2':
case '3':
2624 case '4':
case '5':
case '6':
case '7':
2633 errcpy(err,
"invalid hex escape");
2641 errcpy(err,
"duplicate meta escape");
2645 if (p+1 < end && *p++ ==
'-' && (*p & 0x80) == 0) {
2655 errcpy(err,
"too short meta escape");
2659 if (p == end || *p++ !=
'-') {
2660 errcpy(err,
"too short control escape");
2665 errcpy(err,
"duplicate control escape");
2669 if (p < end && (*p & 0x80) == 0) {
2679 errcpy(err,
"too short control escape");
2683 errcpy(err,
"unexpected escape sequence");
2686 if (code < 0 || 0xff < code) {
2687 errcpy(err,
"invalid escape code");
2701unescape_escaped_nonascii(
const char **pp,
const char *end, rb_encoding *enc,
2702 VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
2704 const char *p = *pp;
2706 unsigned char *area =
ALLOCA_N(
unsigned char, chmaxlen);
2707 char *chbuf = (
char *)area;
2712 memset(chbuf, 0, chmaxlen);
2714 byte = read_escaped_byte(&p, end, err);
2719 area[chlen++] = byte;
2720 while (chlen < chmaxlen &&
2722 byte = read_escaped_byte(&p, end, err);
2726 area[chlen++] = byte;
2729 l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
2731 errcpy(err,
"invalid multibyte escape");
2734 if (1 < chlen || (area[0] & 0x80)) {
2739 else if (*encp != enc) {
2740 errcpy(err,
"escaped non ASCII character in UTF-8 regexp");
2746 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", area[0]&0xff);
2754check_unicode_range(
unsigned long code, onig_errmsg_buffer err)
2756 if ((0xd800 <= code && code <= 0xdfff) ||
2758 errcpy(err,
"invalid Unicode range");
2765append_utf8(
unsigned long uv,
2766 VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
2768 if (check_unicode_range(uv, err) != 0)
2772 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", (
int)uv);
2782 *encp = rb_utf8_encoding();
2783 else if (*encp != rb_utf8_encoding()) {
2784 errcpy(err,
"UTF-8 character in non UTF-8 regexp");
2792unescape_unicode_list(
const char **pp,
const char *end,
2793 VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
2795 const char *p = *pp;
2796 int has_unicode = 0;
2800 while (p < end &&
ISSPACE(*p)) p++;
2803 code = ruby_scan_hex(p, end-p, &
len);
2807 errcpy(err,
"invalid Unicode range");
2811 if (append_utf8(code, buf, encp, err) != 0)
2815 while (p < end &&
ISSPACE(*p)) p++;
2818 if (has_unicode == 0) {
2819 errcpy(err,
"invalid Unicode list");
2829unescape_unicode_bmp(
const char **pp,
const char *end,
2830 VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
2832 const char *p = *pp;
2837 errcpy(err,
"invalid Unicode escape");
2840 code = ruby_scan_hex(p, 4, &
len);
2842 errcpy(err,
"invalid Unicode escape");
2845 if (append_utf8(code, buf, encp, err) != 0)
2852unescape_nonascii0(
const char **pp,
const char *end, rb_encoding *enc,
2853 VALUE buf, rb_encoding **encp,
int *has_property,
2854 onig_errmsg_buffer err,
int options,
int recurse)
2856 const char *p = *pp;
2859 int in_char_class = 0;
2861 int extended_mode = options & ONIG_OPTION_EXTEND;
2865 int chlen = rb_enc_precise_mbclen(p, end, enc);
2868 errcpy(err,
"invalid multibyte character");
2872 if (1 < chlen || (*p & 0x80)) {
2878 else if (*encp != enc) {
2879 errcpy(err,
"non ASCII character in UTF-8 regexp");
2888 errcpy(err,
"too short escape sequence");
2891 chlen = rb_enc_precise_mbclen(p, end, enc);
2893 goto invalid_multibyte;
2902 case '1':
case '2':
case '3':
2903 case '4':
case '5':
case '6':
case '7':
2905 size_t len = end-(p-1), octlen;
2906 if (ruby_scan_oct(p-1,
len < 3 ?
len : 3, &octlen) <= 0177) {
2922 if (rb_is_usascii_enc(enc)) {
2923 const char *pbeg = p;
2924 int byte = read_escaped_byte(&p, end, err);
2925 if (
byte == -1)
return -1;
2930 if (unescape_escaped_nonascii(&p, end, enc, buf, encp, err) != 0)
2937 errcpy(err,
"too short escape sequence");
2943 if (unescape_unicode_list(&p, end, buf, encp, err) != 0)
2945 if (p == end || *p++ !=
'}') {
2946 errcpy(err,
"invalid Unicode list");
2953 if (unescape_unicode_bmp(&p, end, buf, encp, err) != 0)
2975 if (extended_mode && !in_char_class) {
2977 while ((p < end) && ((c = *p++) !=
'\n')) {
2978 if ((c & 0x80) && !*encp && enc == rb_utf8_encoding()) {
2991 if (in_char_class) {
2998 if (!in_char_class && recurse) {
2999 if (--parens == 0) {
3006 if (!in_char_class && p + 1 < end && *p ==
'?') {
3007 if (*(p+1) ==
'#') {
3009 const char *orig_p = p;
3012 while (cont && (p < end)) {
3015 if (!(c & 0x80))
break;
3016 if (!*encp && enc == rb_utf8_encoding()) {
3022 chlen = rb_enc_precise_mbclen(p, end, enc);
3024 goto invalid_multibyte;
3045 int local_extend = 0;
3052 for(s = p+1; s < end; s++) {
3055 local_extend = invert ? -1 : 1;
3062 if (local_extend == 0 ||
3063 (local_extend == -1 && !extended_mode) ||
3064 (local_extend == 1 && extended_mode)) {
3071 int local_options = options;
3072 if (local_extend == 1) {
3073 local_options |= ONIG_OPTION_EXTEND;
3076 local_options &= ~ONIG_OPTION_EXTEND;
3080 int ret = unescape_nonascii0(&p, end, enc, buf, encp,
3083 if (ret < 0)
return ret;
3088 extended_mode = local_extend == 1;
3105 else if (!in_char_class && recurse) {
3123unescape_nonascii(
const char *p,
const char *end, rb_encoding *enc,
3124 VALUE buf, rb_encoding **encp,
int *has_property,
3125 onig_errmsg_buffer err,
int options)
3127 return unescape_nonascii0(&p, end, enc, buf, encp, has_property,
3132rb_reg_preprocess(
const char *p,
const char *end, rb_encoding *enc,
3133 rb_encoding **fixed_enc, onig_errmsg_buffer err,
int options)
3136 int has_property = 0;
3138 buf = rb_str_buf_new(0);
3140 if (rb_enc_asciicompat(enc))
3144 rb_enc_associate(buf, enc);
3147 if (unescape_nonascii(p, end, enc, buf, fixed_enc, &has_property, err, options) != 0)
3150 if (has_property && !*fixed_enc) {
3155 rb_enc_associate(buf, *fixed_enc);
3162rb_reg_check_preprocess(
VALUE str)
3164 rb_encoding *fixed_enc = 0;
3165 onig_errmsg_buffer err =
"";
3171 p = RSTRING_PTR(str);
3172 end = p + RSTRING_LEN(str);
3173 enc = rb_enc_get(str);
3175 buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err, 0);
3179 return rb_reg_error_desc(str, 0, err);
3185rb_reg_preprocess_dregexp(
VALUE ary,
int options)
3187 rb_encoding *fixed_enc = 0;
3188 rb_encoding *regexp_enc = 0;
3189 onig_errmsg_buffer err =
"";
3192 rb_encoding *ascii8bit = rb_ascii8bit_encoding();
3195 rb_raise(rb_eArgError,
"no arguments given");
3202 rb_encoding *src_enc;
3204 src_enc = rb_enc_get(str);
3205 if (options & ARG_ENCODING_NONE &&
3206 src_enc != ascii8bit) {
3208 rb_raise(
rb_eRegexpError,
"/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
3210 src_enc = ascii8bit;
3214 p = RSTRING_PTR(str);
3215 end = p + RSTRING_LEN(str);
3217 buf = rb_reg_preprocess(p, end, src_enc, &fixed_enc, err, options);
3220 rb_raise(rb_eArgError,
"%s", err);
3222 if (fixed_enc != 0) {
3223 if (regexp_enc != 0 && regexp_enc != fixed_enc) {
3224 rb_raise(
rb_eRegexpError,
"encoding mismatch in dynamic regexp : %s and %s",
3225 rb_enc_name(regexp_enc), rb_enc_name(fixed_enc));
3227 regexp_enc = fixed_enc;
3233 rb_str_buf_append(result, str);
3236 rb_enc_associate(result, regexp_enc);
3243rb_reg_initialize_check(
VALUE obj)
3252rb_reg_initialize(
VALUE obj,
const char *s,
long len, rb_encoding *enc,
3253 int options, onig_errmsg_buffer err,
3254 const char *sourcefile,
int sourceline)
3258 rb_encoding *fixed_enc = 0;
3259 rb_encoding *a_enc = rb_ascii8bit_encoding();
3261 rb_reg_initialize_check(obj);
3263 if (rb_enc_dummy_p(enc)) {
3264 errcpy(err,
"can't make regexp with dummy encoding");
3268 unescaped = rb_reg_preprocess(s, s+
len, enc, &fixed_enc, err, options);
3269 if (
NIL_P(unescaped))
3273 if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) ||
3274 (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) {
3275 errcpy(err,
"incompatible character encoding");
3278 if (fixed_enc != a_enc) {
3279 options |= ARG_ENCODING_FIXED;
3283 else if (!(options & ARG_ENCODING_FIXED)) {
3284 enc = rb_usascii_encoding();
3287 rb_enc_associate((
VALUE)re, enc);
3288 if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
3291 if (options & ARG_ENCODING_NONE) {
3295 re->
ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc,
3296 options & ARG_REG_OPTION_MASK, err,
3297 sourcefile, sourceline);
3298 if (!re->
ptr)
return -1;
3304reg_set_source(
VALUE reg,
VALUE str, rb_encoding *enc)
3306 rb_encoding *regenc = rb_enc_get(reg);
3307 if (regenc != enc) {
3308 str = rb_enc_associate(rb_str_dup(str), enc = regenc);
3314rb_reg_initialize_str(
VALUE obj,
VALUE str,
int options, onig_errmsg_buffer err,
3315 const char *sourcefile,
int sourceline)
3318 rb_encoding *str_enc = rb_enc_get(str), *enc = str_enc;
3319 if (options & ARG_ENCODING_NONE) {
3320 rb_encoding *ascii8bit = rb_ascii8bit_encoding();
3321 if (enc != ascii8bit) {
3323 errcpy(err,
"/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
3329 ret = rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
3330 options, err, sourcefile, sourceline);
3331 if (ret == 0) reg_set_source(obj, str, str_enc);
3336rb_reg_s_alloc(
VALUE klass)
3356 return rb_reg_init_str(rb_reg_alloc(), s, options);
3360rb_reg_init_str(
VALUE re,
VALUE s,
int options)
3362 onig_errmsg_buffer err =
"";
3364 if (rb_reg_initialize_str(re, s, options, err, NULL, 0) != 0) {
3365 rb_reg_raise_str(s, options, err);
3372rb_reg_init_str_enc(
VALUE re,
VALUE s, rb_encoding *enc,
int options)
3374 onig_errmsg_buffer err =
"";
3376 if (rb_reg_initialize(re, RSTRING_PTR(s), RSTRING_LEN(s),
3377 enc, options, err, NULL, 0) != 0) {
3378 rb_reg_raise_str(s, options, err);
3380 reg_set_source(re, s, enc);
3386rb_reg_new_ary(
VALUE ary,
int opt)
3396 VALUE re = rb_reg_alloc();
3397 onig_errmsg_buffer err =
"";
3399 if (rb_reg_initialize(re, s,
len, enc, options, err, NULL, 0) != 0) {
3400 rb_enc_reg_raise(s,
len, enc, options, err);
3414rb_reg_compile(
VALUE str,
int options,
const char *sourcefile,
int sourceline)
3416 VALUE re = rb_reg_alloc();
3417 onig_errmsg_buffer err =
"";
3420 if (rb_reg_initialize_str(re, str, options, err, sourcefile, sourceline) != 0) {
3421 rb_set_errinfo(rb_reg_error_desc(str, options, err));
3428static VALUE reg_cache;
3435 && memcmp(
RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
3441static st_index_t reg_hash(
VALUE re);
3453rb_reg_hash(
VALUE re)
3455 st_index_t hashval = reg_hash(re);
3488 if (re1 == re2)
return Qtrue;
3490 rb_reg_check(re1); rb_reg_check(re2);
3510match_hash(
VALUE match)
3517 hashval =
rb_hash_uint(hashval, reg_hash(match_regexp(match)));
3540 if (match1 == match2)
return Qtrue;
3544 if (!rb_reg_equal(match_regexp(match1), match_regexp(match2)))
return Qfalse;
3547 if (regs1->num_regs != regs2->num_regs)
return Qfalse;
3548 if (memcmp(regs1->beg, regs2->beg, regs1->num_regs *
sizeof(*regs1->beg)))
return Qfalse;
3549 if (memcmp(regs1->end, regs2->end, regs1->num_regs *
sizeof(*regs1->end)))
return Qfalse;
3554reg_operand(
VALUE s,
int check)
3576 *strp = str = reg_operand(str, TRUE);
3587 return rb_reg_search_set_match(re, str, pos, 0, 1, set_match);
3649 long pos = reg_match_pos(re, &str, 0, NULL);
3650 if (pos < 0)
return Qnil;
3680 str = reg_operand(str, FALSE);
3686 return RBOOL(start >= 0);
3763rb_reg_match_m(
int argc,
VALUE *argv,
VALUE re)
3768 if (
rb_scan_args(argc, argv,
"11", &str, &initpos) == 2) {
3775 pos = reg_match_pos(re, &str, pos, &result);
3804rb_reg_match_m_p(
int argc,
VALUE *argv,
VALUE re)
3807 return rb_reg_match_p(re, argv[0], pos);
3818 if (pos < 0)
return Qfalse;
3824 pos = beg - RSTRING_PTR(str);
3830 .range = RSTRING_LEN(str),
3843str_to_option(
VALUE str)
3849 if (
NIL_P(str))
return -1;
3851 for (
long i = 0; i <
len; ++i) {
3852 int f = char_to_option(ptr[i]);
3854 rb_raise(rb_eArgError,
"unknown regexp option: %"PRIsVALUE, str);
3862set_timeout(rb_hrtime_t *hrt,
VALUE timeout)
3864 double timeout_d =
NIL_P(timeout) ? 0.0 :
NUM2DBL(timeout);
3865 if (!
NIL_P(timeout) && timeout_d <= 0) {
3866 rb_raise(rb_eArgError,
"invalid timeout: %"PRIsVALUE, timeout);
3868 double2hrtime(hrt, timeout_d);
3877 rb_reg_initialize_check(copy);
3878 if ((r = onig_reg_copy(&re,
RREGEXP_PTR(orig))) != 0) {
3885 rb_enc_copy(copy, orig);
3900void rb_warn_deprecated_to_remove(
const char *removal,
const char *fmt,
const char *suggest, ...);
3957rb_reg_initialize_m(
int argc,
VALUE *argv,
VALUE self)
3960 VALUE re = reg_extract_args(argc, argv, &args);
3969 set_timeout(&
RREGEXP_PTR(self)->timelimit, args.timeout);
3978 rb_encoding *enc = 0;
3984 args->timeout =
Qnil;
3985 if (!
NIL_P(kwargs)) {
3986 static ID keywords[1];
4007 else if ((f = str_to_option(opts)) >= 0) flags = f;
4008 else if (rb_bool_expected(opts,
"ignorecase", FALSE))
4009 flags = ONIG_OPTION_IGNORECASE;
4015 args->flags = flags;
4022 if (enc && rb_enc_get(str) != enc)
4023 rb_reg_init_str_enc(self, str, enc, flags);
4025 rb_reg_init_str(self, str, flags);
4032 rb_encoding *enc = rb_enc_get(str);
4038 s = RSTRING_PTR(str);
4039 send = s + RSTRING_LEN(str);
4041 c = rb_enc_ascget(s, send, &clen, enc);
4043 s += mbclen(s, send, enc);
4047 case '[':
case ']':
case '{':
case '}':
4048 case '(':
case ')':
case '|':
case '-':
4049 case '*':
case '.':
case '\\':
4050 case '?':
case '+':
case '^':
case '$':
4052 case '\t':
case '\f':
case '\v':
case '\n':
case '\r':
4059 rb_enc_associate(tmp, rb_usascii_encoding());
4066 rb_enc_associate(tmp, rb_usascii_encoding());
4069 rb_enc_copy(tmp, str);
4071 t = RSTRING_PTR(tmp);
4073 const char *p = RSTRING_PTR(str);
4074 memcpy(t, p, s - p);
4078 c = rb_enc_ascget(s, send, &clen, enc);
4080 int n = mbclen(s, send, enc);
4088 case '[':
case ']':
case '{':
case '}':
4089 case '(':
case ')':
case '|':
case '-':
4090 case '*':
case '.':
case '\\':
4091 case '?':
case '+':
case '^':
case '$':
4093 t += rb_enc_mbcput(
'\\', t, enc);
4096 t += rb_enc_mbcput(
'\\', t, enc);
4097 t += rb_enc_mbcput(
' ', t, enc);
4100 t += rb_enc_mbcput(
'\\', t, enc);
4101 t += rb_enc_mbcput(
't', t, enc);
4104 t += rb_enc_mbcput(
'\\', t, enc);
4105 t += rb_enc_mbcput(
'n', t, enc);
4108 t += rb_enc_mbcput(
'\\', t, enc);
4109 t += rb_enc_mbcput(
'r', t, enc);
4112 t += rb_enc_mbcput(
'\\', t, enc);
4113 t += rb_enc_mbcput(
'f', t, enc);
4116 t += rb_enc_mbcput(
'\\', t, enc);
4117 t += rb_enc_mbcput(
'v', t, enc);
4120 t += rb_enc_mbcput(c, t, enc);
4122 rb_str_resize(tmp, t - RSTRING_PTR(tmp));
4155 options =
RREGEXP_PTR(re)->options & ARG_REG_OPTION_MASK;
4156 if (
RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED;
4157 if (
RBASIC(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE;
4162rb_check_regexp_type(
VALUE re)
4188 return rb_check_regexp_type(re);
4201 else if (argc == 1) {
4202 VALUE arg = rb_ary_entry(args0, 0);
4203 VALUE re = rb_check_regexp_type(arg);
4208 quoted = rb_reg_s_quote(
Qnil, arg);
4214 VALUE source = rb_str_buf_new(0);
4215 rb_encoding *result_enc;
4217 int has_asciionly = 0;
4218 rb_encoding *has_ascii_compat_fixed = 0;
4219 rb_encoding *has_ascii_incompat = 0;
4221 for (i = 0; i < argc; i++) {
4223 VALUE e = rb_ary_entry(args0, i);
4228 v = rb_check_regexp_type(e);
4230 rb_encoding *enc = rb_enc_get(v);
4231 if (!rb_enc_asciicompat(enc)) {
4232 if (!has_ascii_incompat)
4233 has_ascii_incompat = enc;
4234 else if (has_ascii_incompat != enc)
4235 rb_raise(rb_eArgError,
"incompatible encodings: %s and %s",
4236 rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
4238 else if (rb_reg_fixed_encoding_p(v)) {
4239 if (!has_ascii_compat_fixed)
4240 has_ascii_compat_fixed = enc;
4241 else if (has_ascii_compat_fixed != enc)
4242 rb_raise(rb_eArgError,
"incompatible encodings: %s and %s",
4243 rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
4248 v = rb_reg_str_with_term(v, -1);
4253 enc = rb_enc_get(e);
4254 if (!rb_enc_asciicompat(enc)) {
4255 if (!has_ascii_incompat)
4256 has_ascii_incompat = enc;
4257 else if (has_ascii_incompat != enc)
4258 rb_raise(rb_eArgError,
"incompatible encodings: %s and %s",
4259 rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
4265 if (!has_ascii_compat_fixed)
4266 has_ascii_compat_fixed = enc;
4267 else if (has_ascii_compat_fixed != enc)
4268 rb_raise(rb_eArgError,
"incompatible encodings: %s and %s",
4269 rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
4271 v = rb_reg_s_quote(
Qnil, e);
4273 if (has_ascii_incompat) {
4274 if (has_asciionly) {
4275 rb_raise(rb_eArgError,
"ASCII incompatible encoding: %s",
4276 rb_enc_name(has_ascii_incompat));
4278 if (has_ascii_compat_fixed) {
4279 rb_raise(rb_eArgError,
"incompatible encodings: %s and %s",
4280 rb_enc_name(has_ascii_incompat), rb_enc_name(has_ascii_compat_fixed));
4285 rb_enc_copy(source, v);
4290 if (has_ascii_incompat) {
4291 result_enc = has_ascii_incompat;
4293 else if (has_ascii_compat_fixed) {
4294 result_enc = has_ascii_compat_fixed;
4297 result_enc = rb_ascii8bit_encoding();
4300 rb_enc_associate(source, result_enc);
4344 !
NIL_P(v = rb_check_array_type(rb_ary_entry(args, 0)))) {
4345 return rb_reg_s_union(self, v);
4347 return rb_reg_s_union(self, args);
4372rb_reg_s_linear_time_p(
int argc,
VALUE *argv,
VALUE self)
4375 VALUE re = reg_extract_args(argc, argv, &args);
4378 re =
reg_init_args(rb_reg_alloc(), args.str, args.enc, args.flags);
4381 return RBOOL(onig_check_linear_time(
RREGEXP_PTR(re)));
4390 return reg_copy(copy, re);
4399 rb_encoding *str_enc = rb_enc_get(str);
4400 rb_encoding *src_enc = rb_enc_get(src);
4401 int acompat = rb_enc_asciicompat(str_enc);
/*
 * ASCGET(s, e, cl): fetch the character at s as an ASCII value.
 *
 * When acompat is nonzero (acompat holds rb_enc_asciicompat(str_enc)),
 * *cl is set to 1 and the result is the byte s[0] if ISASCII() accepts it,
 * otherwise -1; e is not consulted on this path.
 * When acompat is zero the lookup is delegated to
 * rb_enc_ascget(s, e, cl, str_enc).
 *
 * Relies on the locals `acompat` and `str_enc` being in scope at the use site.
 * `s` is expanded more than once on the fast path, so the argument must be
 * free of side effects.
 */
4403#define ASCGET(s,e,cl) (acompat ? (*(cl)=1,ISASCII((s)[0])?(s)[0]:-1) : rb_enc_ascget((s), (e), (cl), str_enc))
4410 int c = ASCGET(s, e, &clen);
4414 s += mbclen(s, e, str_enc);
4420 if (c !=
'\\' || s == e)
continue;
4423 val = rb_str_buf_new(ss-p);
4425 rb_enc_str_buf_cat(val, p, ss-p, str_enc);
4427 c = ASCGET(s, e, &clen);
4429 s += mbclen(s, e, str_enc);
4430 rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
4438 case '1':
case '2':
case '3':
case '4':
4439 case '5':
case '6':
case '7':
case '8':
case '9':
4440 if (!
NIL_P(regexp) && onig_noname_group_capture_is_active(
RREGEXP_PTR(regexp))) {
4449 if (s < e && ASCGET(s, e, &clen) ==
'<') {
4450 char *name, *name_end;
4452 name_end = name = s + clen;
4453 while (name_end < e) {
4454 c = ASCGET(name_end, e, &clen);
4455 if (c ==
'>')
break;
4456 name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
4459 VALUE n = rb_str_subseq(str, (
long)(name - RSTRING_PTR(str)),
4460 (
long)(name_end - name));
4461 if ((no = NAME_TO_NUMBER(regs, regexp, n, name, name_end)) < 1) {
4462 name_to_backref_error(n);
4464 p = s = name_end + clen;
4472 rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
4481 rb_enc_str_buf_cat(val, RSTRING_PTR(src), BEG(0), src_enc);
4485 rb_enc_str_buf_cat(val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0), src_enc);
4489 no = regs->num_regs-1;
4490 while (BEG(no) == -1 && no > 0) no--;
4491 if (no == 0)
continue;
4495 rb_enc_str_buf_cat(val, s-clen, clen, str_enc);
4499 rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
4504 if (no >= regs->num_regs)
continue;
4505 if (BEG(no) == -1)
continue;
4506 rb_enc_str_buf_cat(val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no), src_enc);
4510 if (!val)
return str;
4512 rb_enc_str_buf_cat(val, p, e-p, str_enc);
4519ignorecase_getter(
ID _x,
VALUE *_y)
4542get_LAST_MATCH_INFO(
ID _x,
VALUE *_y)
4544 return match_getter();
4595rb_reg_s_last_match(
int argc,
VALUE *argv,
VALUE _)
4601 n = match_backref_number(match, argv[0]);
4604 return match_getter();
4608re_warn(
const char *s)
4615rb_reg_timeout_p(regex_t *reg,
void *end_time_)
4617 rb_hrtime_t *end_time = (rb_hrtime_t *)end_time_;
4619 if (*end_time == 0) {
4623 rb_hrtime_t timelimit = reg->timelimit;
4627 timelimit = rb_reg_match_time_limit;
4631 *end_time = rb_hrtime_add(timelimit, rb_hrtime_now());
4635 *end_time = RB_HRTIME_MAX;
4639 if (*end_time < rb_hrtime_now()) {
4657rb_reg_s_timeout_get(
VALUE dummy)
4659 double d = hrtime2double(rb_reg_match_time_limit);
4660 if (d == 0.0)
return Qnil;
4678rb_reg_s_timeout_set(
VALUE dummy,
VALUE timeout)
4680 rb_ractor_ensure_main_ractor(
"can not access Regexp.timeout from non-main Ractors");
4682 set_timeout(&rb_reg_match_time_limit, timeout);
4703rb_reg_timeout_get(
VALUE re)
4706 double d = hrtime2double(
RREGEXP_PTR(re)->timelimit);
4707 if (d == 0.0)
return Qnil;
4734 onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
4735 onig_set_warn_func(re_warn);
4736 onig_set_verb_warn_func(re_warn);
4744 rb_gvar_ractor_local(
"$~");
4745 rb_gvar_ractor_local(
"$&");
4746 rb_gvar_ractor_local(
"$`");
4747 rb_gvar_ractor_local(
"$'");
4748 rb_gvar_ractor_local(
"$+");
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Undefines the method `name` on klass.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define rb_str_new2
Old name of rb_str_new_cstr.
#define NEWOBJ_OF
Old name of RB_NEWOBJ_OF.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
struct re_pattern_buffer Regexp
Old name of re_pattern_buffer.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define REALLOC_N
Old name of RB_REALLOC_N.
#define OBJ_INIT_COPY(obj, orig)
Old name of RB_OBJ_INIT_COPY.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define rb_str_buf_new2
Old name of rb_str_buf_new_cstr.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define FIX2INT
Old name of RB_FIX2INT.
#define NUM2DBL
Old name of rb_num2dbl.
#define rb_str_new3
Old name of rb_str_new_shared.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define rb_exc_new3
Old name of rb_exc_new_str.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define NUM2INT
Old name of RB_NUM2INT.
#define INT2NUM
Old name of RB_INT2NUM.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define T_ARRAY
Old name of RUBY_T_ARRAY.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define T_SYMBOL
Old name of RUBY_T_SYMBOL.
#define DBL2NUM
Old name of rb_float_new.
#define T_MATCH
Old name of RUBY_T_MATCH.
#define FL_TEST
Old name of RB_FL_TEST.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define scan_oct(s, l, e)
Old name of ruby_scan_oct.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define rb_str_new4
Old name of rb_str_new_frozen.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
VALUE rb_eStandardError
StandardError exception.
VALUE rb_eRegexpError
RegexpError exception.
#define ruby_verbose
This variable controls whether the interpreter is in verbose mode (the value of $VERBOSE).
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
void rb_warn(const char *fmt,...)
Identical to rb_warning(), except it reports unless $VERBOSE is nil.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_check_convert_type(VALUE val, int type, const char *name, const char *mid)
Identical to rb_convert_type(), except it returns RUBY_Qnil instead of raising exceptions,...
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_class_new_instance(int argc, const VALUE *argv, VALUE klass)
Allocates, then initialises an instance of the given class.
VALUE rb_cMatch
MatchData class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_cRegexp
Regexp class.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
static int rb_enc_mbminlen(rb_encoding *enc)
Queries the minimum number of bytes that the passed encoding needs to represent a character.
VALUE rb_enc_reg_new(const char *ptr, long len, rb_encoding *enc, int opts)
Identical to rb_reg_new(), except it additionally takes an encoding.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
#define RGENGC_WB_PROTECTED_MATCH
This is a compile-time flag to enable/disable write barrier for struct RMatch.
#define RGENGC_WB_PROTECTED_REGEXP
This is a compile-time flag to enable/disable write barrier for struct RRegexp.
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Encodes a Unicode codepoint into its UTF-8 representation.
#define rb_check_frozen
Just another name of rb_check_frozen_inline.
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_lastline_get(void)
Queries the last line, or the $_.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_last_match(VALUE md)
This just returns the argument, stringified.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_reg_match_post(VALUE md)
The portion of the original string after the given match.
VALUE rb_reg_nth_defined(int n, VALUE md)
Identical to rb_reg_nth_match(), except it just returns Boolean.
VALUE rb_reg_match_pre(VALUE md)
The portion of the original string before the given match.
VALUE rb_reg_new_str(VALUE src, int opts)
Identical to rb_reg_new(), except it takes the expression in Ruby's string instead of C's.
VALUE rb_reg_match_last(VALUE md)
The portion of the original string that captured at the very last.
VALUE rb_reg_match2(VALUE re)
Identical to rb_reg_match(), except it matches against rb_lastline_get() (or, the $_).
VALUE rb_reg_new(const char *src, long len, int opts)
Creates a new Regular expression.
#define rb_hash_uint(h, i)
Just another name of st_hash_uint.
#define rb_hash_end(h)
Just another name of st_hash_end.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
#define rb_str_buf_cat
Just another name of rb_str_cat.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
st_index_t rb_hash_start(st_index_t i)
Starts a series of hashing.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
VALUE rb_class_path(VALUE mod)
Identical to rb_mod_name(), except it returns #<Class: ...> style inspection for anonymous modules.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
void rb_define_const(VALUE klass, const char *name, VALUE val)
Defines a Ruby level constant under a namespace.
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
regex_t * rb_reg_prepare_re(VALUE re, VALUE str)
Exercises various checks and preprocesses so that the given regular expression can be applied to the ...
long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir)
Tell us if this is a wrong idea, but it seems this function has no usage at all.
OnigPosition rb_reg_onig_match(VALUE re, VALUE str, OnigPosition(*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args), void *args, struct re_registers *regs)
Runs a regular expression match using function match.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_quote(VALUE str)
Escapes any characters that would have special meaning in a regular expression.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
int rb_reg_region_copy(struct re_registers *dst, const struct re_registers *src)
Duplicates a match data.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_virtual_variable(const char *q, type *w, void_type *e)
Defines a function-backed global variable.
#define RARRAY_LEN
Just another name of rb_array_len.
#define RARRAY_AREF(a, i)
#define RBASIC(obj)
Convenient casting macro.
#define RMATCH(obj)
Convenient casting macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
#define RREGEXP(obj)
Convenient casting macro.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define RREGEXP_PTR(obj)
Convenient accessor macro.
static long RREGEXP_SRC_LEN(VALUE rexp)
Convenient getter function.
static char * RREGEXP_SRC_PTR(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
Regular expression execution context.
VALUE regexp
The expression of this match.
VALUE str
The target string that the match was made against.
Ruby's regular expression.
struct RBasic basic
Basic part, including flags and class.
const VALUE src
Source code of this expression.
unsigned long usecnt
Reference count.
struct re_pattern_buffer * ptr
The pattern buffer.
struct rmatch_offset * char_offset
Capture group offsets, in C array.
int char_offset_num_allocated
Number of rmatch_offset that ::rmatch::char_offset holds.
struct re_registers regs
"Registers" of a match.
Represents the region of a capture group.
long beg
Beginning of a group.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
#define SIZEOF_VALUE
Identical to sizeof(VALUE), except it is a macro that can also be used inside of preprocessor directi...
uintptr_t VALUE
Type that represents a Ruby object.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises an exception when the type check fails.