Ruby 3.3.7p123 (2025-01-15 revision be31f993d7fa0219d85f7b3c694d454da4ecc10b)
pack.c
1#include "prism/pack.h"
2
3#include <stdbool.h>
4#include <errno.h>
5
/*
 * Forward declaration: parses a run of ASCII decimal digits starting at
 * *format, advances *format past them and reports overflow through errno.
 * The definition is at the bottom of this file.
 */
static uintmax_t
strtoumaxc(const char **format);
8
10pm_pack_parse(pm_pack_variant variant, const char **format, const char *format_end,
11 pm_pack_type *type, pm_pack_signed *signed_type, pm_pack_endian *endian, pm_pack_size *size,
12 pm_pack_length_type *length_type, uint64_t *length, pm_pack_encoding *encoding) {
13
14 if (*encoding == PM_PACK_ENCODING_START) {
15 *encoding = PM_PACK_ENCODING_US_ASCII;
16 }
17
18 if (*format == format_end) {
19 *type = PM_PACK_END;
20 *signed_type = PM_PACK_SIGNED_NA;
21 *endian = PM_PACK_ENDIAN_NA;
22 *size = PM_PACK_SIZE_NA;
23 *length_type = PM_PACK_LENGTH_NA;
24 return PM_PACK_OK;
25 }
26
27 *length_type = PM_PACK_LENGTH_FIXED;
28 *length = 1;
29 bool length_changed_allowed = true;
30
31 char directive = **format;
32 (*format)++;
33 switch (directive) {
34 case ' ':
35 case '\t':
36 case '\n':
37 case '\v':
38 case '\f':
39 case '\r':
40 *type = PM_PACK_SPACE;
41 *signed_type = PM_PACK_SIGNED_NA;
42 *endian = PM_PACK_ENDIAN_NA;
43 *size = PM_PACK_SIZE_NA;
44 *length_type = PM_PACK_LENGTH_NA;
45 *length = 0;
46 return PM_PACK_OK;
47 case '#':
48 while ((*format < format_end) && (**format != '\n')) {
49 (*format)++;
50 }
51 *type = PM_PACK_COMMENT;
52 *signed_type = PM_PACK_SIGNED_NA;
53 *endian = PM_PACK_ENDIAN_NA;
54 *size = PM_PACK_SIZE_NA;
55 *length_type = PM_PACK_LENGTH_NA;
56 *length = 0;
57 return PM_PACK_OK;
58 case 'C':
59 *type = PM_PACK_INTEGER;
60 *signed_type = PM_PACK_UNSIGNED;
61 *endian = PM_PACK_AGNOSTIC_ENDIAN;
62 *size = PM_PACK_SIZE_8;
63 break;
64 case 'S':
65 *type = PM_PACK_INTEGER;
66 *signed_type = PM_PACK_UNSIGNED;
67 *endian = PM_PACK_NATIVE_ENDIAN;
68 *size = PM_PACK_SIZE_16;
69 break;
70 case 'L':
71 *type = PM_PACK_INTEGER;
72 *signed_type = PM_PACK_UNSIGNED;
73 *endian = PM_PACK_NATIVE_ENDIAN;
74 *size = PM_PACK_SIZE_32;
75 break;
76 case 'Q':
77 *type = PM_PACK_INTEGER;
78 *signed_type = PM_PACK_UNSIGNED;
79 *endian = PM_PACK_NATIVE_ENDIAN;
80 *size = PM_PACK_SIZE_64;
81 break;
82 case 'J':
83 *type = PM_PACK_INTEGER;
84 *signed_type = PM_PACK_UNSIGNED;
85 *endian = PM_PACK_NATIVE_ENDIAN;
86 *size = PM_PACK_SIZE_P;
87 break;
88 case 'c':
89 *type = PM_PACK_INTEGER;
90 *signed_type = PM_PACK_SIGNED;
91 *endian = PM_PACK_AGNOSTIC_ENDIAN;
92 *size = PM_PACK_SIZE_8;
93 break;
94 case 's':
95 *type = PM_PACK_INTEGER;
96 *signed_type = PM_PACK_SIGNED;
97 *endian = PM_PACK_NATIVE_ENDIAN;
98 *size = PM_PACK_SIZE_16;
99 break;
100 case 'l':
101 *type = PM_PACK_INTEGER;
102 *signed_type = PM_PACK_SIGNED;
103 *endian = PM_PACK_NATIVE_ENDIAN;
104 *size = PM_PACK_SIZE_32;
105 break;
106 case 'q':
107 *type = PM_PACK_INTEGER;
108 *signed_type = PM_PACK_SIGNED;
109 *endian = PM_PACK_NATIVE_ENDIAN;
110 *size = PM_PACK_SIZE_64;
111 break;
112 case 'j':
113 *type = PM_PACK_INTEGER;
114 *signed_type = PM_PACK_SIGNED;
115 *endian = PM_PACK_NATIVE_ENDIAN;
116 *size = PM_PACK_SIZE_P;
117 break;
118 case 'I':
119 *type = PM_PACK_INTEGER;
120 *signed_type = PM_PACK_UNSIGNED;
121 *endian = PM_PACK_NATIVE_ENDIAN;
122 *size = PM_PACK_SIZE_INT;
123 break;
124 case 'i':
125 *type = PM_PACK_INTEGER;
126 *signed_type = PM_PACK_SIGNED;
127 *endian = PM_PACK_NATIVE_ENDIAN;
128 *size = PM_PACK_SIZE_INT;
129 break;
130 case 'n':
131 *type = PM_PACK_INTEGER;
132 *signed_type = PM_PACK_UNSIGNED;
133 *endian = PM_PACK_BIG_ENDIAN;
134 *size = PM_PACK_SIZE_16;
135 length_changed_allowed = false;
136 break;
137 case 'N':
138 *type = PM_PACK_INTEGER;
139 *signed_type = PM_PACK_UNSIGNED;
140 *endian = PM_PACK_BIG_ENDIAN;
141 *size = PM_PACK_SIZE_32;
142 length_changed_allowed = false;
143 break;
144 case 'v':
145 *type = PM_PACK_INTEGER;
146 *signed_type = PM_PACK_UNSIGNED;
147 *endian = PM_PACK_LITTLE_ENDIAN;
148 *size = PM_PACK_SIZE_16;
149 length_changed_allowed = false;
150 break;
151 case 'V':
152 *type = PM_PACK_INTEGER;
153 *signed_type = PM_PACK_UNSIGNED;
154 *endian = PM_PACK_LITTLE_ENDIAN;
155 *size = PM_PACK_SIZE_32;
156 length_changed_allowed = false;
157 break;
158 case 'U':
159 *type = PM_PACK_UTF8;
160 *signed_type = PM_PACK_SIGNED_NA;
161 *endian = PM_PACK_ENDIAN_NA;
162 *size = PM_PACK_SIZE_NA;
163 break;
164 case 'w':
165 *type = PM_PACK_BER;
166 *signed_type = PM_PACK_SIGNED_NA;
167 *endian = PM_PACK_ENDIAN_NA;
168 *size = PM_PACK_SIZE_NA;
169 break;
170 case 'D':
171 case 'd':
172 *type = PM_PACK_FLOAT;
173 *signed_type = PM_PACK_SIGNED_NA;
174 *endian = PM_PACK_NATIVE_ENDIAN;
175 *size = PM_PACK_SIZE_64;
176 break;
177 case 'F':
178 case 'f':
179 *type = PM_PACK_FLOAT;
180 *signed_type = PM_PACK_SIGNED_NA;
181 *endian = PM_PACK_NATIVE_ENDIAN;
182 *size = PM_PACK_SIZE_32;
183 break;
184 case 'E':
185 *type = PM_PACK_FLOAT;
186 *signed_type = PM_PACK_SIGNED_NA;
187 *endian = PM_PACK_LITTLE_ENDIAN;
188 *size = PM_PACK_SIZE_64;
189 break;
190 case 'e':
191 *type = PM_PACK_FLOAT;
192 *signed_type = PM_PACK_SIGNED_NA;
193 *endian = PM_PACK_LITTLE_ENDIAN;
194 *size = PM_PACK_SIZE_32;
195 break;
196 case 'G':
197 *type = PM_PACK_FLOAT;
198 *signed_type = PM_PACK_SIGNED_NA;
199 *endian = PM_PACK_BIG_ENDIAN;
200 *size = PM_PACK_SIZE_64;
201 break;
202 case 'g':
203 *type = PM_PACK_FLOAT;
204 *signed_type = PM_PACK_SIGNED_NA;
205 *endian = PM_PACK_BIG_ENDIAN;
206 *size = PM_PACK_SIZE_32;
207 break;
208 case 'A':
209 *type = PM_PACK_STRING_SPACE_PADDED;
210 *signed_type = PM_PACK_SIGNED_NA;
211 *endian = PM_PACK_ENDIAN_NA;
212 *size = PM_PACK_SIZE_NA;
213 break;
214 case 'a':
215 *type = PM_PACK_STRING_NULL_PADDED;
216 *signed_type = PM_PACK_SIGNED_NA;
217 *endian = PM_PACK_ENDIAN_NA;
218 *size = PM_PACK_SIZE_NA;
219 break;
220 case 'Z':
221 *type = PM_PACK_STRING_NULL_TERMINATED;
222 *signed_type = PM_PACK_SIGNED_NA;
223 *endian = PM_PACK_ENDIAN_NA;
224 *size = PM_PACK_SIZE_NA;
225 break;
226 case 'B':
227 *type = PM_PACK_STRING_MSB;
228 *signed_type = PM_PACK_SIGNED_NA;
229 *endian = PM_PACK_ENDIAN_NA;
230 *size = PM_PACK_SIZE_NA;
231 break;
232 case 'b':
233 *type = PM_PACK_STRING_LSB;
234 *signed_type = PM_PACK_SIGNED_NA;
235 *endian = PM_PACK_ENDIAN_NA;
236 *size = PM_PACK_SIZE_NA;
237 break;
238 case 'H':
239 *type = PM_PACK_STRING_HEX_HIGH;
240 *signed_type = PM_PACK_SIGNED_NA;
241 *endian = PM_PACK_ENDIAN_NA;
242 *size = PM_PACK_SIZE_NA;
243 break;
244 case 'h':
245 *type = PM_PACK_STRING_HEX_LOW;
246 *signed_type = PM_PACK_SIGNED_NA;
247 *endian = PM_PACK_ENDIAN_NA;
248 *size = PM_PACK_SIZE_NA;
249 break;
250 case 'u':
251 *type = PM_PACK_STRING_UU;
252 *signed_type = PM_PACK_SIGNED_NA;
253 *endian = PM_PACK_ENDIAN_NA;
254 *size = PM_PACK_SIZE_NA;
255 break;
256 case 'M':
257 *type = PM_PACK_STRING_MIME;
258 *signed_type = PM_PACK_SIGNED_NA;
259 *endian = PM_PACK_ENDIAN_NA;
260 *size = PM_PACK_SIZE_NA;
261 break;
262 case 'm':
263 *type = PM_PACK_STRING_BASE64;
264 *signed_type = PM_PACK_SIGNED_NA;
265 *endian = PM_PACK_ENDIAN_NA;
266 *size = PM_PACK_SIZE_NA;
267 break;
268 case 'P':
269 *type = PM_PACK_STRING_FIXED;
270 *signed_type = PM_PACK_SIGNED_NA;
271 *endian = PM_PACK_ENDIAN_NA;
272 *size = PM_PACK_SIZE_NA;
273 break;
274 case 'p':
275 *type = PM_PACK_STRING_POINTER;
276 *signed_type = PM_PACK_SIGNED_NA;
277 *endian = PM_PACK_ENDIAN_NA;
278 *size = PM_PACK_SIZE_NA;
279 break;
280 case '@':
281 *type = PM_PACK_MOVE;
282 *signed_type = PM_PACK_SIGNED_NA;
283 *endian = PM_PACK_ENDIAN_NA;
284 *size = PM_PACK_SIZE_NA;
285 break;
286 case 'X':
287 *type = PM_PACK_BACK;
288 *signed_type = PM_PACK_SIGNED_NA;
289 *endian = PM_PACK_ENDIAN_NA;
290 *size = PM_PACK_SIZE_NA;
291 break;
292 case 'x':
293 *type = PM_PACK_NULL;
294 *signed_type = PM_PACK_SIGNED_NA;
295 *endian = PM_PACK_ENDIAN_NA;
296 *size = PM_PACK_SIZE_NA;
297 break;
298 case '%':
299 return PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE;
300 default:
301 return PM_PACK_ERROR_UNKNOWN_DIRECTIVE;
302 }
303
304 bool explicit_endian = false;
305
306 while (*format < format_end) {
307 switch (**format) {
308 case '_':
309 case '!':
310 (*format)++;
311 if (*type != PM_PACK_INTEGER || !length_changed_allowed) {
312 return PM_PACK_ERROR_BANG_NOT_ALLOWED;
313 }
314 switch (*size) {
315 case PM_PACK_SIZE_SHORT:
316 case PM_PACK_SIZE_INT:
317 case PM_PACK_SIZE_LONG:
318 case PM_PACK_SIZE_LONG_LONG:
319 break;
320 case PM_PACK_SIZE_16:
321 *size = PM_PACK_SIZE_SHORT;
322 break;
323 case PM_PACK_SIZE_32:
324 *size = PM_PACK_SIZE_LONG;
325 break;
326 case PM_PACK_SIZE_64:
327 *size = PM_PACK_SIZE_LONG_LONG;
328 break;
329 case PM_PACK_SIZE_P:
330 break;
331 default:
332 return PM_PACK_ERROR_BANG_NOT_ALLOWED;
333 }
334 break;
335 case '<':
336 (*format)++;
337 if (explicit_endian) {
338 return PM_PACK_ERROR_DOUBLE_ENDIAN;
339 }
340 *endian = PM_PACK_LITTLE_ENDIAN;
341 explicit_endian = true;
342 break;
343 case '>':
344 (*format)++;
345 if (explicit_endian) {
346 return PM_PACK_ERROR_DOUBLE_ENDIAN;
347 }
348 *endian = PM_PACK_BIG_ENDIAN;
349 explicit_endian = true;
350 break;
351 default:
352 goto exit_modifier_loop;
353 }
354 }
355
356exit_modifier_loop:
357
358 if (variant == PM_PACK_VARIANT_UNPACK && *type == PM_PACK_MOVE) {
359 *length = 0;
360 }
361
362 if (*format < format_end) {
363 if (**format == '*') {
364 switch (*type) {
365 case PM_PACK_NULL:
366 case PM_PACK_BACK:
367 switch (variant) {
368 case PM_PACK_VARIANT_PACK:
369 *length_type = PM_PACK_LENGTH_FIXED;
370 break;
371 case PM_PACK_VARIANT_UNPACK:
372 *length_type = PM_PACK_LENGTH_MAX;
373 break;
374 }
375 *length = 0;
376 break;
377
378 case PM_PACK_MOVE:
379 switch (variant) {
380 case PM_PACK_VARIANT_PACK:
381 *length_type = PM_PACK_LENGTH_FIXED;
382 break;
383 case PM_PACK_VARIANT_UNPACK:
384 *length_type = PM_PACK_LENGTH_RELATIVE;
385 break;
386 }
387 *length = 0;
388 break;
389
390 case PM_PACK_STRING_UU:
391 *length_type = PM_PACK_LENGTH_FIXED;
392 *length = 0;
393 break;
394
395 case PM_PACK_STRING_FIXED:
396 switch (variant) {
397 case PM_PACK_VARIANT_PACK:
398 *length_type = PM_PACK_LENGTH_FIXED;
399 *length = 1;
400 break;
401 case PM_PACK_VARIANT_UNPACK:
402 *length_type = PM_PACK_LENGTH_MAX;
403 *length = 0;
404 break;
405 }
406 break;
407
408 case PM_PACK_STRING_MIME:
409 case PM_PACK_STRING_BASE64:
410 *length_type = PM_PACK_LENGTH_FIXED;
411 *length = 1;
412 break;
413
414 default:
415 *length_type = PM_PACK_LENGTH_MAX;
416 *length = 0;
417 break;
418 }
419
420 (*format)++;
421 } else if (**format >= '0' && **format <= '9') {
422 errno = 0;
423 *length_type = PM_PACK_LENGTH_FIXED;
424 #if UINTMAX_MAX < UINT64_MAX
425 #error "prism's design assumes uintmax_t is at least as large as uint64_t"
426 #endif
427 uintmax_t length_max = strtoumaxc(format);
428 if (errno || length_max > UINT64_MAX) {
429 return PM_PACK_ERROR_LENGTH_TOO_BIG;
430 }
431 *length = (uint64_t) length_max;
432 }
433 }
434
435 switch (*type) {
436 case PM_PACK_UTF8:
437 /* if encoding is US-ASCII, upgrade to UTF-8 */
438 if (*encoding == PM_PACK_ENCODING_US_ASCII) {
439 *encoding = PM_PACK_ENCODING_UTF_8;
440 }
441 break;
442 case PM_PACK_STRING_MIME:
443 case PM_PACK_STRING_BASE64:
444 case PM_PACK_STRING_UU:
445 /* keep US-ASCII (do nothing) */
446 break;
447 default:
448 /* fall back to BINARY */
449 *encoding = PM_PACK_ENCODING_ASCII_8BIT;
450 break;
451 }
452
453 return PM_PACK_OK;
454}
455
457pm_size_to_native(pm_pack_size size) {
458 switch (size) {
459 case PM_PACK_SIZE_SHORT:
460 return sizeof(short);
461 case PM_PACK_SIZE_INT:
462 return sizeof(int);
463 case PM_PACK_SIZE_LONG:
464 return sizeof(long);
465 case PM_PACK_SIZE_LONG_LONG:
466 return sizeof(long long);
467 case PM_PACK_SIZE_8:
468 return 1;
469 case PM_PACK_SIZE_16:
470 return 2;
471 case PM_PACK_SIZE_32:
472 return 4;
473 case PM_PACK_SIZE_64:
474 return 8;
475 case PM_PACK_SIZE_P:
476 return sizeof(void *);
477 default:
478 return 0;
479 }
480}
481
/**
 * Parse a run of ASCII decimal digits as an unsigned integer, advancing the
 * caller's cursor past them.
 *
 * No leading whitespace, sign or base prefix is accepted. Scanning stops at
 * the first character that is not '0'..'9'; there is no explicit end bound, so
 * the caller must ensure such a character (for example a NUL terminator) is
 * reachable.
 *
 * @param format In/out cursor into the text; left pointing at the first
 *     non-digit character, even when the value overflowed.
 * @return The parsed value (0 if there were no digits). If the value does not
 *     fit in uintmax_t, UINTMAX_MAX is returned and errno is set to ERANGE.
 *     errno is never cleared here, so callers that test it must zero it first.
 */
static uintmax_t
strtoumaxc(const char **format) {
    uintmax_t value = 0;
    bool overflowed = false;

    while (**format >= '0' && **format <= '9') {
        uintmax_t digit = (uintmax_t) (**format - '0');

        /* value * 10 + digit fits exactly when value <= (MAX - digit) / 10.
         * Checking only value > MAX / 10 would miss a wrap in the addition. */
        if (overflowed || value > (UINTMAX_MAX - digit) / 10) {
            overflowed = true;
        } else {
            value = value * 10 + digit;
        }

        (*format)++;
    }

    if (overflowed) {
        errno = ERANGE;
        return UINTMAX_MAX;
    }

    return value;
}
A pack template string parser.
pm_pack_encoding
The type of encoding for a pack template string.
Definition pack.h:90
pm_pack_result
The result of parsing a pack template.
Definition pack.h:98
pm_pack_variant
The type of pack template we are parsing.
Definition pack.h:20
pm_pack_endian
The endianness of a pack directive.
Definition pack.h:59
pm_pack_signed
The signedness of a pack directive.
Definition pack.h:52
pm_pack_size
The size of an integer pack directive.
Definition pack.h:68
pm_pack_length_type
The type of length of a pack directive.
Definition pack.h:82
pm_pack_type
A directive within the pack template.
Definition pack.h:26
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:32
#define errno
Ractor-aware version of errno.
Definition ruby.h:388
C99 shim for <stdbool.h>