/src/varnish-cache/bin/varnishd/cache/cache_esi_parse.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*- |
2 | | * Copyright (c) 2011 Varnish Software AS |
3 | | * All rights reserved. |
4 | | * |
5 | | * Author: Poul-Henning Kamp <phk@phk.freebsd.dk> |
6 | | * |
7 | | * SPDX-License-Identifier: BSD-2-Clause |
8 | | * |
9 | | * Redistribution and use in source and binary forms, with or without |
10 | | * modification, are permitted provided that the following conditions |
11 | | * are met: |
12 | | * 1. Redistributions of source code must retain the above copyright |
13 | | * notice, this list of conditions and the following disclaimer. |
14 | | * 2. Redistributions in binary form must reproduce the above copyright |
15 | | * notice, this list of conditions and the following disclaimer in the |
16 | | * documentation and/or other materials provided with the distribution. |
17 | | * |
18 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
19 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
21 | | * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE |
22 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
24 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
25 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
26 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
27 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
28 | | * SUCH DAMAGE. |
29 | | * |
30 | | * VEP Varnish Esi Parsing |
31 | | */ |
32 | | |
33 | | #include "config.h" |
34 | | |
35 | | #include "cache_varnishd.h" |
36 | | #include "cache_filter.h" |
37 | | |
38 | | #include "cache_vgz.h" |
39 | | #include "cache_esi.h" |
40 | | #include "vct.h" |
41 | | #include "vend.h" |
42 | | #include "vgz.h" |
43 | | /* Debug() expands to nothing; the commented-out definition below would route it to printf() instead. */ |
44 | | //#define Debug(fmt, ...) printf(fmt, __VA_ARGS__) |
45 | | #define Debug(fmt, ...) /**/ |
46 | | /* Tag handlers have type dostuff_f; VEP_Parse() calls them with DO_ATTR once an attribute value is complete and with DO_TAG when the closing > of the tag is seen. */ |
47 | | struct vep_state; |
48 | | |
49 | | enum dowhat {DO_ATTR, DO_TAG}; |
50 | | typedef void dostuff_f(struct vep_state *, enum dowhat); |
51 | | /* One entry of a pattern table: a literal to look for and the address of the associated state constant. An entry with match == NULL terminates the table and is what vep_match() returns when nothing matched. */ |
52 | | struct vep_match { |
53 | | const char *match; |
54 | | const char * const *state; /* presumably the state entered on a hit; the consumer is outside this excerpt */ |
55 | | }; |
56 | | /* Classification of a span of input; vep_emit_common() uses it to choose between vep_emit_verbatim() and vep_emit_skip(). */ |
57 | | enum vep_mark { VERBATIM = 0, SKIP }; |
58 | | /* Complete state of one ESI parse; VEP_Parse() validates it against VEP_MAGIC. */ |
59 | | struct vep_state { |
60 | | unsigned magic; |
61 | | #define VEP_MAGIC 0x55cb9b82 |
62 | | struct vsb *vsb; /* receives the emitted VEC_* opcodes and include strings */ |
63 | | |
64 | | const char *url; /* used as the base when an include src is relative */ |
65 | | struct vfp_ctx *vc; |
66 | | int dogzip; /* nonzero: verbatim records also carry a CRC length and CRC */ |
67 | | vep_callback_t *cb; /* called with a byte count and a VGZ_* flag for every marked span */ |
68 | | void *cb_priv; |
69 | | |
70 | | /* Internal Counter for default call-back function */ |
71 | | ssize_t cb_x; |
72 | | |
73 | | /* parser state */ |
74 | | const char *state; /* one of the VEP_* constants, compared by pointer */ |
75 | | unsigned startup; /* cleared by the first VEP_Parse() call */ |
76 | | unsigned esi_found; /* set once an esi: tag or a completed esi comment was seen */ |
77 | | |
78 | | unsigned endtag; /* current tag started with /esi: */ |
79 | | unsigned emptytag; /* a / was seen inside the current tag */ |
80 | | unsigned canattr; /* whitespace was seen, so an attribute may start */ |
81 | | |
82 | | unsigned remove; /* nonzero between an opening and closing esi:remove */ |
83 | | |
84 | | ssize_t o_wait; /* bytes marked in the current mode since the last emit */ |
85 | | ssize_t o_pending; /* bytes marked pending, not yet assigned to a mode */ |
86 | | ssize_t o_total; /* sum of all emitted lengths */ |
87 | | uint32_t crc; /* running CRC32 of the current segment */ |
88 | | ssize_t o_crc; /* number of bytes covered by crc */ |
89 | | uint32_t crcp; /* CRC32 of the pending bytes */ |
90 | | ssize_t o_last; /* callback return value at the last emit */ |
91 | | |
92 | | const char *hack_p; |
93 | | const char *ver_p; /* input position up to which bytes have been marked */ |
94 | | |
95 | | const char *until; /* terminator text set before entering VEP_UNTIL */ |
96 | | const char *until_p; |
97 | | const char *until_s; /* state stored alongside until; presumably entered once it is found */ |
98 | | |
99 | | int in_esi_tag; |
100 | | |
101 | | const char *esicmt; /* terminator of an open esi comment, NULL when none is open */ |
102 | | const char *esicmt_p; /* next character of esicmt to be matched */ |
103 | | |
104 | | struct vep_match *attr; /* attribute table of the current tag, NULL if it takes none */ |
105 | | struct vsb *attr_vsb; /* collects the value of a recognised attribute */ |
106 | | int attr_delim; /* quote character of the value being read, space if unquoted, 0 otherwise */ |
107 | | |
108 | | struct vep_match *match; /* table consulted by vep_match() */ |
109 | | struct vep_match *match_hit; |
110 | | |
111 | | char tag[8]; |
112 | | int tag_i; |
113 | | |
114 | | dostuff_f *dostuff; /* handler of the current esi: tag */ |
115 | | |
116 | | struct vsb *include_src; /* src= value of the include being parsed */ |
117 | | unsigned include_continue; /* set by an onerror= value of continue */ |
118 | | |
119 | | unsigned nm_skip; /* number of vep_mark_skip() calls */ |
120 | | unsigned nm_verbatim; /* number of vep_mark_verbatim() calls */ |
121 | | unsigned nm_pending; /* number of vep_mark_pending() calls */ |
122 | | enum vep_mark last_mark; /* mode of the most recent vep_mark_common() call */ |
123 | | }; |
124 | | |
125 | | /*---------------------------------------------------------------------*/ |
126 | | /* Parser states. VEP_Parse() tests the current state by pointer identity (vep->state == VEP_...); the string doubles as a printable name for Debug(). */ |
127 | | static const char * const VEP_START = "[Start]"; |
128 | | static const char * const VEP_BOM = "[BOM]"; |
129 | | static const char * const VEP_TESTXML = "[TestXml]"; |
130 | | static const char * const VEP_NOTXML = "[NotXml]"; |
131 | | |
132 | | static const char * const VEP_NEXTTAG = "[NxtTag]"; |
133 | | static const char * const VEP_NOTMYTAG = "[NotMyTag]"; |
134 | | |
135 | | static const char * const VEP_STARTTAG = "[StartTag]"; |
136 | | static const char * const VEP_COMMENTESI = "[CommentESI]"; |
137 | | static const char * const VEP_COMMENT = "[Comment]"; |
138 | | static const char * const VEP_CDATA = "[CDATA]"; |
139 | | static const char * const VEP_ESITAG = "[ESITag]"; |
140 | | static const char * const VEP_ESIENDTAG = "[/ESITag]"; |
141 | | |
142 | | static const char * const VEP_ESIREMOVE = "[ESI:Remove]"; |
143 | | static const char * const VEP_ESIINCLUDE = "[ESI:Include]"; |
144 | | static const char * const VEP_ESICOMMENT = "[ESI:Comment]"; |
145 | | static const char * const VEP_ESIBOGON = "[ESI:Bogon]"; |
146 | | |
147 | | static const char * const VEP_INTAG = "[InTag]"; |
148 | | static const char * const VEP_TAGERROR = "[TagError]"; |
149 | | |
150 | | static const char * const VEP_ATTR = "[Attribute]"; |
151 | | static const char * const VEP_SKIPATTR = "[SkipAttribute]"; |
152 | | static const char * const VEP_ATTRDELIM = "[AttrDelim]"; |
153 | | static const char * const VEP_ATTRGETVAL = "[AttrGetValue]"; |
154 | | static const char * const VEP_ATTRVAL = "[AttrValue]"; |
155 | | |
156 | | static const char * const VEP_UNTIL = "[Until]"; |
157 | | static const char * const VEP_MATCHBUF = "[MatchBuf]"; |
158 | | static const char * const VEP_MATCH = "[Match]"; |
159 | | |
160 | | /*---------------------------------------------------------------------*/ |
161 | | /* Patterns tried right after a < (installed in the VEP_STARTTAG state). vep_match() takes the first entry that matches, so the longer !-- variants must stay ahead of the plain !-- entry. */ |
162 | | static struct vep_match vep_match_starttag[] = { |
163 | | { "!--esi", &VEP_COMMENTESI }, |
164 | | { "!---->", &VEP_NEXTTAG }, |
165 | | { "!--", &VEP_COMMENT }, |
166 | | { "/esi:", &VEP_ESIENDTAG }, |
167 | | { "esi:", &VEP_ESITAG }, |
168 | | { "![CDATA[", &VEP_CDATA }, |
169 | | { NULL, &VEP_NOTMYTAG } |
170 | | }; |
171 | | |
172 | | /*---------------------------------------------------------------------*/ |
173 | | /* ESI element names recognised after the esi: prefix (installed in the VEP_ESITAG state); anything else falls through to VEP_ESIBOGON. */ |
174 | | static struct vep_match vep_match_esi[] = { |
175 | | { "include", &VEP_ESIINCLUDE }, |
176 | | { "remove", &VEP_ESIREMOVE }, |
177 | | { "comment", &VEP_ESICOMMENT }, |
178 | | { NULL, &VEP_ESIBOGON } |
179 | | }; |
180 | | |
181 | | /*---------------------------------------------------------------------*/ |
182 | | /* Attributes of esi:include whose values are collected (src= and onerror=); any other attribute is associated with VEP_SKIPATTR. */ |
183 | | static struct vep_match vep_match_attr_include[] = { |
184 | | { "src=", &VEP_ATTRGETVAL }, |
185 | | { "onerror=", &VEP_ATTRGETVAL }, |
186 | | { NULL, &VEP_SKIPATTR } |
187 | | }; |
188 | | |
189 | | /*---------------------------------------------------------------------*/ |
190 | | /* Byte sequence looked for at the very start of the object when FEATURE_ESI_REMOVE_BOM is on. NOTE(review): the UTF-8 byte order mark is EF BB BF, but this table (and the lead-byte tests in VEP_Parse) use EB as first byte; confirm whether that is intended. */ |
191 | | static struct vep_match vep_match_bom[] = { |
192 | | { "\xeb\xbb\xbf", &VEP_START }, |
193 | | { NULL, &VEP_BOM } |
194 | | }; |
195 | | |
196 | | /*-------------------------------------------------------------------- |
197 | | * Report a parsing error: increment VSC_C_main->esi_errors and log an ERR record, carrying vep->o_last and the message p, under SLT_ESI_xmlerror. |
198 | | */ |
199 | | |
200 | | static void |
201 | | vep_error(const struct vep_state *vep, const char *p) |
202 | 1.49k | { |
203 | 1.49k | VSC_C_main->esi_errors++; |
204 | 1.49k | VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "ERR after %zd %s", |
205 | 1.49k | vep->o_last, p); |
206 | 1.49k | } |
207 | | |
208 | | /*-------------------------------------------------------------------- |
209 | | * Report a parsing warning: increment VSC_C_main->esi_warnings and log a WARN record, carrying vep->o_last and the message p, under the same SLT_ESI_xmlerror tag that vep_error() uses. |
210 | | */ |
211 | | |
212 | | static void |
213 | | vep_warn(const struct vep_state *vep, const char *p) |
214 | 0 | { |
215 | 0 | VSC_C_main->esi_warnings++; |
216 | 0 | VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "WARN after %zd %s", |
217 | 0 | vep->o_last, p); |
218 | 0 | } |
219 | | |
220 | | /*--------------------------------------------------------------------- |
221 | | * Try the entries of vep->match, in table order, against the input [b, e). Returns the first entry whose pattern is completely present at b, the terminating NULL-pattern entry if no pattern matches, or NULL if the input ends while an entry still matches as a prefix (more input needed). |
222 | | */ |
223 | | |
224 | | static struct vep_match * |
225 | | vep_match(const struct vep_state *vep, const char *b, const char *e) |
226 | 693k | { |
227 | 693k | struct vep_match *vm; |
228 | 693k | const char *q, *r; |
229 | | |
230 | 693k | AN(vep->match); |
231 | 1.05M | for (vm = vep->match; vm->match != NULL; vm++) { |
232 | 998k | assert(strlen(vm->match) <= sizeof (vep->tag)); |
233 | 998k | r = b; |
234 | 4.84M | for (q = vm->match; *q != '\0' && r < e; q++, r++) |
235 | 4.20M | if (*q != *r) |
236 | 363k | break; |
237 | 998k | if (*q == '\0') /* whole pattern matched */ |
238 | 634k | break; |
239 | 363k | if (r == e) /* input exhausted in the middle of this pattern */ |
240 | 39 | return (NULL); |
241 | 363k | } |
242 | 693k | return (vm); |
243 | 693k | } |
244 | | |
245 | | /*--------------------------------------------------------------------- |
246 | | * Append an opcode plus length to vep->vsb, picking the narrowest form: m8 and one length byte for l < 256, m16 and a vbe16enc() length for l < 65536, otherwise m64 and a vbe64enc() length. l must be > 0; each encoding is asserted to round-trip. |
247 | | */ |
248 | | |
249 | | static void |
250 | | vep_emit_len(const struct vep_state *vep, ssize_t l, int m8, int m16, int m64) |
251 | 2.49M | { |
252 | 2.49M | uint8_t buf[9]; |
253 | | |
254 | 2.49M | assert(l > 0); |
255 | 2.49M | if (l < 256) { |
256 | 2.49M | buf[0] = (uint8_t)m8; |
257 | 2.49M | buf[1] = (uint8_t)l; |
258 | 2.49M | assert((ssize_t)buf[1] == l); |
259 | 2.49M | VSB_bcat(vep->vsb, buf, 2); |
260 | 2.49M | } else if (l < 65536) { |
261 | 632 | buf[0] = (uint8_t)m16; |
262 | 632 | vbe16enc(buf + 1, (uint16_t)l); |
263 | 632 | assert((ssize_t)vbe16dec(buf + 1) == l); |
264 | 632 | VSB_bcat(vep->vsb, buf, 3); |
265 | 632 | } else { |
266 | 103 | buf[0] = (uint8_t)m64; |
267 | 103 | vbe64enc(buf + 1, l); |
268 | 103 | assert((ssize_t)vbe64dec(buf + 1) == l); |
269 | 103 | VSB_bcat(vep->vsb, buf, 9); |
270 | 103 | } |
271 | 2.49M | } |
272 | | /* Emit a skip record of l bytes, using the VEC_S1 / VEC_S2 / VEC_S8 opcodes. */ |
273 | | static void |
274 | | vep_emit_skip(const struct vep_state *vep, ssize_t l) |
275 | 1.25M | { |
276 | | |
277 | 1.25M | vep_emit_len(vep, l, VEC_S1, VEC_S2, VEC_S8); |
278 | 1.25M | } |
279 | | /* Emit a verbatim record of l bytes (VEC_V* opcodes). When vep->dogzip is set it is followed by a VEC_C* record with length l_crc and the four bytes of vep->crc written by vbe32enc(). */ |
280 | | static void |
281 | | vep_emit_verbatim(const struct vep_state *vep, ssize_t l, ssize_t l_crc) |
282 | 1.24M | { |
283 | 1.24M | uint8_t buf[4]; |
284 | | |
285 | 1.24M | vep_emit_len(vep, l, VEC_V1, VEC_V2, VEC_V8); |
286 | 1.24M | if (vep->dogzip) { |
287 | 0 | vep_emit_len(vep, l_crc, VEC_C1, VEC_C2, VEC_C8); |
288 | 0 | vbe32enc(buf, vep->crc); |
289 | 0 | VSB_bcat(vep->vsb, buf, sizeof buf); |
290 | 0 | } |
291 | 1.24M | } |
292 | | /* Emit a record of l bytes in the given mode (nothing happens for l == 0), then restart the running CRC and its byte count and add l to vep->o_total. */ |
293 | | static void |
294 | | vep_emit_common(struct vep_state *vep, ssize_t l, enum vep_mark mark) |
295 | 2.49M | { |
296 | | |
297 | 2.49M | assert(l >= 0); |
298 | 2.49M | if (l == 0) |
299 | 768 | return; |
300 | 2.49M | assert(mark == SKIP || mark == VERBATIM); |
301 | 2.49M | if (mark == SKIP) |
302 | 1.25M | vep_emit_skip(vep, l); |
303 | 1.24M | else |
304 | 1.24M | vep_emit_verbatim(vep, l, vep->o_crc); |
305 | | |
306 | 2.49M | vep->crc = crc32(0L, Z_NULL, 0); |
307 | 2.49M | vep->o_crc = 0; |
308 | 2.49M | vep->o_total += l; |
309 | 2.49M | } |
310 | | |
311 | | /*--------------------------------------------------------------------- |
312 | | * Assign the input between vep->ver_p and p to the given mode. If the mode changed, the span collected in the previous mode is first emitted through vep_emit_common(); pending bytes are then folded into the running CRC, and finally the new bytes are added to the CRC and reported to the callback. |
313 | | */ |
314 | | |
315 | | static void |
316 | | vep_mark_common(struct vep_state *vep, const char *p, enum vep_mark mark) |
317 | 2.57M | { |
318 | 2.57M | ssize_t l, lcb; |
319 | | |
320 | 2.57M | assert(mark == SKIP || mark == VERBATIM); |
321 | | |
322 | | /* The NO-OP case, no data, no pending data & no change of mode */ |
323 | 2.57M | if (vep->last_mark == mark && p == vep->ver_p && vep->o_pending == 0) |
324 | 0 | return; |
325 | | |
326 | | /* |
327 | | * If we changed mode, emit whatever the opposite mode |
328 | | * assembled before the pending bytes. |
329 | | */ |
330 | | |
331 | 2.57M | if (vep->last_mark != mark && (vep->o_wait > 0 || vep->startup)) { |
332 | 2.49M | lcb = vep->cb(vep->vc, vep->cb_priv, 0, /* zero-length call; the return value is used as the current output offset */ |
333 | 2.49M | mark == VERBATIM ? VGZ_RESET : VGZ_ALIGN); |
334 | 2.49M | vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); |
335 | 2.49M | vep->o_last = lcb; |
336 | 2.49M | vep->o_wait = 0; |
337 | 2.49M | } |
338 | | |
339 | | /* Transfer pending bytes CRC into active mode CRC */ |
340 | 2.57M | if (vep->o_pending) { |
341 | 247 | (void)vep->cb(vep->vc, vep->cb_priv, vep->o_pending, |
342 | 247 | VGZ_NORMAL); |
343 | 247 | if (vep->o_crc == 0) { |
344 | 64 | vep->crc = vep->crcp; |
345 | 64 | vep->o_crc = vep->o_pending; |
346 | 183 | } else { |
347 | 183 | vep->crc = crc32_combine(vep->crc, |
348 | 183 | vep->crcp, vep->o_pending); |
349 | 183 | vep->o_crc += vep->o_pending; |
350 | 183 | } |
351 | 247 | vep->crcp = crc32(0L, Z_NULL, 0); |
352 | 247 | vep->o_wait += vep->o_pending; |
353 | 247 | vep->o_pending = 0; |
354 | 247 | } |
355 | | |
356 | | /* Process this bit of input */ |
357 | 2.57M | AN(vep->ver_p); |
358 | 2.57M | l = p - vep->ver_p; |
359 | 2.57M | assert(l >= 0); |
360 | 2.57M | vep->crc = crc32(vep->crc, (const void*)vep->ver_p, l); |
361 | 2.57M | vep->o_crc += l; |
362 | 2.57M | vep->ver_p = p; |
363 | | |
364 | 2.57M | vep->o_wait += l; |
365 | 2.57M | vep->last_mark = mark; |
366 | 2.57M | (void)vep->cb(vep->vc, vep->cb_priv, l, VGZ_NORMAL); |
367 | 2.57M | } |
368 | | /* Mark the input up to p as VERBATIM and count the call in nm_verbatim. */ |
369 | | static void |
370 | | vep_mark_verbatim(struct vep_state *vep, const char *p) |
371 | 1.31M | { |
372 | | |
373 | 1.31M | vep_mark_common(vep, p, VERBATIM); |
374 | 1.31M | vep->nm_verbatim++; |
375 | 1.31M | } |
376 | | /* Mark the input up to p as SKIP and count the call in nm_skip. */ |
377 | | static void |
378 | | vep_mark_skip(struct vep_state *vep, const char *p) |
379 | 1.25M | { |
380 | | |
381 | 1.25M | vep_mark_common(vep, p, SKIP); |
382 | 1.25M | vep->nm_skip++; |
383 | 1.25M | } |
384 | | /* Set the input between vep->ver_p and p (at least one byte) aside as pending: it is added to the separate pending CRC (crcp) and to o_pending without being assigned to a mode; the next vep_mark_common() call folds it into the active mode. */ |
385 | | static void |
386 | | vep_mark_pending(struct vep_state *vep, const char *p) |
387 | 247 | { |
388 | 247 | ssize_t l; |
389 | | |
390 | 247 | AN(vep->ver_p); |
391 | 247 | l = p - vep->ver_p; |
392 | 247 | assert(l > 0); |
393 | 247 | vep->crcp = crc32(vep->crcp, (const void *)vep->ver_p, l); |
394 | 247 | vep->ver_p = p; |
395 | | |
396 | 247 | vep->o_pending += l; |
397 | 247 | vep->nm_pending++; |
398 | 247 | } |
399 | | |
400 | | /*--------------------------------------------------------------------- |
401 | | * Tag handler installed for esi:comment. Only called with DO_TAG; reports an error unless the tag was written in its empty form (with a final slash). */ |
402 | | |
403 | | static void v_matchproto_() |
404 | | vep_do_comment(struct vep_state *vep, enum dowhat what) |
405 | 0 | { |
406 | 0 | Debug("DO_COMMENT(%d)\n", what); |
407 | 0 | assert(what == DO_TAG); |
408 | 0 | if (!vep->emptytag) |
409 | 0 | vep_error(vep, "ESI 1.0 <esi:comment> needs final '/'"); |
410 | 0 | } |
411 | | |
412 | | /*--------------------------------------------------------------------- |
413 | | * Tag handler installed for esi:remove, used for both the opening and the closing tag. Only called with DO_TAG. The empty form, an opening tag while already open and a closing tag while not open are reported as errors; otherwise vep->remove becomes 1 on the opening tag and 0 on the closing tag. */ |
414 | | |
415 | | static void v_matchproto_() |
416 | | vep_do_remove(struct vep_state *vep, enum dowhat what) |
417 | 0 | { |
418 | 0 | Debug("DO_REMOVE(%d, end %d empty %d remove %d)\n", |
419 | 0 | what, vep->endtag, vep->emptytag, vep->remove); |
420 | 0 | assert(what == DO_TAG); |
421 | 0 | if (vep->emptytag) |
422 | 0 | vep_error(vep, "ESI 1.0 <esi:remove/> not legal"); |
423 | 0 | else if (vep->remove && !vep->endtag) |
424 | 0 | vep_error(vep, "ESI 1.0 <esi:remove> already open"); |
425 | 0 | else if (!vep->remove && vep->endtag) |
426 | 0 | vep_error(vep, "ESI 1.0 <esi:remove> not open"); |
427 | 0 | else |
428 | 0 | vep->remove = !vep->endtag; |
429 | 0 | } |
430 | | |
431 | | /*--------------------------------------------------------------------- |
432 | | * Handle a completed src= value held in vep->attr_vsb. A second src= on the same tag, or a value containing whitespace, is reported as an error: the state becomes VEP_TAGERROR and the buffers are destroyed. Otherwise the buffer is handed over to vep->include_src and attr_vsb is cleared. */ |
433 | | |
434 | | static void |
435 | | include_attr_src(struct vep_state *vep) |
436 | 0 | { |
437 | 0 | const char *p; |
438 |

439 | 0 | if (vep->include_src != NULL) { |
440 | 0 | vep_error(vep, |
441 | 0 | "ESI 1.0 <esi:include> " |
442 | 0 | "has multiple src= attributes"); |
443 | 0 | vep->state = VEP_TAGERROR; |
444 | 0 | VSB_destroy(&vep->attr_vsb); |
445 | 0 | VSB_destroy(&vep->include_src); |
446 | 0 | return; |
447 | 0 | } |
448 | 0 | for (p = VSB_data(vep->attr_vsb); *p != '\0'; p++) /* stop at the first whitespace character, if any */ |
449 | 0 | if (vct_islws(*p)) |
450 | 0 | break; |
451 | 0 | if (*p != '\0') { |
452 | 0 | vep_error(vep, |
453 | 0 | "ESI 1.0 <esi:include> " |
454 | 0 | "has whitespace in src= attribute"); |
455 | 0 | vep->state = VEP_TAGERROR; |
456 | 0 | VSB_destroy(&vep->attr_vsb); |
457 | 0 | if (vep->include_src != NULL) /* NOTE(review): include_src is always NULL here because of the early return above */ |
458 | 0 | VSB_destroy(&vep->include_src); |
459 | 0 | return; |
460 | 0 | } |
461 | 0 | vep->include_src = vep->attr_vsb; |
462 | 0 | vep->attr_vsb = NULL; |
463 | 0 | } |
464 | | /* Handle a completed onerror= value: include_continue becomes 1 only when the value is exactly continue, 0 otherwise; attr_vsb is destroyed either way. */ |
465 | | static void |
466 | | include_attr_onerror(struct vep_state *vep) |
467 | 0 | { |
468 |

469 | 0 | vep->include_continue = !strcmp("continue", VSB_data(vep->attr_vsb)); |
470 | 0 | VSB_destroy(&vep->attr_vsb); |
471 | 0 | } |
472 | | /* Tag handler installed for esi:include. With DO_ATTR it hands the finished attribute to include_attr_src() or include_attr_onerror(). With DO_TAG it appends the include record to vep->vsb: the opcode (VEC_IC if onerror was continue, else VEC_IA), a NUL-terminated Host: line for http:// and https:// sources (an empty string otherwise), and the NUL-terminated path with the five predefined XML entities decoded. A source that does not start with a slash is prefixed with vep->url up to its last slash before any question mark. */ |
473 | | static void v_matchproto_() |
474 | | vep_do_include(struct vep_state *vep, enum dowhat what) |
475 | 0 | { |
476 | 0 | const char *p, *q, *h; |
477 | 0 | ssize_t l; |
478 | 0 | char incl; |
479 |

480 | 0 | Debug("DO_INCLUDE(%d)\n", what); |
481 | 0 | if (what == DO_ATTR) { |
482 | 0 | Debug("ATTR (%s) (%s)\n", vep->match_hit->match, |
483 | 0 | VSB_data(vep->attr_vsb)); |
484 | 0 | if (!strcmp("src=", vep->match_hit->match)) { |
485 | 0 | include_attr_src(vep); |
486 | 0 | return; |
487 | 0 | } |
488 | 0 | if (!strcmp("onerror=", vep->match_hit->match)) { |
489 | 0 | include_attr_onerror(vep); |
490 | 0 | return; |
491 | 0 | } |
492 | 0 | WRONG("Unhandled <esi:include> attribute"); |
493 | 0 | } |
494 | 0 | assert(what == DO_TAG); |
495 | 0 | if (!vep->emptytag) |
496 | 0 | vep_warn(vep, "ESI 1.0 <esi:include> lacks final '/'"); |
497 | 0 | if (vep->include_src == NULL) { |
498 | 0 | vep_error(vep, "ESI 1.0 <esi:include> lacks src attr"); |
499 | 0 | return; |
500 | 0 | } |
501 | | |
502 | | /* |
503 | | * Strictly speaking, we ought to spit out any piled up skip before |
504 | | * emitting the VEC for the include, but objectively that makes no |
505 | | * difference and robs us of a chance to collapse another skip into |
506 | | * this one, so we don't do that. |
507 | | * However, we cannot tolerate any verbatim stuff piling up. |
508 | | * The mark_skip() before calling dostuff should have taken |
509 | | * care of that. Make sure. |
510 | | */ |
511 | 0 | assert(vep->o_wait == 0 || vep->last_mark == SKIP); |
512 | | /* XXX: what if it contains NUL bytes ?? */ |
513 | 0 | p = VSB_data(vep->include_src); |
514 | 0 | l = VSB_len(vep->include_src); |
515 | 0 | h = 0; |
516 |

517 | 0 | incl = vep->include_continue ? VEC_IC : VEC_IA; |
518 |

519 | 0 | if (l > 7 && !memcmp(p, "http://", 7)) { |
520 | 0 | h = p + 7; |
521 | 0 | p = strchr(h, '/'); /* the path starts at the first slash after the host */ |
522 | 0 | if (p == NULL) { |
523 | 0 | vep_error(vep, |
524 | 0 | "ESI 1.0 <esi:include> invalid src= URL"); |
525 | 0 | vep->state = VEP_TAGERROR; |
526 | 0 | AZ(vep->attr_vsb); |
527 | 0 | VSB_destroy(&vep->include_src); |
528 | 0 | return; |
529 | 0 | } |
530 | 0 | Debug("HOST <%.*s> PATH <%s>\n", (int)(p-h),h, p); |
531 | 0 | VSB_printf(vep->vsb, "%c", incl); |
532 | 0 | VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0); |
533 | 0 | } else if (l > 8 && !memcmp(p, "https://", 8)) { |
534 | 0 | if (!FEATURE(FEATURE_ESI_IGNORE_HTTPS)) { |
535 | 0 | vep_warn(vep, |
536 | 0 | "ESI 1.0 <esi:include> with https:// ignored"); |
537 | 0 | vep->state = VEP_TAGERROR; |
538 | 0 | AZ(vep->attr_vsb); |
539 | 0 | VSB_destroy(&vep->include_src); |
540 | 0 | return; |
541 | 0 | } |
542 | 0 | vep_warn(vep, |
543 | 0 | "ESI 1.0 <esi:include> https:// treated as http://"); |
544 | 0 | h = p + 8; |
545 | 0 | p = strchr(h, '/'); |
546 | 0 | if (p == NULL) { |
547 | 0 | vep_error(vep, |
548 | 0 | "ESI 1.0 <esi:include> invalid src= URL"); |
549 | 0 | vep->state = VEP_TAGERROR; |
550 | 0 | AZ(vep->attr_vsb); |
551 | 0 | VSB_destroy(&vep->include_src); |
552 | 0 | return; |
553 | 0 | } |
554 | 0 | VSB_printf(vep->vsb, "%c", incl); |
555 | 0 | VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0); |
556 | 0 | } else if (*p == '/') { |
557 | 0 | VSB_printf(vep->vsb, "%c", incl); |
558 | 0 | VSB_printf(vep->vsb, "%c", 0); |
559 | 0 | } else { |
560 | 0 | VSB_printf(vep->vsb, "%c", incl); |
561 | 0 | VSB_printf(vep->vsb, "%c", 0); |
562 | | /* Look for the last / before a '?' */ |
563 | 0 | h = NULL; |
564 | 0 | for (q = vep->url; *q && *q != '?'; q++) |
565 | 0 | if (*q == '/') |
566 | 0 | h = q; |
567 | 0 | if (h == NULL) |
568 | 0 | h = q + 1; |
569 |

570 | 0 | Debug("INCL:: [%.*s]/[%s]\n", |
571 | 0 | (int)(h - vep->url), vep->url, p); |
572 | 0 | VSB_printf(vep->vsb, "%.*s/", (int)(h - vep->url), vep->url); |
573 | 0 | } |
574 | 0 | l -= (p - VSB_data(vep->include_src)); /* l is now the length of the path starting at p */ |
575 | 0 | for (q = p; *q != '\0'; ) { |
576 | 0 | if (*q == '&') { /* decode the predefined XML entities; w is the length of the entity text f */ |
577 | 0 | #define R(w,f,r) \ |
578 | 0 | if (q + w <= p + l && !memcmp(q, f, w)) { \ |
579 | 0 | VSB_printf(vep->vsb, "%c", r); \ |
580 | 0 | q += w; \ |
581 | 0 | continue; \ |
582 | 0 | } |
583 | 0 | R(6, "&apos;", '\''); |
584 | 0 | R(6, "&quot;", '"'); |
585 | 0 | R(4, "&lt;", '<'); |
586 | 0 | R(4, "&gt;", '>'); |
587 | 0 | R(5, "&amp;", '&'); |
588 | 0 | } |
589 | 0 | VSB_printf(vep->vsb, "%c", *q++); |
590 | 0 | } |
591 | 0 | #undef R |
592 | 0 | VSB_printf(vep->vsb, "%c", 0); |
593 | 0 | VSB_destroy(&vep->include_src); |
594 | 0 | vep->include_continue = 0; |
595 | 0 | } |
596 | | |
597 | | /*--------------------------------------------------------------------- |
598 | | * Lex/Parse object for ESI instructions |
599 | | * |
600 | | * This function is called with the input object piecemeal so do not |
601 | | * assume that we have more than one char available at a time, but |
602 | | * optimize for getting huge chunks. |
603 | | * |
604 | | * NB: At the bottom of this source-file, there is a dot-diagram matching |
605 | | * NB: the state-machine. Please maintain it along with the code. |
606 | | */ |
607 | | |
608 | | void |
609 | | VEP_Parse(struct vep_state *vep, const char *p, size_t l) |
610 | 768 | { |
611 | 768 | const char *e; |
612 | 768 | struct vep_match *vm; |
613 | 768 | int i; |
614 | | |
615 | 768 | CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); |
616 | 768 | assert(l > 0); |
617 | | |
618 | 768 | if (vep->startup) { |
619 | | /* |
620 | | * We must force the GZIP header out as a SKIP string, |
621 | | * otherwise an object starting with <esi:include would |
622 | | * have its GZIP header appear after the included object |
623 | | * (e000026.vtc) |
624 | | */ |
625 | 768 | vep->ver_p = ""; |
626 | 768 | vep->last_mark = SKIP; |
627 | 768 | vep_mark_common(vep, vep->ver_p, VERBATIM); |
628 | 768 | vep->startup = 0; |
629 | 768 | AZ(vep->hack_p); |
630 | 768 | vep->hack_p = p; |
631 | 768 | } |
632 | | |
633 | 768 | vep->ver_p = p; |
634 | | |
635 | 768 | e = p + l; |
636 | | |
637 | 2.77M | while (p < e) { |
638 | 2.77M | AN(vep->state); |
639 | 2.77M | Debug("EP %s %d (%.*s) [%.*s]\n", |
640 | 2.77M | vep->state, |
641 | 2.77M | vep->remove, |
642 | 2.77M | vep->tag_i, vep->tag, |
643 | 2.77M | (e - p) > 10 ? 10 : (int)(e-p), p); |
644 | 2.77M | assert(p >= vep->ver_p); |
645 | | |
646 | | /****************************************************** |
647 | | * SECTION A |
648 | | */ |
649 | | |
650 | 2.77M | if (vep->state == VEP_START) { |
651 | 2.71k | if (FEATURE(FEATURE_ESI_REMOVE_BOM) && |
652 | 2.71k | *p == (char)0xeb) { |
653 | 1.99k | vep->match = vep_match_bom; |
654 | 1.99k | vep->state = VEP_MATCH; |
655 | 1.99k | } else |
656 | 717 | vep->state = VEP_BOM; |
657 | 2.76M | } else if (vep->state == VEP_BOM) { |
658 | 745 | vep_mark_skip(vep, p); |
659 | 745 | if (FEATURE(FEATURE_ESI_DISABLE_XML_CHECK)) |
660 | 638 | vep->state = VEP_NEXTTAG; |
661 | 107 | else |
662 | 107 | vep->state = VEP_TESTXML; |
663 | 2.76M | } else if (vep->state == VEP_TESTXML) { |
664 | | /* |
665 | | * If the first non-whitespace char is different |
666 | | * from '<' we assume this is not XML. |
667 | | */ |
668 | 362 | while (p < e && vct_islws(*p)) |
669 | 255 | p++; |
670 | 107 | vep_mark_verbatim(vep, p); |
671 | 107 | if (p < e && *p == '<') { |
672 | 15 | p++; |
673 | 15 | vep->state = VEP_STARTTAG; |
674 | 92 | } else if (p < e && *p == (char)0xeb) { |
675 | 7 | VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, |
676 | 7 | "No ESI processing, " |
677 | 7 | "first char not '<' but BOM." |
678 | 7 | " (See feature esi_remove_bom)" |
679 | 7 | ); |
680 | 7 | vep->state = VEP_NOTXML; |
681 | 85 | } else if (p < e) { |
682 | 76 | VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, |
683 | 76 | "No ESI processing, " |
684 | 76 | "first char not '<'." |
685 | 76 | " (See feature esi_disable_xml_check)" |
686 | 76 | ); |
687 | 76 | vep->state = VEP_NOTXML; |
688 | 76 | } |
689 | 2.76M | } else if (vep->state == VEP_NOTXML) { |
690 | | /* |
691 | | * This is not recognized as XML, just skip thru |
692 | | * vfp_esi_end() will handle the rest |
693 | | */ |
694 | 83 | p = e; |
695 | 83 | vep_mark_verbatim(vep, p); |
696 | | |
697 | | /****************************************************** |
698 | | * SECTION B |
699 | | */ |
700 | | |
701 | 2.76M | } else if (vep->state == VEP_NOTMYTAG) { |
702 | 57.3k | if (FEATURE(FEATURE_ESI_IGNORE_OTHER_ELEMENTS)) { |
703 | 55.8k | p++; |
704 | 55.8k | vep->state = VEP_NEXTTAG; |
705 | 55.8k | } else { |
706 | 1.48k | vep->tag_i = 0; |
707 | 1.23M | while (p < e) { |
708 | 1.23M | if (*p++ == '>') { |
709 | 1.46k | vep->state = VEP_NEXTTAG; |
710 | 1.46k | break; |
711 | 1.46k | } |
712 | 1.23M | } |
713 | 1.48k | } |
714 | 57.3k | if (p == e && !vep->remove) |
715 | 55 | vep_mark_verbatim(vep, p); |
716 | 2.71M | } else if (vep->state == VEP_NEXTTAG) { |
717 | | /* |
718 | | * Hunt for start of next tag and keep an eye |
719 | | * out for end of EsiCmt if armed. |
720 | | */ |
721 | 690k | vep->emptytag = 0; |
722 | 690k | vep->attr = NULL; |
723 | 690k | vep->dostuff = NULL; |
724 | 14.6M | while (p < e && *p != '<') { |
725 | 13.9M | if (vep->esicmt_p == NULL) { |
726 | 11.2M | p++; |
727 | 11.2M | continue; |
728 | 11.2M | } |
729 | 2.64M | if (*p != *vep->esicmt_p) { |
730 | 769k | p++; |
731 | 769k | vep->esicmt_p = vep->esicmt; |
732 | 769k | continue; |
733 | 769k | } |
734 | 1.87M | if (!vep->remove && vep->esicmt_p == vep->esicmt) |
735 | 626k | vep_mark_verbatim(vep, p); |
736 | 1.87M | p++; |
737 | 1.87M | if (*++vep->esicmt_p == '\0') { |
738 | 623k | vep->esi_found = 1; |
739 | 623k | vep->esicmt = NULL; |
740 | 623k | vep->esicmt_p = NULL; |
741 | | /* |
742 | | * The end of the esicmt |
743 | | * should not be emitted. |
744 | | * But the stuff before should |
745 | | */ |
746 | 623k | vep_mark_skip(vep, p); |
747 | 623k | } |
748 | 1.87M | } |
749 | 690k | if (p < e) { |
750 | 690k | if (!vep->remove) |
751 | 690k | vep_mark_verbatim(vep, p); |
752 | 690k | assert(*p == '<'); |
753 | 690k | p++; |
754 | 690k | vep->state = VEP_STARTTAG; |
755 | 690k | } else if (vep->esicmt_p == vep->esicmt && !vep->remove) |
756 | 223 | vep_mark_verbatim(vep, p); |
757 | | |
758 | | /****************************************************** |
759 | | * SECTION C |
760 | | */ |
761 | | |
762 | 2.01M | } else if (vep->state == VEP_STARTTAG) { |
763 | | /* Start of tag, set up match table */ |
764 | 690k | vep->endtag = 0; |
765 | 690k | vep->match = vep_match_starttag; |
766 | 690k | vep->state = VEP_MATCH; |
767 | 1.32M | } else if (vep->state == VEP_COMMENT) { |
768 | 3.08k | vep->esicmt_p = vep->esicmt = NULL; |
769 | 3.08k | vep->until_p = vep->until = "-->"; |
770 | 3.08k | vep->until_s = VEP_NEXTTAG; |
771 | 3.08k | vep->state = VEP_UNTIL; |
772 | 1.32M | } else if (vep->state == VEP_COMMENTESI) { |
773 | 625k | if (vep->remove) |
774 | 0 | vep_error(vep, |
775 | 0 | "ESI 1.0 Nested <!--esi" |
776 | 0 | " element in <esi:remove>"); |
777 | 625k | vep->esicmt_p = vep->esicmt = "-->"; |
778 | 625k | vep->state = VEP_NEXTTAG; |
779 | 625k | vep_mark_skip(vep, p); |
780 | 701k | } else if (vep->state == VEP_CDATA) { |
781 | | /* |
782 | | * Easy: just look for the end of CDATA |
783 | | */ |
784 | 487 | vep->until_p = vep->until = "]]>"; |
785 | 487 | vep->until_s = VEP_NEXTTAG; |
786 | 487 | vep->state = VEP_UNTIL; |
787 | 700k | } else if (vep->state == VEP_ESIENDTAG) { |
788 | 267 | vep->endtag = 1; |
789 | 267 | vep->state = VEP_ESITAG; |
790 | 700k | } else if (vep->state == VEP_ESITAG) { |
791 | 1.19k | vep->in_esi_tag = 1; |
792 | 1.19k | vep->esi_found = 1; |
793 | 1.19k | vep_mark_skip(vep, p); |
794 | 1.19k | vep->match = vep_match_esi; |
795 | 1.19k | vep->state = VEP_MATCH; |
796 | 699k | } else if (vep->state == VEP_ESIINCLUDE) { |
797 | 0 | if (vep->remove) { |
798 | 0 | vep_error(vep, |
799 | 0 | "ESI 1.0 <esi:include> element" |
800 | 0 | " nested in <esi:remove>"); |
801 | 0 | vep->state = VEP_TAGERROR; |
802 | 0 | } else if (vep->endtag) { |
803 | 0 | vep_error(vep, |
804 | 0 | "ESI 1.0 </esi:include> illegal end-tag"); |
805 | 0 | vep->state = VEP_TAGERROR; |
806 | 0 | } else { |
807 | 0 | vep->dostuff = vep_do_include; |
808 | 0 | vep->state = VEP_INTAG; |
809 | 0 | vep->attr = vep_match_attr_include; |
810 | 0 | } |
811 | 699k | } else if (vep->state == VEP_ESIREMOVE) { |
812 | 0 | vep->dostuff = vep_do_remove; |
813 | 0 | vep->state = VEP_INTAG; |
814 | 699k | } else if (vep->state == VEP_ESICOMMENT) { |
815 | 0 | if (vep->remove) { |
816 | 0 | vep_error(vep, |
817 | 0 | "ESI 1.0 <esi:comment> element" |
818 | 0 | " nested in <esi:remove>"); |
819 | 0 | vep->state = VEP_TAGERROR; |
820 | 0 | } else if (vep->endtag) { |
821 | 0 | vep_error(vep, |
822 | 0 | "ESI 1.0 </esi:comment> illegal end-tag"); |
823 | 0 | vep->state = VEP_TAGERROR; |
824 | 0 | } else { |
825 | 0 | vep->dostuff = vep_do_comment; |
826 | 0 | vep->state = VEP_INTAG; |
827 | 0 | } |
828 | 699k | } else if (vep->state == VEP_ESIBOGON) { |
829 | 1.19k | vep_error(vep, |
830 | 1.19k | "ESI 1.0 <esi:bogus> element"); |
831 | 1.19k | vep->state = VEP_TAGERROR; |
832 | | |
833 | | /****************************************************** |
834 | | * SECTION D |
835 | | */ |
836 | | |
837 | 698k | } else if (vep->state == VEP_INTAG) { |
838 | 0 | vep->tag_i = 0; |
839 | 0 | while (p < e && vct_islws(*p) && !vep->emptytag) { |
840 | 0 | p++; |
841 | 0 | vep->canattr = 1; |
842 | 0 | } |
843 | 0 | if (p < e && *p == '/' && !vep->emptytag) { |
844 | 0 | p++; |
845 | 0 | vep->emptytag = 1; |
846 | 0 | vep->canattr = 0; |
847 | 0 | } |
848 | 0 | if (p < e && *p == '>') { |
849 | 0 | p++; |
850 | 0 | AN(vep->dostuff); |
851 | 0 | vep_mark_skip(vep, p); |
852 | 0 | vep->dostuff(vep, DO_TAG); |
853 | 0 | vep->in_esi_tag = 0; |
854 | 0 | vep->state = VEP_NEXTTAG; |
855 | 0 | } else if (p < e && vep->emptytag) { |
856 | 0 | vep_error(vep, |
857 | 0 | "XML 1.0 '>' does not follow '/' in tag"); |
858 | 0 | vep->state = VEP_TAGERROR; |
859 | 0 | } else if (p < e && vep->canattr && |
860 | 0 | vct_isxmlnamestart(*p)) { |
861 | 0 | vep->state = VEP_ATTR; |
862 | 0 | } else if (p < e) { |
863 | 0 | vep_error(vep, |
864 | 0 | "XML 1.0 Illegal attribute start char"); |
865 | 0 | vep->state = VEP_TAGERROR; |
866 | 0 | } |
867 | 698k | } else if (vep->state == VEP_TAGERROR) { |
868 | 1.91M | while (p < e && *p != '>') |
869 | 1.91M | p++; |
870 | 1.19k | if (p < e) { |
871 | 1.15k | p++; |
872 | 1.15k | vep_mark_skip(vep, p); |
873 | 1.15k | vep->in_esi_tag = 0; |
874 | 1.15k | vep->state = VEP_NEXTTAG; |
875 | 1.15k | if (vep->attr_vsb) |
876 | 0 | VSB_destroy(&vep->attr_vsb); |
877 | 1.15k | } |
878 | | |
879 | | /****************************************************** |
880 | | * SECTION E |
881 | | */ |
882 | | |
883 | 696k | } else if (vep->state == VEP_ATTR) { |
884 | 0 | AZ(vep->attr_delim); |
885 | 0 | if (vep->attr == NULL) { |
886 | 0 | p++; |
887 | 0 | AZ(vep->attr_vsb); |
888 | 0 | vep->state = VEP_SKIPATTR; |
889 | 0 | } else { |
890 | 0 | vep->match = vep->attr; |
891 | 0 | vep->state = VEP_MATCH; |
892 | 0 | } |
893 | 696k | } else if (vep->state == VEP_SKIPATTR) { |
894 | 0 | while (p < e && vct_isxmlname(*p)) |
895 | 0 | p++; |
896 | 0 | if (p < e && *p == '=') { |
897 | 0 | p++; |
898 | 0 | vep->state = VEP_ATTRDELIM; |
899 | 0 | } else if (p < e && *p == '>') { |
900 | 0 | vep->state = VEP_INTAG; |
901 | 0 | } else if (p < e && *p == '/') { |
902 | 0 | vep->state = VEP_INTAG; |
903 | 0 | } else if (p < e && vct_issp(*p)) { |
904 | 0 | vep->state = VEP_INTAG; |
905 | 0 | } else if (p < e) { |
906 | 0 | vep_error(vep, |
907 | 0 | "XML 1.0 Illegal attr char"); |
908 | 0 | vep->state = VEP_TAGERROR; |
909 | 0 | } |
910 | 696k | } else if (vep->state == VEP_ATTRGETVAL) { |
911 | 0 | AZ(vep->attr_vsb); |
912 | 0 | vep->attr_vsb = VSB_new_auto(); |
913 | 0 | vep->state = VEP_ATTRDELIM; |
914 | 696k | } else if (vep->state == VEP_ATTRDELIM) { |
915 | 0 | AZ(vep->attr_delim); |
916 | 0 | if (*p == '"' || *p == '\'') { |
917 | 0 | vep->attr_delim = *p++; |
918 | 0 | vep->state = VEP_ATTRVAL; |
919 | 0 | } else if (!vct_issp(*p)) { |
920 | 0 | vep->attr_delim = ' '; |
921 | 0 | vep->state = VEP_ATTRVAL; |
922 | 0 | } else { |
923 | 0 | vep_error(vep, |
924 | 0 | "XML 1.0 Illegal attribute delimiter"); |
925 | 0 | vep->state = VEP_TAGERROR; |
926 | 0 | } |
927 | |
|
928 | 696k | } else if (vep->state == VEP_ATTRVAL) { |
929 | 0 | while (p < e && *p != '>' && *p != vep->attr_delim && |
930 | 0 | (vep->attr_delim != ' ' || !vct_issp(*p))) { |
931 | 0 | if (vep->attr_vsb != NULL) |
932 | 0 | VSB_putc(vep->attr_vsb, *p); |
933 | 0 | p++; |
934 | 0 | } |
935 | 0 | if (p < e && *p == '>') { |
936 | 0 | vep_error(vep, |
937 | 0 | "XML 1.0 Missing end attribute delimiter"); |
938 | 0 | vep->state = VEP_TAGERROR; |
939 | 0 | vep->attr_delim = 0; |
940 | 0 | if (vep->attr_vsb != NULL) { |
941 | 0 | AZ(VSB_finish(vep->attr_vsb)); |
942 | 0 | VSB_destroy(&vep->attr_vsb); |
943 | 0 | } |
944 | 0 | } else if (p < e) { |
945 | 0 | vep->attr_delim = 0; |
946 | 0 | p++; |
947 | 0 | vep->state = VEP_INTAG; |
948 | 0 | if (vep->attr_vsb != NULL) { |
949 | 0 | AZ(VSB_finish(vep->attr_vsb)); |
950 | 0 | AN(vep->dostuff); |
951 | 0 | vep->dostuff(vep, DO_ATTR); |
952 | 0 | vep->attr_vsb = NULL; |
953 | 0 | } |
954 | 0 | } |
955 | | |
956 | | /****************************************************** |
957 | | * Utility Section |
958 | | */ |
959 | |
|
960 | 696k | } else if (vep->state == VEP_MATCH) { |
961 | | /* |
962 | | * Match against a table |
963 | | */ |
964 | 693k | vm = vep_match(vep, p, e); |
965 | 693k | vep->match_hit = vm; |
966 | 693k | if (vm != NULL) { |
967 | 693k | if (vm->match != NULL) |
968 | 634k | p += strlen(vm->match); |
969 | 693k | vep->state = *vm->state; |
970 | 693k | vep->match = NULL; |
971 | 693k | vep->tag_i = 0; |
972 | 693k | } else { |
973 | 39 | assert(p + sizeof(vep->tag) >= e); |
974 | 39 | memcpy(vep->tag, p, e - p); |
975 | 39 | vep->tag_i = e - p; |
976 | 39 | vep->state = VEP_MATCHBUF; |
977 | 39 | p = e; |
978 | 39 | } |
979 | 693k | } else if (vep->state == VEP_MATCHBUF) { |
980 | | /* |
981 | | * Match against a table while split over input |
982 | | * sections. |
983 | | */ |
984 | 0 | AN(vep->match); |
985 | 0 | i = sizeof(vep->tag) - vep->tag_i; |
986 | 0 | if (i > e - p) |
987 | 0 | i = e - p; |
988 | 0 | memcpy(vep->tag + vep->tag_i, p, i); |
989 | 0 | vm = vep_match(vep, vep->tag, |
990 | 0 | vep->tag + vep->tag_i + i); |
991 | 0 | Debug("MB (%.*s) tag_i %d i %d = vm %p match %s\n", |
992 | 0 | vep->tag_i + i, vep->tag, |
993 | 0 | vep->tag_i, |
994 | 0 | i, |
995 | 0 | vm, |
996 | 0 | vm ? vm->match : "(nil)"); |
997 | |
|
998 | 0 | if (vm == NULL) { |
999 | 0 | vep->tag_i += i; |
1000 | 0 | p += i; |
1001 | 0 | assert(p == e); |
1002 | 0 | } else { |
1003 | 0 | vep->match_hit = vm; |
1004 | 0 | vep->state = *vm->state; |
1005 | 0 | if (vm->match != NULL) { |
1006 | 0 | i = strlen(vm->match); |
1007 | 0 | if (i > vep->tag_i) |
1008 | 0 | p += i - vep->tag_i; |
1009 | 0 | } |
1010 | 0 | vep->match = NULL; |
1011 | 0 | vep->tag_i = 0; |
1012 | 0 | } |
1013 | 3.56k | } else if (vep->state == VEP_UNTIL) { |
1014 | | /* |
1015 | | * Skip until we see magic string |
1016 | | */ |
1017 | 14.6M | while (p < e) { |
1018 | 14.6M | if (*p++ != *vep->until_p++) { |
1019 | 14.6M | vep->until_p = vep->until; |
1020 | 14.6M | } else if (*vep->until_p == '\0') { |
1021 | 3.48k | vep->state = vep->until_s; |
1022 | 3.48k | break; |
1023 | 3.48k | } |
1024 | 14.6M | } |
1025 | 3.56k | if (p == e && !vep->remove) |
1026 | 96 | vep_mark_verbatim(vep, p); |
1027 | 3.56k | } else { |
1028 | 0 | Debug("*** Unknown state %s\n", vep->state); |
1029 | 0 | WRONG("WRONG ESI PARSER STATE"); |
1030 | 0 | } |
1031 | 2.77M | } |
1032 | | /* |
1033 | | * We must always mark up the storage we got, try to do so |
1034 | | * in the most efficient way, in particular with respect to |
1035 | | * minimizing and limiting use of pending. |
1036 | | */ |
1037 | 768 | if (p == vep->ver_p) |
1038 | 479 | ; |
1039 | 289 | else if (vep->in_esi_tag) |
1040 | 42 | vep_mark_skip(vep, p); |
1041 | 247 | else if (vep->remove) |
1042 | 0 | vep_mark_skip(vep, p); |
1043 | 247 | else |
1044 | 247 | vep_mark_pending(vep, p); |
1045 | 768 | } |
1046 | | |
1047 | | /*--------------------------------------------------------------------- |
1048 | | */ |
1049 | | |
1050 | | static ssize_t v_matchproto_(vep_callback_t) |
1051 | | vep_default_cb(struct vfp_ctx *vc, void *priv, ssize_t l, enum vgz_flag flg) |
1052 | 5.06M | { |
1053 | 5.06M | ssize_t *s; |
1054 | | |
1055 | 5.06M | CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); |
1056 | 5.06M | AN(priv); |
1057 | 5.06M | s = priv; |
1058 | 5.06M | *s += l; |
1059 | 5.06M | (void)flg; |
1060 | 5.06M | return (*s); |
1061 | 5.06M | } |
1062 | | |
1063 | | /*--------------------------------------------------------------------- |
1064 | | */ |
1065 | | |
1066 | | struct vep_state * |
1067 | | VEP_Init(struct vfp_ctx *vc, const struct http *req, vep_callback_t *cb, |
1068 | | void *cb_priv) |
1069 | 768 | { |
1070 | 768 | struct vep_state *vep; |
1071 | | |
1072 | 768 | CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); |
1073 | 768 | CHECK_OBJ_NOTNULL(req, HTTP_MAGIC); |
1074 | 768 | vep = WS_Alloc(vc->resp->ws, sizeof *vep); |
1075 | 768 | if (vep == NULL) { |
1076 | 0 | VSLb(vc->wrk->vsl, SLT_VCL_Error, |
1077 | 0 | "VEP_Init() workspace overflow"); |
1078 | 0 | return (NULL); |
1079 | 0 | } |
1080 | | |
1081 | 768 | INIT_OBJ(vep, VEP_MAGIC); |
1082 | 768 | vep->url = req->hd[HTTP_HDR_URL].b; |
1083 | 768 | vep->vc = vc; |
1084 | 768 | vep->vsb = VSB_new_auto(); |
1085 | 768 | AN(vep->vsb); |
1086 | | |
1087 | 768 | if (cb != NULL) { |
1088 | 0 | vep->dogzip = 1; |
1089 | | /* XXX */ |
1090 | 0 | VSB_printf(vep->vsb, "%c", VEC_GZ); |
1091 | 0 | vep->cb = cb; |
1092 | 0 | vep->cb_priv = cb_priv; |
1093 | 768 | } else { |
1094 | 768 | vep->cb = vep_default_cb; |
1095 | 768 | vep->cb_priv = &vep->cb_x; |
1096 | 768 | } |
1097 | | |
1098 | 768 | vep->state = VEP_START; |
1099 | 768 | vep->crc = crc32(0L, Z_NULL, 0); |
1100 | 768 | vep->crcp = crc32(0L, Z_NULL, 0); |
1101 | | |
1102 | 768 | vep->startup = 1; |
1103 | 768 | return (vep); |
1104 | 768 | } |
1105 | | |
1106 | | /*--------------------------------------------------------------------- |
1107 | | */ |
1108 | | |
1109 | | struct vsb * |
1110 | | VEP_Finish(struct vep_state *vep) |
1111 | 768 | { |
1112 | 768 | ssize_t l, lcb; |
1113 | | |
1114 | 768 | CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); |
1115 | | |
1116 | 768 | if (vep->include_src) |
1117 | 0 | VSB_destroy(&vep->include_src); |
1118 | 768 | if (vep->attr_vsb) |
1119 | 0 | VSB_destroy(&vep->attr_vsb); |
1120 | | |
1121 | 768 | if (vep->state != VEP_START && |
1122 | 768 | vep->state != VEP_BOM && |
1123 | 768 | vep->state != VEP_TESTXML && |
1124 | 768 | vep->state != VEP_NOTXML && |
1125 | 768 | vep->state != VEP_NEXTTAG) { |
1126 | 303 | vep_error(vep, "VEP ended inside a tag"); |
1127 | 303 | } |
1128 | | |
1129 | 768 | if (vep->o_pending) |
1130 | 247 | vep_mark_common(vep, vep->ver_p, vep->last_mark); |
1131 | 768 | if (vep->o_wait > 0) { |
1132 | 736 | lcb = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN); |
1133 | 736 | vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); |
1134 | 736 | } |
1135 | | // NB: We don't account for PAD+SUM+LEN in gzipped objects |
1136 | 768 | (void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH); |
1137 | | |
1138 | 768 | AZ(VSB_finish(vep->vsb)); |
1139 | 768 | l = VSB_len(vep->vsb); |
1140 | 768 | if (vep->esi_found && l > 0) |
1141 | 220 | return (vep->vsb); |
1142 | 548 | VSB_destroy(&vep->vsb); |
1143 | 548 | return (NULL); |
1144 | 768 | } |
1145 | | |
1146 | | #if 0 |
1147 | | |
1148 | | digraph xml { |
1149 | | rankdir="LR" |
1150 | | size="7,10" |
1151 | | ################################################################# |
1152 | | # SECTION A |
1153 | | # |
1154 | | |
1155 | | START [shape=ellipse] |
1156 | | TESTXML [shape=ellipse] |
1157 | | NOTXML [shape=ellipse] |
1158 | | NEXTTAGa [shape=hexagon, label="NEXTTAG"] |
1159 | | STARTTAGa [shape=hexagon, label="STARTTAG"] |
1160 | | START -> TESTXML |
1161 | | START -> NEXTTAGa [style=dotted, label="syntax:1"] |
1162 | | TESTXML -> TESTXML [label="lws"] |
1163 | | TESTXML -> NOTXML |
1164 | | TESTXML -> STARTTAGa [label="'<'"] |
1165 | | |
1166 | | ################################################################# |
1167 | | # SECTION B |
1168 | | |
1169 | | NOTMYTAG [shape=ellipse] |
1170 | | NEXTTAG [shape=ellipse] |
1171 | | NOTMYTAG -> NEXTTAG [style=dotted, label="syntax:2"] |
1172 | | STARTTAGb [shape=hexagon, label="STARTTAG"] |
1173 | | NOTMYTAG -> NEXTTAG [label="'>'"] |
1174 | | NOTMYTAG -> NOTMYTAG [label="*"] |
1175 | | NEXTTAG -> NEXTTAG [label="'-->'"] |
1176 | | NEXTTAG -> NEXTTAG [label="*"] |
1177 | | NEXTTAG -> STARTTAGb [label="'<'"] |
1178 | | |
1179 | | ################################################################# |
1180 | | # SECTION C |
1181 | | |
1182 | | STARTTAG [shape=ellipse] |
1183 | | COMMENT [shape=ellipse] |
1184 | | CDATA [shape=ellipse] |
1185 | | ESITAG [shape=ellipse] |
1186 | | ESIETAG [shape=ellipse] |
1187 | | ESIINCLUDE [shape=ellipse] |
1188 | | ESIREMOVE [shape=ellipse] |
1189 | | ESICOMMENT [shape=ellipse] |
1190 | | ESIBOGON [shape=ellipse] |
1191 | | INTAGc [shape=hexagon, label="INTAG"] |
1192 | | NOTMYTAGc [shape=hexagon, label="NOTMYTAG"] |
1193 | | NEXTTAGc [shape=hexagon, label="NEXTTAG"] |
1194 | | TAGERRORc [shape=hexagon, label="TAGERROR"] |
1195 | | C1 [shape=circle,label=""] |
1196 | | STARTTAG -> COMMENT [label="'<!--'"] |
1197 | | STARTTAG -> ESITAG [label="'<esi'"] |
1198 | | STARTTAG -> CDATA [label="'<![CDATA['"] |
1199 | | STARTTAG -> NOTMYTAGc [label="'*'"] |
1200 | | COMMENT -> NEXTTAGc [label="'esi'"] |
1201 | | COMMENT -> C1 [label="*"] |
1202 | | C1 -> C1 [label="*"] |
1203 | | C1 -> NEXTTAGc [label="-->"] |
1204 | | CDATA -> CDATA [label="*"] |
1205 | | CDATA -> NEXTTAGc [label="]]>"] |
1206 | | ESITAG -> ESIINCLUDE [label="'include'"] |
1207 | | ESITAG -> ESIREMOVE [label="'remove'"] |
1208 | | ESITAG -> ESICOMMENT [label="'comment'"] |
1209 | | ESITAG -> ESIBOGON [label="*"] |
1210 | | ESICOMMENT -> INTAGc |
1211 | | ESICOMMENT -> TAGERRORc |
1212 | | ESICOMMENT -> TAGERRORc [style=dotted, label="nested\nin\nremove"] |
1213 | | ESIREMOVE -> INTAGc |
1214 | | ESIREMOVE -> TAGERRORc |
1215 | | ESIINCLUDE -> INTAGc |
1216 | | ESIINCLUDE -> TAGERRORc |
1217 | | ESIINCLUDE -> TAGERRORc [style=dotted, label="nested\nin\nremove"] |
1218 | | ESIBOGON -> TAGERRORc |
1219 | | |
1220 | | ################################################################# |
1221 | | # SECTION D |
1222 | | |
1223 | | INTAG [shape=ellipse] |
1224 | | TAGERROR [shape=ellipse] |
1225 | | NEXTTAGd [shape=hexagon, label="NEXTTAG"] |
1226 | | ATTRd [shape=hexagon, label="ATTR"] |
1227 | | D1 [shape=circle, label=""] |
1228 | | D2 [shape=circle, label=""] |
1229 | | INTAG -> D1 [label="lws"] |
1230 | | D1 -> D2 [label="/"] |
1231 | | INTAG -> D2 [label="/"] |
1232 | | INTAG -> NEXTTAGd [label=">"] |
1233 | | D1 -> NEXTTAGd [label=">"] |
1234 | | D2 -> NEXTTAGd [label=">"] |
1235 | | D1 -> ATTRd [label="XMLstartchar"] |
1236 | | D1 -> TAGERROR [label="*"] |
1237 | | D2 -> TAGERROR [label="*"] |
1238 | | TAGERROR -> TAGERROR [label="*"] |
1239 | | TAGERROR -> NEXTTAGd [label="'>'"] |
1240 | | |
1241 | | ################################################################# |
1242 | | # SECTION E |
1243 | | |
1244 | | ATTR [shape=ellipse] |
1245 | | SKIPATTR [shape=ellipse] |
1246 | | ATTRGETVAL [shape=ellipse] |
1247 | | ATTRDELIM [shape=ellipse] |
1248 | | ATTRVAL [shape=ellipse] |
1249 | | TAGERRORe [shape=hexagon, label="TAGERROR"] |
1250 | | INTAGe [shape=hexagon, label="INTAG"] |
1251 | | ATTR -> SKIPATTR [label="*"] |
1252 | | ATTR -> ATTRGETVAL [label="wanted attr"] |
1253 | | SKIPATTR -> SKIPATTR [label="XMLname"] |
1254 | | SKIPATTR -> ATTRDELIM [label="'='"] |
1255 | | SKIPATTR -> TAGERRORe [label="*"] |
1256 | | ATTRGETVAL -> ATTRDELIM |
1257 | | ATTRDELIM -> ATTRVAL [label="\""] |
1258 | | ATTRDELIM -> ATTRVAL [label="\'"] |
1259 | | ATTRDELIM -> ATTRVAL [label="*"] |
1260 | | ATTRDELIM -> TAGERRORe [label="lws"] |
1261 | | ATTRVAL -> TAGERRORe [label="'>'"] |
1262 | | ATTRVAL -> INTAGe [label="delim"] |
1263 | | ATTRVAL -> ATTRVAL [label="*"] |
1264 | | |
1265 | | } |
1266 | | |
1267 | | #endif |