Coverage Report

Created: 2025-06-07 06:18

/src/varnish-cache/bin/varnishd/cache/cache_esi_parse.c
Line
Count
Source (jump to first uncovered line)
1
/*-
2
 * Copyright (c) 2011 Varnish Software AS
3
 * All rights reserved.
4
 *
5
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
6
 *
7
 * SPDX-License-Identifier: BSD-2-Clause
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 *
30
 * VEP Varnish Esi Parsing
31
 */
32
33
#include "config.h"
34
35
#include "cache_varnishd.h"
36
#include "cache_filter.h"
37
38
#include "cache_vgz.h"
39
#include "cache_esi.h"
40
#include "vct.h"
41
#include "vend.h"
42
#include "vgz.h"
43
44
//#define Debug(fmt, ...) printf(fmt, __VA_ARGS__)
45
#define Debug(fmt, ...) /**/
46
47
struct vep_state;

/* Why a tag handler is being invoked: for one attribute, or for the tag. */
enum dowhat {DO_ATTR, DO_TAG};

/* Signature shared by the per-tag handlers stored in vep_state.dostuff. */
typedef void dostuff_f(struct vep_state *, enum dowhat);

/*
 * One row of a match table.  Tables are terminated by a row whose
 * 'match' is NULL; that row's 'state' is used when nothing matched.
 */
struct vep_match {
  const char  *match;           /* literal to look for, NULL in last row */
  const char  * const *state;   /* address of the state-name to enter */
};
56
57
enum vep_mark { VERBATIM = 0, SKIP };
58
59
struct vep_state {
60
  unsigned    magic;
61
#define VEP_MAGIC   0x55cb9b82
62
  struct vsb    *vsb;
63
64
  const char    *url;
65
  struct vfp_ctx    *vc;
66
  int     dogzip;
67
  vep_callback_t    *cb;
68
  void      *cb_priv;
69
70
  /* Internal Counter for default call-back function */
71
  ssize_t     cb_x;
72
73
  /* parser state */
74
  const char    *state;
75
  unsigned    startup;
76
  unsigned    esi_found;
77
78
  unsigned    endtag;
79
  unsigned    emptytag;
80
  unsigned    canattr;
81
82
  unsigned    remove;
83
84
  ssize_t     o_wait;
85
  ssize_t     o_pending;
86
  ssize_t     o_total;
87
  uint32_t    crc;
88
  ssize_t     o_crc;
89
  uint32_t    crcp;
90
  ssize_t     o_last;
91
92
  const char    *hack_p;
93
  const char    *ver_p;
94
95
  const char    *until;
96
  const char    *until_p;
97
  const char    *until_s;
98
99
  int     in_esi_tag;
100
101
  const char    *esicmt;
102
  const char    *esicmt_p;
103
104
  struct vep_match  *attr;
105
  struct vsb    *attr_vsb;
106
  int     attr_delim;
107
108
  struct vep_match  *match;
109
  struct vep_match  *match_hit;
110
111
  char      tag[8];
112
  int     tag_i;
113
114
  dostuff_f   *dostuff;
115
116
  struct vsb    *include_src;
117
  unsigned    include_continue;
118
119
  unsigned    nm_skip;
120
  unsigned    nm_verbatim;
121
  unsigned    nm_pending;
122
  enum vep_mark   last_mark;
123
};
124
125
/*---------------------------------------------------------------------*/
126
127
/*
 * Parser states.  A state is identified by the address of its name
 * string (VEP_Parse() compares pointers), the text itself is only
 * useful for debug output.
 */

/* Document start and XML detection */
static const char * const VEP_START =   "[Start]";
static const char * const VEP_BOM =   "[BOM]";
static const char * const VEP_TESTXML =   "[TestXml]";
static const char * const VEP_NOTXML =    "[NotXml]";

/* Scanning between tags */
static const char * const VEP_NEXTTAG =   "[NxtTag]";
static const char * const VEP_NOTMYTAG =  "[NotMyTag]";

/* Classification of a tag start */
static const char * const VEP_STARTTAG =  "[StartTag]";
static const char * const VEP_COMMENTESI =  "[CommentESI]";
static const char * const VEP_COMMENT =   "[Comment]";
static const char * const VEP_CDATA =   "[CDATA]";
static const char * const VEP_ESITAG =    "[ESITag]";
static const char * const VEP_ESIENDTAG = "[/ESITag]";

/* The individual <esi:...> elements */
static const char * const VEP_ESIREMOVE = "[ESI:Remove]";
static const char * const VEP_ESIINCLUDE =  "[ESI:Include]";
static const char * const VEP_ESICOMMENT =  "[ESI:Comment]";
static const char * const VEP_ESIBOGON =  "[ESI:Bogon]";

/* Inside an <esi:...> tag */
static const char * const VEP_INTAG =   "[InTag]";
static const char * const VEP_TAGERROR =  "[TagError]";

/* Attribute handling */
static const char * const VEP_ATTR =    "[Attribute]";
static const char * const VEP_SKIPATTR =  "[SkipAttribute]";
static const char * const VEP_ATTRDELIM = "[AttrDelim]";
static const char * const VEP_ATTRGETVAL =  "[AttrGetValue]";
static const char * const VEP_ATTRVAL =   "[AttrValue]";

/* Utility states */
static const char * const VEP_UNTIL =   "[Until]";
static const char * const VEP_MATCHBUF =  "[MatchBuf]";
static const char * const VEP_MATCH =   "[Match]";
159
160
/*---------------------------------------------------------------------*/
161
162
static struct vep_match vep_match_starttag[] = {
163
  { "!--esi", &VEP_COMMENTESI },
164
  { "!---->", &VEP_NEXTTAG },
165
  { "!--",  &VEP_COMMENT },
166
  { "/esi:",  &VEP_ESIENDTAG },
167
  { "esi:", &VEP_ESITAG },
168
  { "![CDATA[", &VEP_CDATA },
169
  { NULL,   &VEP_NOTMYTAG }
170
};
171
172
/*---------------------------------------------------------------------*/
173
174
static struct vep_match vep_match_esi[] = {
175
  { "include",  &VEP_ESIINCLUDE },
176
  { "remove", &VEP_ESIREMOVE },
177
  { "comment",  &VEP_ESICOMMENT },
178
  { NULL,   &VEP_ESIBOGON }
179
};
180
181
/*---------------------------------------------------------------------*/
182
183
static struct vep_match vep_match_attr_include[] = {
184
  { "src=", &VEP_ATTRGETVAL },
185
  { "onerror=", &VEP_ATTRGETVAL },
186
  { NULL,   &VEP_SKIPATTR }
187
};
188
189
/*---------------------------------------------------------------------*/
190
191
static struct vep_match vep_match_bom[] = {
192
  { "\xeb\xbb\xbf", &VEP_START },
193
  { NULL,     &VEP_BOM }
194
};
195
196
/*--------------------------------------------------------------------
197
 * Report a parsing error
198
 */
199
200
static void
201
vep_error(const struct vep_state *vep, const char *p)
202
1.49k
{
203
1.49k
  VSC_C_main->esi_errors++;
204
1.49k
  VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "ERR after %zd %s",
205
1.49k
       vep->o_last, p);
206
1.49k
}
207
208
/*--------------------------------------------------------------------
209
 * Report a parsing warning
210
 */
211
212
static void
213
vep_warn(const struct vep_state *vep, const char *p)
214
0
{
215
0
  VSC_C_main->esi_warnings++;
216
0
  VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "WARN after %zd %s",
217
0
       vep->o_last, p);
218
0
}
219
220
/*---------------------------------------------------------------------
221
 * return match or NULL if more input needed.
222
 */
223
224
static struct vep_match *
225
vep_match(const struct vep_state *vep, const char *b, const char *e)
226
693k
{
227
693k
  struct vep_match *vm;
228
693k
  const char *q, *r;
229
230
693k
  AN(vep->match);
231
1.05M
  for (vm = vep->match; vm->match != NULL; vm++) {
232
998k
    assert(strlen(vm->match) <= sizeof (vep->tag));
233
998k
    r = b;
234
4.84M
    for (q = vm->match; *q != '\0' && r < e; q++, r++)
235
4.20M
      if (*q != *r)
236
363k
        break;
237
998k
    if (*q == '\0')
238
634k
      break;
239
363k
    if (r == e)
240
39
      return (NULL);
241
363k
  }
242
693k
  return (vm);
243
693k
}
244
245
/*---------------------------------------------------------------------
246
 *
247
 */
248
249
static void
250
vep_emit_len(const struct vep_state *vep, ssize_t l, int m8, int m16, int m64)
251
2.49M
{
252
2.49M
  uint8_t buf[9];
253
254
2.49M
  assert(l > 0);
255
2.49M
  if (l < 256) {
256
2.49M
    buf[0] = (uint8_t)m8;
257
2.49M
    buf[1] = (uint8_t)l;
258
2.49M
    assert((ssize_t)buf[1] == l);
259
2.49M
    VSB_bcat(vep->vsb, buf, 2);
260
2.49M
  } else if (l < 65536) {
261
632
    buf[0] = (uint8_t)m16;
262
632
    vbe16enc(buf + 1, (uint16_t)l);
263
632
    assert((ssize_t)vbe16dec(buf + 1) == l);
264
632
    VSB_bcat(vep->vsb, buf, 3);
265
632
  } else {
266
103
    buf[0] = (uint8_t)m64;
267
103
    vbe64enc(buf + 1, l);
268
103
    assert((ssize_t)vbe64dec(buf + 1) == l);
269
103
    VSB_bcat(vep->vsb, buf, 9);
270
103
  }
271
2.49M
}
272
273
static void
274
vep_emit_skip(const struct vep_state *vep, ssize_t l)
275
1.25M
{
276
277
1.25M
  vep_emit_len(vep, l, VEC_S1, VEC_S2, VEC_S8);
278
1.25M
}
279
280
static void
281
vep_emit_verbatim(const struct vep_state *vep, ssize_t l, ssize_t l_crc)
282
1.24M
{
283
1.24M
  uint8_t buf[4];
284
285
1.24M
  vep_emit_len(vep, l, VEC_V1, VEC_V2, VEC_V8);
286
1.24M
  if (vep->dogzip) {
287
0
    vep_emit_len(vep, l_crc, VEC_C1, VEC_C2, VEC_C8);
288
0
    vbe32enc(buf, vep->crc);
289
0
    VSB_bcat(vep->vsb, buf, sizeof buf);
290
0
  }
291
1.24M
}
292
293
static void
294
vep_emit_common(struct vep_state *vep, ssize_t l, enum vep_mark mark)
295
2.49M
{
296
297
2.49M
  assert(l >= 0);
298
2.49M
  if (l == 0)
299
768
    return;
300
2.49M
  assert(mark == SKIP || mark == VERBATIM);
301
2.49M
  if (mark == SKIP)
302
1.25M
    vep_emit_skip(vep, l);
303
1.24M
  else
304
1.24M
    vep_emit_verbatim(vep, l, vep->o_crc);
305
306
2.49M
  vep->crc = crc32(0L, Z_NULL, 0);
307
2.49M
  vep->o_crc = 0;
308
2.49M
  vep->o_total += l;
309
2.49M
}
310
311
/*---------------------------------------------------------------------
312
 *
313
 */
314
315
static void
316
vep_mark_common(struct vep_state *vep, const char *p, enum vep_mark mark)
317
2.57M
{
318
2.57M
  ssize_t l, lcb;
319
320
2.57M
  assert(mark == SKIP || mark == VERBATIM);
321
322
  /* The NO-OP case, no data, no pending data & no change of mode */
323
2.57M
  if (vep->last_mark == mark && p == vep->ver_p && vep->o_pending == 0)
324
0
    return;
325
326
  /*
327
   * If we changed mode, emit whatever the opposite mode
328
   * assembled before the pending bytes.
329
   */
330
331
2.57M
  if (vep->last_mark != mark && (vep->o_wait > 0 || vep->startup)) {
332
2.49M
    lcb = vep->cb(vep->vc, vep->cb_priv, 0,
333
2.49M
        mark == VERBATIM ? VGZ_RESET : VGZ_ALIGN);
334
2.49M
    vep_emit_common(vep, lcb - vep->o_last, vep->last_mark);
335
2.49M
    vep->o_last = lcb;
336
2.49M
    vep->o_wait = 0;
337
2.49M
  }
338
339
  /* Transfer pending bytes CRC into active mode CRC */
340
2.57M
  if (vep->o_pending) {
341
247
    (void)vep->cb(vep->vc, vep->cb_priv, vep->o_pending,
342
247
         VGZ_NORMAL);
343
247
    if (vep->o_crc == 0) {
344
64
      vep->crc = vep->crcp;
345
64
      vep->o_crc = vep->o_pending;
346
183
    } else {
347
183
      vep->crc = crc32_combine(vep->crc,
348
183
          vep->crcp, vep->o_pending);
349
183
      vep->o_crc += vep->o_pending;
350
183
    }
351
247
    vep->crcp = crc32(0L, Z_NULL, 0);
352
247
    vep->o_wait += vep->o_pending;
353
247
    vep->o_pending = 0;
354
247
  }
355
356
  /* * Process this bit of input */
357
2.57M
  AN(vep->ver_p);
358
2.57M
  l = p - vep->ver_p;
359
2.57M
  assert(l >= 0);
360
2.57M
  vep->crc = crc32(vep->crc, (const void*)vep->ver_p, l);
361
2.57M
  vep->o_crc += l;
362
2.57M
  vep->ver_p = p;
363
364
2.57M
  vep->o_wait += l;
365
2.57M
  vep->last_mark = mark;
366
2.57M
  (void)vep->cb(vep->vc, vep->cb_priv, l, VGZ_NORMAL);
367
2.57M
}
368
369
static void
370
vep_mark_verbatim(struct vep_state *vep, const char *p)
371
1.31M
{
372
373
1.31M
  vep_mark_common(vep, p, VERBATIM);
374
1.31M
  vep->nm_verbatim++;
375
1.31M
}
376
377
static void
378
vep_mark_skip(struct vep_state *vep, const char *p)
379
1.25M
{
380
381
1.25M
  vep_mark_common(vep, p, SKIP);
382
1.25M
  vep->nm_skip++;
383
1.25M
}
384
385
static void
386
vep_mark_pending(struct vep_state *vep, const char *p)
387
247
{
388
247
  ssize_t l;
389
390
247
  AN(vep->ver_p);
391
247
  l = p - vep->ver_p;
392
247
  assert(l > 0);
393
247
  vep->crcp = crc32(vep->crcp, (const void *)vep->ver_p, l);
394
247
  vep->ver_p = p;
395
396
247
  vep->o_pending += l;
397
247
  vep->nm_pending++;
398
247
}
399
400
/*---------------------------------------------------------------------
401
 */
402
403
static void v_matchproto_()
404
vep_do_comment(struct vep_state *vep, enum dowhat what)
405
0
{
406
0
  Debug("DO_COMMENT(%d)\n", what);
407
0
  assert(what == DO_TAG);
408
0
  if (!vep->emptytag)
409
0
    vep_error(vep, "ESI 1.0 <esi:comment> needs final '/'");
410
0
}
411
412
/*---------------------------------------------------------------------
413
 */
414
415
static void v_matchproto_()
416
vep_do_remove(struct vep_state *vep, enum dowhat what)
417
0
{
418
0
  Debug("DO_REMOVE(%d, end %d empty %d remove %d)\n",
419
0
      what, vep->endtag, vep->emptytag, vep->remove);
420
0
  assert(what == DO_TAG);
421
0
  if (vep->emptytag)
422
0
    vep_error(vep, "ESI 1.0 <esi:remove/> not legal");
423
0
  else if (vep->remove && !vep->endtag)
424
0
    vep_error(vep, "ESI 1.0 <esi:remove> already open");
425
0
  else if (!vep->remove && vep->endtag)
426
0
    vep_error(vep, "ESI 1.0 <esi:remove> not open");
427
0
  else
428
0
    vep->remove = !vep->endtag;
429
0
}
430
431
/*---------------------------------------------------------------------
432
 */
433
434
static void
435
include_attr_src(struct vep_state *vep)
436
0
{
437
0
  const char *p;
438
439
0
  if (vep->include_src != NULL) {
440
0
    vep_error(vep,
441
0
        "ESI 1.0 <esi:include> "
442
0
        "has multiple src= attributes");
443
0
    vep->state = VEP_TAGERROR;
444
0
    VSB_destroy(&vep->attr_vsb);
445
0
    VSB_destroy(&vep->include_src);
446
0
    return;
447
0
  }
448
0
  for (p = VSB_data(vep->attr_vsb); *p != '\0'; p++)
449
0
    if (vct_islws(*p))
450
0
      break;
451
0
  if (*p != '\0') {
452
0
    vep_error(vep,
453
0
        "ESI 1.0 <esi:include> "
454
0
        "has whitespace in src= attribute");
455
0
    vep->state = VEP_TAGERROR;
456
0
    VSB_destroy(&vep->attr_vsb);
457
0
    if (vep->include_src != NULL)
458
0
      VSB_destroy(&vep->include_src);
459
0
    return;
460
0
  }
461
0
  vep->include_src = vep->attr_vsb;
462
0
  vep->attr_vsb = NULL;
463
0
}
464
465
static void
466
include_attr_onerror(struct vep_state *vep)
467
0
{
468
469
0
  vep->include_continue = !strcmp("continue", VSB_data(vep->attr_vsb));
470
0
  VSB_destroy(&vep->attr_vsb);
471
0
}
472
473
static void v_matchproto_()
474
vep_do_include(struct vep_state *vep, enum dowhat what)
475
0
{
476
0
  const char *p, *q, *h;
477
0
  ssize_t l;
478
0
  char incl;
479
480
0
  Debug("DO_INCLUDE(%d)\n", what);
481
0
  if (what == DO_ATTR) {
482
0
    Debug("ATTR (%s) (%s)\n", vep->match_hit->match,
483
0
      VSB_data(vep->attr_vsb));
484
0
    if (!strcmp("src=", vep->match_hit->match)) {
485
0
      include_attr_src(vep);
486
0
      return;
487
0
    }
488
0
    if (!strcmp("onerror=", vep->match_hit->match)) {
489
0
      include_attr_onerror(vep);
490
0
      return;
491
0
    }
492
0
    WRONG("Unhandled <esi:include> attribute");
493
0
  }
494
0
  assert(what == DO_TAG);
495
0
  if (!vep->emptytag)
496
0
    vep_warn(vep, "ESI 1.0 <esi:include> lacks final '/'");
497
0
  if (vep->include_src == NULL) {
498
0
    vep_error(vep, "ESI 1.0 <esi:include> lacks src attr");
499
0
    return;
500
0
  }
501
502
  /*
503
   * Strictly speaking, we ought to spit out any piled up skip before
504
   * emitting the VEC for the include, but objectively that makes no
505
   * difference and robs us of a chance to collapse another skip into
506
   * this on so we don't do that.
507
   * However, we cannot tolerate any verbatim stuff piling up.
508
   * The mark_skip() before calling dostuff should have taken
509
   * care of that.  Make sure.
510
   */
511
0
  assert(vep->o_wait == 0 || vep->last_mark == SKIP);
512
  /* XXX: what if it contains NUL bytes ?? */
513
0
  p = VSB_data(vep->include_src);
514
0
  l = VSB_len(vep->include_src);
515
0
  h = 0;
516
517
0
  incl = vep->include_continue ? VEC_IC : VEC_IA;
518
519
0
  if (l > 7 && !memcmp(p, "http://", 7)) {
520
0
    h = p + 7;
521
0
    p = strchr(h, '/');
522
0
    if (p == NULL) {
523
0
      vep_error(vep,
524
0
          "ESI 1.0 <esi:include> invalid src= URL");
525
0
      vep->state = VEP_TAGERROR;
526
0
      AZ(vep->attr_vsb);
527
0
      VSB_destroy(&vep->include_src);
528
0
      return;
529
0
    }
530
0
    Debug("HOST <%.*s> PATH <%s>\n", (int)(p-h),h, p);
531
0
    VSB_printf(vep->vsb, "%c", incl);
532
0
    VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0);
533
0
  } else if (l > 8 && !memcmp(p, "https://", 8)) {
534
0
    if (!FEATURE(FEATURE_ESI_IGNORE_HTTPS)) {
535
0
      vep_warn(vep,
536
0
          "ESI 1.0 <esi:include> with https:// ignored");
537
0
      vep->state = VEP_TAGERROR;
538
0
      AZ(vep->attr_vsb);
539
0
      VSB_destroy(&vep->include_src);
540
0
      return;
541
0
    }
542
0
    vep_warn(vep,
543
0
        "ESI 1.0 <esi:include> https:// treated as http://");
544
0
    h = p + 8;
545
0
    p = strchr(h, '/');
546
0
    if (p == NULL) {
547
0
      vep_error(vep,
548
0
          "ESI 1.0 <esi:include> invalid src= URL");
549
0
      vep->state = VEP_TAGERROR;
550
0
      AZ(vep->attr_vsb);
551
0
      VSB_destroy(&vep->include_src);
552
0
      return;
553
0
    }
554
0
    VSB_printf(vep->vsb, "%c", incl);
555
0
    VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0);
556
0
  } else if (*p == '/') {
557
0
    VSB_printf(vep->vsb, "%c", incl);
558
0
    VSB_printf(vep->vsb, "%c", 0);
559
0
  } else {
560
0
    VSB_printf(vep->vsb, "%c", incl);
561
0
    VSB_printf(vep->vsb, "%c", 0);
562
    /* Look for the last / before a '?' */
563
0
    h = NULL;
564
0
    for (q = vep->url; *q && *q != '?'; q++)
565
0
      if (*q == '/')
566
0
        h = q;
567
0
    if (h == NULL)
568
0
      h = q + 1;
569
570
0
    Debug("INCL:: [%.*s]/[%s]\n",
571
0
        (int)(h - vep->url), vep->url, p);
572
0
    VSB_printf(vep->vsb, "%.*s/", (int)(h - vep->url), vep->url);
573
0
  }
574
0
  l -= (p - VSB_data(vep->include_src));
575
0
  for (q = p; *q != '\0'; ) {
576
0
    if (*q == '&') {
577
0
#define R(w,f,r)              \
578
0
      if (q + w <= p + l && !memcmp(q, f, w)) { \
579
0
        VSB_printf(vep->vsb, "%c", r);  \
580
0
        q += w;       \
581
0
        continue;     \
582
0
      }
583
0
      R(6, "&apos;", '\'');
584
0
      R(6, "&quot;", '"');
585
0
      R(4, "&lt;", '<');
586
0
      R(4, "&gt;", '>');
587
0
      R(5, "&amp;", '&');
588
0
    }
589
0
    VSB_printf(vep->vsb, "%c", *q++);
590
0
  }
591
0
#undef R
592
0
  VSB_printf(vep->vsb, "%c", 0);
593
0
  VSB_destroy(&vep->include_src);
594
0
  vep->include_continue = 0;
595
0
}
596
597
/*---------------------------------------------------------------------
598
 * Lex/Parse object for ESI instructions
599
 *
600
 * This function is called with the input object piecemeal so do not
601
 * assume that we have more than one char available at a time, but
602
 * optimize for getting huge chunks.
603
 *
604
 * NB: At the bottom of this source-file, there is a dot-diagram matching
605
 * NB: the state-machine.  Please maintain it along with the code.
606
 */
607
608
void
609
VEP_Parse(struct vep_state *vep, const char *p, size_t l)
610
768
{
611
768
  const char *e;
612
768
  struct vep_match *vm;
613
768
  int i;
614
615
768
  CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
616
768
  assert(l > 0);
617
618
768
  if (vep->startup) {
619
    /*
620
     * We must force the GZIP header out as a SKIP string,
621
     * otherwise an object starting with <esi:include would
622
     * have its GZIP header appear after the included object
623
     * (e000026.vtc)
624
     */
625
768
    vep->ver_p = "";
626
768
    vep->last_mark = SKIP;
627
768
    vep_mark_common(vep, vep->ver_p, VERBATIM);
628
768
    vep->startup = 0;
629
768
    AZ(vep->hack_p);
630
768
    vep->hack_p = p;
631
768
  }
632
633
768
  vep->ver_p = p;
634
635
768
  e = p + l;
636
637
2.77M
  while (p < e) {
638
2.77M
    AN(vep->state);
639
2.77M
    Debug("EP %s %d (%.*s) [%.*s]\n",
640
2.77M
        vep->state,
641
2.77M
        vep->remove,
642
2.77M
        vep->tag_i, vep->tag,
643
2.77M
        (e - p) > 10 ? 10 : (int)(e-p), p);
644
2.77M
    assert(p >= vep->ver_p);
645
646
    /******************************************************
647
     * SECTION A
648
     */
649
650
2.77M
    if (vep->state == VEP_START) {
651
2.71k
      if (FEATURE(FEATURE_ESI_REMOVE_BOM) &&
652
2.71k
          *p == (char)0xeb) {
653
1.99k
        vep->match = vep_match_bom;
654
1.99k
        vep->state = VEP_MATCH;
655
1.99k
      } else
656
717
        vep->state = VEP_BOM;
657
2.76M
    } else if (vep->state == VEP_BOM) {
658
745
      vep_mark_skip(vep, p);
659
745
      if (FEATURE(FEATURE_ESI_DISABLE_XML_CHECK))
660
638
        vep->state = VEP_NEXTTAG;
661
107
      else
662
107
        vep->state = VEP_TESTXML;
663
2.76M
    } else if (vep->state == VEP_TESTXML) {
664
      /*
665
       * If the first non-whitespace char is different
666
       * from '<' we assume this is not XML.
667
       */
668
362
      while (p < e && vct_islws(*p))
669
255
        p++;
670
107
      vep_mark_verbatim(vep, p);
671
107
      if (p < e && *p == '<') {
672
15
        p++;
673
15
        vep->state = VEP_STARTTAG;
674
92
      } else if (p < e && *p == (char)0xeb) {
675
7
        VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
676
7
            "No ESI processing, "
677
7
            "first char not '<' but BOM."
678
7
            " (See feature esi_remove_bom)"
679
7
        );
680
7
        vep->state = VEP_NOTXML;
681
85
      } else if (p < e) {
682
76
        VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
683
76
            "No ESI processing, "
684
76
            "first char not '<'."
685
76
            " (See feature esi_disable_xml_check)"
686
76
        );
687
76
        vep->state = VEP_NOTXML;
688
76
      }
689
2.76M
    } else if (vep->state == VEP_NOTXML) {
690
      /*
691
       * This is not recognized as XML, just skip thru
692
       * vfp_esi_end() will handle the rest
693
       */
694
83
      p = e;
695
83
      vep_mark_verbatim(vep, p);
696
697
    /******************************************************
698
     * SECTION B
699
     */
700
701
2.76M
    } else if (vep->state == VEP_NOTMYTAG) {
702
57.3k
      if (FEATURE(FEATURE_ESI_IGNORE_OTHER_ELEMENTS)) {
703
55.8k
        p++;
704
55.8k
        vep->state = VEP_NEXTTAG;
705
55.8k
      } else {
706
1.48k
        vep->tag_i = 0;
707
1.23M
        while (p < e) {
708
1.23M
          if (*p++ == '>') {
709
1.46k
            vep->state = VEP_NEXTTAG;
710
1.46k
            break;
711
1.46k
          }
712
1.23M
        }
713
1.48k
      }
714
57.3k
      if (p == e && !vep->remove)
715
55
        vep_mark_verbatim(vep, p);
716
2.71M
    } else if (vep->state == VEP_NEXTTAG) {
717
      /*
718
       * Hunt for start of next tag and keep an eye
719
       * out for end of EsiCmt if armed.
720
       */
721
690k
      vep->emptytag = 0;
722
690k
      vep->attr = NULL;
723
690k
      vep->dostuff = NULL;
724
14.6M
      while (p < e && *p != '<') {
725
13.9M
        if (vep->esicmt_p == NULL) {
726
11.2M
          p++;
727
11.2M
          continue;
728
11.2M
        }
729
2.64M
        if (*p != *vep->esicmt_p) {
730
769k
          p++;
731
769k
          vep->esicmt_p = vep->esicmt;
732
769k
          continue;
733
769k
        }
734
1.87M
        if (!vep->remove && vep->esicmt_p == vep->esicmt)
735
626k
          vep_mark_verbatim(vep, p);
736
1.87M
        p++;
737
1.87M
        if (*++vep->esicmt_p == '\0') {
738
623k
          vep->esi_found = 1;
739
623k
          vep->esicmt = NULL;
740
623k
          vep->esicmt_p = NULL;
741
          /*
742
           * The end of the esicmt
743
           * should not be emitted.
744
           * But the stuff before should
745
           */
746
623k
          vep_mark_skip(vep, p);
747
623k
        }
748
1.87M
      }
749
690k
      if (p < e) {
750
690k
        if (!vep->remove)
751
690k
          vep_mark_verbatim(vep, p);
752
690k
        assert(*p == '<');
753
690k
        p++;
754
690k
        vep->state = VEP_STARTTAG;
755
690k
      } else if (vep->esicmt_p == vep->esicmt && !vep->remove)
756
223
        vep_mark_verbatim(vep, p);
757
758
    /******************************************************
759
     * SECTION C
760
     */
761
762
2.01M
    } else if (vep->state == VEP_STARTTAG) {
763
      /* Start of tag, set up match table */
764
690k
      vep->endtag = 0;
765
690k
      vep->match = vep_match_starttag;
766
690k
      vep->state = VEP_MATCH;
767
1.32M
    } else if (vep->state == VEP_COMMENT) {
768
3.08k
      vep->esicmt_p = vep->esicmt = NULL;
769
3.08k
      vep->until_p = vep->until = "-->";
770
3.08k
      vep->until_s = VEP_NEXTTAG;
771
3.08k
      vep->state = VEP_UNTIL;
772
1.32M
    } else if (vep->state == VEP_COMMENTESI) {
773
625k
      if (vep->remove)
774
0
        vep_error(vep,
775
0
            "ESI 1.0 Nested <!--esi"
776
0
            " element in <esi:remove>");
777
625k
      vep->esicmt_p = vep->esicmt = "-->";
778
625k
      vep->state = VEP_NEXTTAG;
779
625k
      vep_mark_skip(vep, p);
780
701k
    } else if (vep->state == VEP_CDATA) {
781
      /*
782
       * Easy: just look for the end of CDATA
783
       */
784
487
      vep->until_p = vep->until = "]]>";
785
487
      vep->until_s = VEP_NEXTTAG;
786
487
      vep->state = VEP_UNTIL;
787
700k
    } else if (vep->state == VEP_ESIENDTAG) {
788
267
      vep->endtag = 1;
789
267
      vep->state = VEP_ESITAG;
790
700k
    } else if (vep->state == VEP_ESITAG) {
791
1.19k
      vep->in_esi_tag = 1;
792
1.19k
      vep->esi_found = 1;
793
1.19k
      vep_mark_skip(vep, p);
794
1.19k
      vep->match = vep_match_esi;
795
1.19k
      vep->state = VEP_MATCH;
796
699k
    } else if (vep->state == VEP_ESIINCLUDE) {
797
0
      if (vep->remove) {
798
0
        vep_error(vep,
799
0
            "ESI 1.0 <esi:include> element"
800
0
            " nested in <esi:remove>");
801
0
        vep->state = VEP_TAGERROR;
802
0
      } else if (vep->endtag) {
803
0
        vep_error(vep,
804
0
            "ESI 1.0 </esi:include> illegal end-tag");
805
0
        vep->state = VEP_TAGERROR;
806
0
      } else {
807
0
        vep->dostuff = vep_do_include;
808
0
        vep->state = VEP_INTAG;
809
0
        vep->attr = vep_match_attr_include;
810
0
      }
811
699k
    } else if (vep->state == VEP_ESIREMOVE) {
812
0
      vep->dostuff = vep_do_remove;
813
0
      vep->state = VEP_INTAG;
814
699k
    } else if (vep->state == VEP_ESICOMMENT) {
815
0
      if (vep->remove) {
816
0
        vep_error(vep,
817
0
            "ESI 1.0 <esi:comment> element"
818
0
            " nested in <esi:remove>");
819
0
        vep->state = VEP_TAGERROR;
820
0
      } else if (vep->endtag) {
821
0
        vep_error(vep,
822
0
            "ESI 1.0 </esi:comment> illegal end-tag");
823
0
        vep->state = VEP_TAGERROR;
824
0
      } else {
825
0
        vep->dostuff = vep_do_comment;
826
0
        vep->state = VEP_INTAG;
827
0
      }
828
699k
    } else if (vep->state == VEP_ESIBOGON) {
829
1.19k
      vep_error(vep,
830
1.19k
          "ESI 1.0 <esi:bogus> element");
831
1.19k
      vep->state = VEP_TAGERROR;
832
833
    /******************************************************
834
     * SECTION D
835
     */
836
837
698k
    } else if (vep->state == VEP_INTAG) {
838
0
      vep->tag_i = 0;
839
0
      while (p < e && vct_islws(*p) && !vep->emptytag) {
840
0
        p++;
841
0
        vep->canattr = 1;
842
0
      }
843
0
      if (p < e && *p == '/' && !vep->emptytag) {
844
0
        p++;
845
0
        vep->emptytag = 1;
846
0
        vep->canattr = 0;
847
0
      }
848
0
      if (p < e && *p == '>') {
849
0
        p++;
850
0
        AN(vep->dostuff);
851
0
        vep_mark_skip(vep, p);
852
0
        vep->dostuff(vep, DO_TAG);
853
0
        vep->in_esi_tag = 0;
854
0
        vep->state = VEP_NEXTTAG;
855
0
      } else if (p < e && vep->emptytag) {
856
0
        vep_error(vep,
857
0
            "XML 1.0 '>' does not follow '/' in tag");
858
0
        vep->state = VEP_TAGERROR;
859
0
      } else if (p < e && vep->canattr &&
860
0
          vct_isxmlnamestart(*p)) {
861
0
        vep->state = VEP_ATTR;
862
0
      } else if (p < e) {
863
0
        vep_error(vep,
864
0
            "XML 1.0 Illegal attribute start char");
865
0
        vep->state = VEP_TAGERROR;
866
0
      }
867
698k
    } else if (vep->state == VEP_TAGERROR) {
868
1.91M
      while (p < e && *p != '>')
869
1.91M
        p++;
870
1.19k
      if (p < e) {
871
1.15k
        p++;
872
1.15k
        vep_mark_skip(vep, p);
873
1.15k
        vep->in_esi_tag = 0;
874
1.15k
        vep->state = VEP_NEXTTAG;
875
1.15k
        if (vep->attr_vsb)
876
0
          VSB_destroy(&vep->attr_vsb);
877
1.15k
      }
878
879
    /******************************************************
880
     * SECTION E
881
     */
882
883
696k
    } else if (vep->state == VEP_ATTR) {
884
0
      AZ(vep->attr_delim);
885
0
      if (vep->attr == NULL) {
886
0
        p++;
887
0
        AZ(vep->attr_vsb);
888
0
        vep->state = VEP_SKIPATTR;
889
0
      } else {
890
0
        vep->match = vep->attr;
891
0
        vep->state = VEP_MATCH;
892
0
      }
893
696k
    } else if (vep->state == VEP_SKIPATTR) {
894
0
      while (p < e && vct_isxmlname(*p))
895
0
        p++;
896
0
      if (p < e && *p == '=') {
897
0
        p++;
898
0
        vep->state = VEP_ATTRDELIM;
899
0
      } else if (p < e && *p == '>') {
900
0
        vep->state = VEP_INTAG;
901
0
      } else if (p < e && *p == '/') {
902
0
        vep->state = VEP_INTAG;
903
0
      } else if (p < e && vct_issp(*p)) {
904
0
        vep->state = VEP_INTAG;
905
0
      } else if (p < e) {
906
0
        vep_error(vep,
907
0
            "XML 1.0 Illegal attr char");
908
0
        vep->state = VEP_TAGERROR;
909
0
      }
910
696k
    } else if (vep->state == VEP_ATTRGETVAL) {
911
0
      AZ(vep->attr_vsb);
912
0
      vep->attr_vsb = VSB_new_auto();
913
0
      vep->state = VEP_ATTRDELIM;
914
696k
    } else if (vep->state == VEP_ATTRDELIM) {
915
0
      AZ(vep->attr_delim);
916
0
      if (*p == '"' || *p == '\'') {
917
0
        vep->attr_delim = *p++;
918
0
        vep->state = VEP_ATTRVAL;
919
0
      } else if (!vct_issp(*p)) {
920
0
        vep->attr_delim = ' ';
921
0
        vep->state = VEP_ATTRVAL;
922
0
      } else {
923
0
        vep_error(vep,
924
0
            "XML 1.0 Illegal attribute delimiter");
925
0
        vep->state = VEP_TAGERROR;
926
0
      }
927
928
696k
    } else if (vep->state == VEP_ATTRVAL) {
929
0
      while (p < e && *p != '>' && *p != vep->attr_delim &&
930
0
         (vep->attr_delim != ' ' || !vct_issp(*p))) {
931
0
        if (vep->attr_vsb != NULL)
932
0
          VSB_putc(vep->attr_vsb, *p);
933
0
        p++;
934
0
      }
935
0
      if (p < e && *p == '>') {
936
0
        vep_error(vep,
937
0
            "XML 1.0 Missing end attribute delimiter");
938
0
        vep->state = VEP_TAGERROR;
939
0
        vep->attr_delim = 0;
940
0
        if (vep->attr_vsb != NULL) {
941
0
          AZ(VSB_finish(vep->attr_vsb));
942
0
          VSB_destroy(&vep->attr_vsb);
943
0
        }
944
0
      } else if (p < e) {
945
0
        vep->attr_delim = 0;
946
0
        p++;
947
0
        vep->state = VEP_INTAG;
948
0
        if (vep->attr_vsb != NULL) {
949
0
          AZ(VSB_finish(vep->attr_vsb));
950
0
          AN(vep->dostuff);
951
0
          vep->dostuff(vep, DO_ATTR);
952
0
          vep->attr_vsb = NULL;
953
0
        }
954
0
      }
955
956
    /******************************************************
957
     * Utility Section
958
     */
959
960
696k
    } else if (vep->state == VEP_MATCH) {
961
      /*
962
       * Match against a table
963
       */
964
693k
      vm = vep_match(vep, p, e);
965
693k
      vep->match_hit = vm;
966
693k
      if (vm != NULL) {
967
693k
        if (vm->match != NULL)
968
634k
          p += strlen(vm->match);
969
693k
        vep->state = *vm->state;
970
693k
        vep->match = NULL;
971
693k
        vep->tag_i = 0;
972
693k
      } else {
973
39
        assert(p + sizeof(vep->tag) >= e);
974
39
        memcpy(vep->tag, p, e - p);
975
39
        vep->tag_i = e - p;
976
39
        vep->state = VEP_MATCHBUF;
977
39
        p = e;
978
39
      }
979
693k
    } else if (vep->state == VEP_MATCHBUF) {
980
      /*
981
       * Match against a table while split over input
982
       * sections.
983
       */
984
0
      AN(vep->match);
985
0
      i = sizeof(vep->tag) - vep->tag_i;
986
0
      if (i > e - p)
987
0
        i = e - p;
988
0
      memcpy(vep->tag + vep->tag_i, p, i);
989
0
      vm = vep_match(vep, vep->tag,
990
0
          vep->tag + vep->tag_i + i);
991
0
      Debug("MB (%.*s) tag_i %d i %d = vm %p match %s\n",
992
0
          vep->tag_i + i, vep->tag,
993
0
          vep->tag_i,
994
0
          i,
995
0
          vm,
996
0
          vm ? vm->match : "(nil)");
997
998
0
      if (vm == NULL) {
999
0
        vep->tag_i += i;
1000
0
        p += i;
1001
0
        assert(p == e);
1002
0
      } else {
1003
0
        vep->match_hit = vm;
1004
0
        vep->state = *vm->state;
1005
0
        if (vm->match != NULL) {
1006
0
          i = strlen(vm->match);
1007
0
          if (i > vep->tag_i)
1008
0
            p += i - vep->tag_i;
1009
0
        }
1010
0
        vep->match = NULL;
1011
0
        vep->tag_i = 0;
1012
0
      }
1013
3.56k
    } else if (vep->state == VEP_UNTIL) {
1014
      /*
1015
       * Skip until we see magic string
1016
       */
1017
14.6M
      while (p < e) {
1018
14.6M
        if (*p++ != *vep->until_p++) {
1019
14.6M
          vep->until_p = vep->until;
1020
14.6M
        } else if (*vep->until_p == '\0') {
1021
3.48k
          vep->state = vep->until_s;
1022
3.48k
          break;
1023
3.48k
        }
1024
14.6M
      }
1025
3.56k
      if (p == e && !vep->remove)
1026
96
        vep_mark_verbatim(vep, p);
1027
3.56k
    } else {
1028
0
      Debug("*** Unknown state %s\n", vep->state);
1029
0
      WRONG("WRONG ESI PARSER STATE");
1030
0
    }
1031
2.77M
  }
1032
  /*
1033
   * We must always mark up the storage we got, try to do so
1034
   * in the most efficient way, in particular with respect to
1035
   * minimizing and limiting use of pending.
1036
   */
1037
768
  if (p == vep->ver_p)
1038
479
    ;
1039
289
  else if (vep->in_esi_tag)
1040
42
    vep_mark_skip(vep, p);
1041
247
  else if (vep->remove)
1042
0
    vep_mark_skip(vep, p);
1043
247
  else
1044
247
    vep_mark_pending(vep, p);
1045
768
}
1046
1047
/*---------------------------------------------------------------------
1048
 */
1049
1050
static ssize_t v_matchproto_(vep_callback_t)
1051
vep_default_cb(struct vfp_ctx *vc, void *priv, ssize_t l, enum vgz_flag flg)
1052
5.06M
{
1053
5.06M
  ssize_t *s;
1054
1055
5.06M
  CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
1056
5.06M
  AN(priv);
1057
5.06M
  s = priv;
1058
5.06M
  *s += l;
1059
5.06M
  (void)flg;
1060
5.06M
  return (*s);
1061
5.06M
}
1062
1063
/*---------------------------------------------------------------------
1064
 */
1065
1066
struct vep_state *
1067
VEP_Init(struct vfp_ctx *vc, const struct http *req, vep_callback_t *cb,
1068
    void *cb_priv)
1069
768
{
1070
768
  struct vep_state *vep;
1071
1072
768
  CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
1073
768
  CHECK_OBJ_NOTNULL(req, HTTP_MAGIC);
1074
768
  vep = WS_Alloc(vc->resp->ws, sizeof *vep);
1075
768
  if (vep == NULL) {
1076
0
    VSLb(vc->wrk->vsl, SLT_VCL_Error,
1077
0
         "VEP_Init() workspace overflow");
1078
0
    return (NULL);
1079
0
  }
1080
1081
768
  INIT_OBJ(vep, VEP_MAGIC);
1082
768
  vep->url = req->hd[HTTP_HDR_URL].b;
1083
768
  vep->vc = vc;
1084
768
  vep->vsb = VSB_new_auto();
1085
768
  AN(vep->vsb);
1086
1087
768
  if (cb != NULL) {
1088
0
    vep->dogzip = 1;
1089
    /* XXX */
1090
0
    VSB_printf(vep->vsb, "%c", VEC_GZ);
1091
0
    vep->cb = cb;
1092
0
    vep->cb_priv = cb_priv;
1093
768
  } else {
1094
768
    vep->cb = vep_default_cb;
1095
768
    vep->cb_priv = &vep->cb_x;
1096
768
  }
1097
1098
768
  vep->state = VEP_START;
1099
768
  vep->crc = crc32(0L, Z_NULL, 0);
1100
768
  vep->crcp = crc32(0L, Z_NULL, 0);
1101
1102
768
  vep->startup = 1;
1103
768
  return (vep);
1104
768
}
1105
1106
/*---------------------------------------------------------------------
1107
 */
1108
1109
struct vsb *
1110
VEP_Finish(struct vep_state *vep)
1111
768
{
1112
768
  ssize_t l, lcb;
1113
1114
768
  CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
1115
1116
768
  if (vep->include_src)
1117
0
    VSB_destroy(&vep->include_src);
1118
768
  if (vep->attr_vsb)
1119
0
    VSB_destroy(&vep->attr_vsb);
1120
1121
768
  if (vep->state != VEP_START &&
1122
768
      vep->state != VEP_BOM &&
1123
768
      vep->state != VEP_TESTXML &&
1124
768
      vep->state != VEP_NOTXML &&
1125
768
      vep->state != VEP_NEXTTAG) {
1126
303
    vep_error(vep, "VEP ended inside a tag");
1127
303
  }
1128
1129
768
  if (vep->o_pending)
1130
247
    vep_mark_common(vep, vep->ver_p, vep->last_mark);
1131
768
  if (vep->o_wait > 0) {
1132
736
    lcb = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN);
1133
736
    vep_emit_common(vep, lcb - vep->o_last, vep->last_mark);
1134
736
  }
1135
  // NB: We don't account for PAD+SUM+LEN in gzipped objects
1136
768
  (void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH);
1137
1138
768
  AZ(VSB_finish(vep->vsb));
1139
768
  l = VSB_len(vep->vsb);
1140
768
  if (vep->esi_found && l > 0)
1141
220
    return (vep->vsb);
1142
548
  VSB_destroy(&vep->vsb);
1143
548
  return (NULL);
1144
768
}
1145
1146
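/*
 * The block below is never compiled.  It holds a Graphviz "dot" graph
 * whose node names match the VEP_* parser states used in this file.
 */
#if 0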
1147
1148
digraph xml {
1149
  rankdir="LR"
1150
  size="7,10"
1151
#################################################################
1152
# SECTION A
1153
#
1154
1155
START   [shape=ellipse]
1156
TESTXML   [shape=ellipse]
1157
NOTXML    [shape=ellipse]
1158
NEXTTAGa  [shape=hexagon, label="NEXTTAG"]
1159
STARTTAGa [shape=hexagon, label="STARTTAG"]
1160
START   -> TESTXML
1161
START   -> NEXTTAGa [style=dotted, label="syntax:1"]
1162
TESTXML   -> TESTXML  [label="lws"]
1163
TESTXML   -> NOTXML
1164
TESTXML   -> STARTTAGa  [label="'<'"]
1165
1166
#################################################################
1167
# SECTION B
1168
1169
NOTMYTAG  [shape=ellipse]
1170
NEXTTAG   [shape=ellipse]
1171
NOTMYTAG  -> NEXTTAG  [style=dotted, label="syntax:2"]
1172
STARTTAGb [shape=hexagon, label="STARTTAG"]
1173
NOTMYTAG  -> NEXTTAG  [label="'>'"]
1174
NOTMYTAG  -> NOTMYTAG [label="*"]
1175
NEXTTAG   -> NEXTTAG  [label="'-->'"]
1176
NEXTTAG   -> NEXTTAG  [label="*"]
1177
NEXTTAG   -> STARTTAGb  [label="'<'"]
1178
1179
#################################################################
1180
# SECTION C
1181
1182
STARTTAG  [shape=ellipse]
1183
COMMENT   [shape=ellipse]
1184
CDATA   [shape=ellipse]
1185
ESITAG    [shape=ellipse]
1186
ESIETAG   [shape=ellipse]
1187
ESIINCLUDE  [shape=ellipse]
1188
ESIREMOVE [shape=ellipse]
1189
ESICOMMENT  [shape=ellipse]
1190
ESIBOGON  [shape=ellipse]
1191
INTAGc    [shape=hexagon, label="INTAG"]
1192
NOTMYTAGc [shape=hexagon, label="NOTMYTAG"]
1193
NEXTTAGc  [shape=hexagon, label="NEXTTAG"]
1194
TAGERRORc [shape=hexagon, label="TAGERROR"]
1195
C1    [shape=circle,label=""]
1196
STARTTAG  -> COMMENT  [label="'<!--'"]
1197
STARTTAG  -> ESITAG [label="'<esi'"]
1198
STARTTAG  -> CDATA  [label="'<![CDATA['"]
1199
STARTTAG  -> NOTMYTAGc  [label="'*'"]
1200
COMMENT   -> NEXTTAGc [label="'esi'"]
1201
COMMENT   -> C1   [label="*"]
1202
C1    -> C1   [label="*"]
1203
C1    -> NEXTTAGc [label="-->"]
1204
CDATA   -> CDATA  [label="*"]
1205
CDATA   -> NEXTTAGc [label="]]>"]
1206
ESITAG    -> ESIINCLUDE [label="'include'"]
1207
ESITAG    -> ESIREMOVE  [label="'remove'"]
1208
ESITAG    -> ESICOMMENT [label="'comment'"]
1209
ESITAG    -> ESIBOGON [label="*"]
1210
ESICOMMENT  -> INTAGc
1211
ESICOMMENT  -> TAGERRORc
1212
ESICOMMENT  -> TAGERRORc  [style=dotted, label="nested\nin\nremove"]
1213
ESIREMOVE -> INTAGc
1214
ESIREMOVE -> TAGERRORc
1215
ESIINCLUDE  -> INTAGc
1216
ESIINCLUDE  -> TAGERRORc
1217
ESIINCLUDE  -> TAGERRORc  [style=dotted, label="nested\nin\nremove"]
1218
ESIBOGON  -> TAGERRORc
1219
1220
#################################################################
1221
# SECTION D
1222
1223
INTAG   [shape=ellipse]
1224
TAGERROR  [shape=ellipse]
1225
NEXTTAGd  [shape=hexagon, label="NEXTTAG"]
1226
ATTRd   [shape=hexagon, label="ATTR"]
1227
D1    [shape=circle, label=""]
1228
D2    [shape=circle, label=""]
1229
INTAG   -> D1   [label="lws"]
1230
D1    -> D2   [label="/"]
1231
INTAG   -> D2   [label="/"]
1232
INTAG   -> NEXTTAGd [label=">"]
1233
D1    -> NEXTTAGd [label=">"]
1234
D2    -> NEXTTAGd [label=">"]
1235
D1    -> ATTRd  [label="XMLstartchar"]
1236
D1    -> TAGERROR [label="*"]
1237
D2    -> TAGERROR [label="*"]
1238
TAGERROR  -> TAGERROR [label="*"]
1239
TAGERROR  -> NEXTTAGd [label="'>'"]
1240
1241
#################################################################
1242
# SECTION E
1243
1244
ATTR    [shape=ellipse]
1245
SKIPATTR  [shape=ellipse]
1246
ATTRGETVAL  [shape=ellipse]
1247
ATTRDELIM [shape=ellipse]
1248
ATTRVAL   [shape=ellipse]
1249
TAGERRORe [shape=hexagon, label="TAGERROR"]
1250
INTAGe    [shape=hexagon, label="INTAG"]
1251
ATTR    -> SKIPATTR [label="*"]
1252
ATTR    -> ATTRGETVAL [label="wanted attr"]
1253
SKIPATTR  -> SKIPATTR [label="XMLname"]
1254
SKIPATTR  -> ATTRDELIM  [label="'='"]
1255
SKIPATTR  -> TAGERRORe  [label="*"]
1256
ATTRGETVAL  -> ATTRDELIM
1257
ATTRDELIM -> ATTRVAL  [label="\""]
1258
ATTRDELIM -> ATTRVAL  [label="\'"]
1259
ATTRDELIM -> ATTRVAL  [label="*"]
1260
ATTRDELIM -> TAGERRORe  [label="lws"]
1261
ATTRVAL   -> TAGERRORe  [label="'>'"]
1262
ATTRVAL   -> INTAGe [label="delim"]
1263
ATTRVAL   -> ATTRVAL  [label="*"]
1264
1265
}
1266
1267
#endif