Scippy

SCIP

Solving Constraint Integer Programs

xmlparse.c
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* */
3 /* This file is part of the program and library */
4 /* SCIP --- Solving Constraint Integer Programs */
5 /* */
6 /* Copyright (C) 2002-2014 Konrad-Zuse-Zentrum */
7 /* fuer Informationstechnik Berlin */
8 /* */
9 /* SCIP is distributed under the terms of the ZIB Academic License. */
10 /* */
11 /* You should have received a copy of the ZIB Academic License */
12 /* along with SCIP; see the file COPYING. If not email to scip@zib.de. */
13 /* */
14 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
15 
16 /**@file xmlparse.c
17  * @brief functions for XML parsing
18  * @author Thorsten Koch
19  * @author Marc Pfetsch
20  *
21  * If SPEC_LIKE_SPACE_HANDLING is not defined, all LF,CR will be changed into spaces and from a
22  * sequence of spaces only one will be used.
23  *
24  * @todo Implement possibility to avoid the construction of parsing information for certain tags
25  * (and their children). For solution files this would avoid parsing the constraints section.
26  */
27 
28 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
29 
30 #include <blockmemshell/memory.h>
31 
32 #include "xml.h"
33 #include "xmldef.h"
34 
35 
36 #include <sys/types.h>
37 #ifdef WITH_ZLIB
38 #include <unistd.h>
39 #endif
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <assert.h>
43 #include <ctype.h>
44 #include <string.h>
45 
46 
47 #define NAME_EXT_SIZE 128
48 #define ATTR_EXT_SIZE 4096
49 #define DATA_EXT_SIZE 4096
50 #define LINE_BUF_SIZE 8192
51 
52 #define xmlError(a, b) xmlErrmsg(a, b, FALSE, __FILE__, __LINE__)
53 
54 
55 /* forward declarations */
56 typedef struct parse_stack_struct PSTACK;
57 typedef struct parse_pos_struct PPOS;
58 
59 /** state of the parser */
61 {
67 };
68 typedef enum parse_state_enum PSTATE;
69 
70 /** Stack as a (singly) linked list. The top element is the current node. */
72 {
75 };
76 
77 /** Store the current position in the file and the state of the parser. */
79 {
80  const char* filename;
83  int pos;
84  int lineno;
85  int nextsym;
86  int lastsym;
89 };
90 
91 
92 /** output error message with corresponding line and position */
93 static void xmlErrmsg(
94  PPOS* ppos,
95  const char* msg,
96  XML_Bool msg_only,
97  const char* file,
98  int line
99  )
100 {
101 #ifndef NDEBUG
102  int ret;
103  assert( ppos != NULL );
104 
105  if ( ! msg_only )
106  {
107  ret = fprintf(stderr, "%s(%d) Error in file %s line %d\n", file, line, ppos->filename, ppos->lineno);
108  assert(ret >= 0);
109 
110  ret = fprintf(stderr, "%s", ppos->buf);
111  assert(ret >= 0);
112 
113  if ( strchr(ppos->buf, '\n') == NULL )
114  {
115  int retc;
116 
117  retc = fputc('\n', stderr);
118  assert(retc != EOF);
119  }
120 
121  ret = fprintf(stderr, "%*s\n", ppos->pos, "^");
122  assert(ret >= 0);
123  }
124  ret = fprintf(stderr, "%s\n\n", msg);
125  assert(ret >= 0);
126 
127 #else
128 
129  if ( ! msg_only )
130  {
131  (void) fprintf(stderr, "%s(%d) Error in file %s line %d\n", file, line, ppos->filename, ppos->lineno);
132 
133  (void) fprintf(stderr, "%s", ppos->buf);
134 
135  if ( strchr(ppos->buf, '\n') == NULL )
136  {
137  (void) fputc('\n', stderr);
138  }
139 
140  (void) fprintf(stderr, "%*s\n", ppos->pos, "^");
141  }
142  (void) fprintf(stderr, "%s\n\n", msg);
143 #endif
144 }
145 
146 
147 /** Push new element on the parse stack.
148  *
149  * Returns TRUE if it worked, FALSE otherwise.
150  */
151 static
153  PPOS* ppos,
154  XML_NODE* node
155  )
156 {
157  PSTACK* p;
158 
159  assert(ppos != NULL);
160  assert(node != NULL);
161 
162  debugMessage("Pushing %s\n", node->name);
163 
165  assert(p != NULL);
166 
167  p->node = node;
168  p->next = ppos->top;
169  ppos->top = p;
170 
171  return TRUE;
172 }
173 
174 /** returns top element on stack (which has to be present) */
176  const PPOS* ppos
177  )
178 {
179  assert(ppos != NULL);
180  assert(ppos->top != NULL);
181 
182  return ppos->top->node;
183 }
184 
185 /** remove top element from stack and deletes it
186  *
187  * TRUE if ok, FALSE otherwise
188  */
189 static
191  PPOS* ppos /**< input stream position */
192  )
193 {
194  PSTACK* p;
195  XML_Bool result;
196 
197  assert(ppos != NULL);
198 
199  if ( ppos->top == NULL )
200  {
201  xmlError(ppos, "Stack underflow");
202  result = FALSE;
203  }
204  else
205  {
206  result = TRUE;
207  p = ppos->top;
208  ppos->top = p->next;
209 
210  debugMessage("Poping %s\n", p->node->name);
211  BMSfreeMemory(&p);
212  }
213  return result;
214 }
215 
216 /** remove complete stack */
217 static
219  PPOS* ppos
220  )
221 {
222  assert(ppos != NULL);
223 
224  while ( ppos->top != NULL )
225  (void) popPstack(ppos);
226 }
227 
228 /** Returns the next character from the input buffer and fills the buffer if it is empty (similar to fgetc()). */
229 static
230 int mygetc(
231  PPOS* ppos
232  )
233 {
234  assert(ppos != NULL);
235  assert(ppos->fp != NULL);
236  assert(ppos->pos < LINE_BUF_SIZE);
237 
238  if ( ppos->buf[ppos->pos] == '\0' )
239  {
240 #ifdef SCIP_DISABLED_CODE
241  /* the low level function gzread/fread used below seem to be faster */
242  if ( NULL == FGETS(ppos->buf, sizeof(ppos->buf), ppos->fp) )
243  return EOF;
244 #else
245  size_t len = (size_t) FREAD(ppos->buf, sizeof(ppos->buf) - 1, ppos->fp); /*lint !e571 !e747*/
246 
247  if( len == 0 || len > sizeof(ppos->buf) - 1 )
248  return EOF;
249 
250  ppos->buf[len] = '\0';
251 #endif
252  ppos->pos = 0;
253  }
254  return (unsigned char)ppos->buf[ppos->pos++];
255 }
256 
257 
258 #ifdef SPEC_LIKE_SPACE_HANDLING
259 /** Read input from fp_in.
260  *
261  * If there is a LF, CR, CR/LF, or LF/CR it returns exactly on LF. Also counts the number of
262  * characters.
263  */
264 static
265 int getsymbol(
266  PPOS* ppos
267  )
268 {
269  int c;
270 
271  assert(ppos != NULL);
272 
273  if ( ppos->nextsym == 0 )
274  c = mygetc(ppos);
275  else
276  {
277  c = ppos->nextsym;
278  ppos->nextsym = 0;
279  }
280  assert(ppos->nextsym == 0);
281 
282  if (((c == '\n') && (ppos->lastsym == '\r')) || ((c == '\r') && (ppos->lastsym == '\n')))
283  c = mygetc(ppos);
284 
285  ppos->lastsym = c;
286 
287  if ( c == '\r' )
288  c = '\n';
289 
290  if ( c == '\n' )
291  ++ppos->lineno;
292 
293  return c;
294 }
295 #else
296 /** Read input from fp_in (variant).
297  *
298  * Here we convert all LF or CR into SPACE and return maximally one SPACE after the other.
299  *
300  * @note This function counts lines differently. On systems that have only one '\\r' as line feed
301  * (MAC) it does not count correctly.
302  */
303 static
305  PPOS* ppos
306  )
307 {
308  int c;
309 
310  assert(ppos != NULL);
311 
312  do
313  {
314  if ( ppos->nextsym == 0 )
315  c = mygetc(ppos);
316  else
317  {
318  c = ppos->nextsym;
319  ppos->nextsym = 0;
320  }
321  assert(ppos->nextsym == 0);
322 
323  if ( c == '\n' )
324  ++ppos->lineno;
325 
326  if ((c == '\n') || (c == '\r'))
327  c = ' ';
328  } while((c == ' ') && (ppos->lastsym == c));
329 
330  ppos->lastsym = c;
331 
332  debugMessage("[%c]\n", c);
333 
334  return c;
335 }
336 #endif
337 
338 /** Reinserts a character into the input stream */
339 static
341  PPOS* ppos,
342  int c
343  )
344 {
345  assert(ppos != NULL);
346  assert(ppos->nextsym == 0);
347 
348  ppos->nextsym = c;
349 }
350 
351 /** Skip all spaces and return the next non-space character or EOF */
352 static
354  PPOS* ppos
355  )
356 {
357  int c;
358 
359  assert(ppos != NULL);
360 
361  do
362  {
363  c = getsymbol(ppos);
364  }
365  while(isspace(c));
366 
367  return c;
368 }
369 
370 /** Get name of a TAG or attribute from the input stream.
371  *
372  * Either it returns a pointer to allocated memory which contains the name or it returns NULL if
373  * there is some error.
374  */
375 static
376 char* getName(
377  PPOS* ppos
378  )
379 {
380  char* name = NULL;
381  size_t size = 0;
382  size_t len = 0;
383  int c;
384 
385  assert(ppos != NULL);
386 
387  c = getsymbol(ppos);
388 
389  if ( ! isalpha(c) && (c != '_') && (c != ':') )
390  {
391  xmlError(ppos, "Name starting with illegal charater");
392  return NULL;
393  }
394 
395  /* The following is wrong: Here almost all characters that we casted to unicode are feasible */
396  while ( isalnum(c) || (c == '_') || (c == ':') || (c == '.') || (c == '-') )
397  {
398  if ( len + 1 >= size )
399  {
400  size += NAME_EXT_SIZE;
401 
402  if ( name == NULL )
403  {
404  ALLOC_ABORT( BMSallocMemoryArray(&name, size) );
405  }
406  else
407  {
408  ALLOC_ABORT( BMSreallocMemoryArray(&name, size) );
409  }
410  }
411  assert(name != NULL);
412  assert(size > len);
413 
414  name[len++] = (char)c;
415 
416  c = getsymbol(ppos);
417  }
418  if ( c != EOF )
419  ungetsymbol(ppos, c);
420 
421  assert(name != NULL);
422 
423  if ( len == 0 )
424  {
425  BMSfreeMemoryArray(&name);
426  name = NULL;
427  }
428  else
429  name[len] = '\0';
430 
431  return name;
432 }
433 
434 /** Read the value of an attribute from the input stream.
435  *
436  * The value has to be between two " or ' (the other character is then valid as well). The function
437  * returns a pointer to allocated memory containing the value or it returns NULL in case of an
438  * error.
439  */
440 static
442  PPOS* ppos
443  )
444 {
445  char* attr = NULL;
446  int c;
447  int stop;
448  size_t len = 0;
449  size_t size = 0;
450 
451  assert(ppos != NULL);
452 
453  /* The following is not allowed according to the specification (the value has to be directly
454  * after the equation sign). */
455  c = skipSpace(ppos);
456 
457  if ( (c != '"') && (c != '\'') )
458  {
459  xmlError(ppos, "Atribute value does not start with \" or \'");
460  return NULL;
461  }
462  stop = c;
463 
464  for(;;)
465  {
466  if ( len == size )
467  {
468  size += ATTR_EXT_SIZE;
469 
470  if ( attr == NULL )
471  {
472  ALLOC_ABORT( BMSallocMemoryArray(&attr, size) );
473  }
474  else
475  {
476  ALLOC_ABORT( BMSreallocMemoryArray(&attr, size) );
477  }
478  }
479  assert(attr != NULL);
480  assert(size > len);
481 
482  c = getsymbol(ppos);
483 
484  if ( (c == stop) || (c == EOF) )
485  break;
486 
487  attr[len++] = (char)c;
488  }
489 
490  if ( c != EOF )
491  attr[len] = '\0';
492  else
493  {
494  BMSfreeMemoryArray(&attr);
495  attr = NULL;
496  }
497  return attr;
498 }
499 
500 /** Skip comment
501  *
502  * Return FALSE if an error occurs.
503  */
504 static
506  PPOS* ppos
507  )
508 {
509  XML_Bool result = TRUE;
510  int c;
511  int state = 0;
512 
513  assert(ppos != NULL);
514 
515  for(;;)
516  {
517  c = getsymbol(ppos);
518 
519  if ( c == EOF )
520  break;
521 
522  if ( (c == '>') && (state >= 2) )
523  break;
524 
525  state = (c == '-') ? state + 1 : 0;
526  }
527  if ( c == EOF )
528  {
529  xmlError(ppos, "Unexpected EOF in comment");
530  result = FALSE;
531  }
532  return result;
533 }
534 
535 /** Handles a CDATA section.
536  *
537  * Returns a pointer to allocated memory containing the data of this section or NULL in case of an
538  * error.
539  */
540 static
541 char* doCdata(
542  PPOS* ppos
543  )
544 {
545  char* data = NULL;
546  size_t size = 0;
547  size_t len = 0;
548  int state = 0;
549  int c;
550 
551  assert(ppos != NULL);
552 
553  for(;;)
554  {
555  c = getsymbol(ppos);
556 
557  if ( c == EOF )
558  break;
559 
560  if ( c == ']' )
561  state++;
562  else
563  if ( (c == '>') && (state >= 2) )
564  break;
565  else
566  state = 0;
567 
568  if ( len == size )
569  {
570  size += DATA_EXT_SIZE;
571 
572  if ( data == NULL )
573  {
574  ALLOC_ABORT( BMSallocMemoryArray(&data, size) );
575  }
576  else
577  {
578  ALLOC_ABORT( BMSreallocMemoryArray(&data, size) );
579  }
580  }
581  assert(data != NULL);
582  assert(size > len);
583 
584  data[len++] = (char)c;
585  }
586  assert(data != NULL);
587 
588  /*lint --e{527}*/
589  if ( c != EOF )
590  {
591  assert(len >= 2);
592  assert(data != NULL);
593 
594  data[len - 2] = '\0'; /*lint !e413*/
595  }
596  else
597  {
598  BMSfreeMemoryArray(&data);
599  data = NULL;
600  xmlError(ppos, "Unexpected EOF in CDATA");
601  }
602  return data;
603 }
604 
605 /** Handle processing instructions (skipping) */
606 static
607 void handlePi(
608  PPOS* ppos
609  )
610 {
611  int c;
612 
613  assert(ppos != NULL);
614  assert(ppos->state == XML_STATE_BEFORE);
615 
616  do
617  {
618  c = getsymbol(ppos);
619  }
620  while ( (c != EOF) && (c != '>') );
621 
622  if ( c != EOF )
623  ppos->state = XML_STATE_PCDATA;
624  else
625  {
626  xmlError(ppos, "Unexpected EOF in PI");
627  ppos->state = XML_STATE_ERROR;
628  }
629 }
630 
631 /** Handles declarations that start with a <!.
632  *
633  * This includes comments. Does currenlty not work very well, because of DTDs.
634  */
635 static
637  PPOS* ppos
638  )
639 {
640  enum XmlSection
641  {
642  IS_COMMENT,
643  IS_ATTLIST,
644  IS_DOCTYPE,
645  IS_ELEMENT,
646  IS_ENTITY,
647  IS_NOTATION,
648  IS_CDATA
649  };
650  typedef enum XmlSection XMLSECTION;
651 
652  static struct
653  {
654  const char* name;
655  XMLSECTION what;
656  } key[] =
657  {
658  { "--", IS_COMMENT },
659  { "ATTLIST", IS_ATTLIST },
660  { "DOCTYPE", IS_DOCTYPE },
661  { "ELEMENT", IS_ELEMENT },
662  { "ENTITY", IS_ENTITY },
663  { "NOTATION", IS_NOTATION },
664  { "[CDATA[", IS_CDATA }
665  };
666  XML_NODE* node;
667  char* data;
668  int c;
669  int k = 0;
670  int beg = 0;
671  int end;
672 
673  assert(ppos != NULL);
674  assert(ppos->state == XML_STATE_BEFORE);
675 
676  end = (int) (sizeof(key) / sizeof(key[0])) - 1;
677  do
678  {
679  c = getsymbol(ppos);
680 
681  for(; (beg <= end) && (c != key[beg].name[k]); beg++)
682  ;
683  for(; (end >= beg) && (c != key[end].name[k]); end--)
684  ;
685  k++;
686  } while(beg < end);
687 
688  if ( beg != end )
689  {
690  xmlError(ppos, "Unknown declaration");
691 
692  while ( (c != EOF) && (c != '>') )
693  c = getsymbol(ppos);
694  }
695  else
696  {
697  assert(beg == end);
698  assert(beg < (int)(sizeof(key) / sizeof(*key)));
699 
700  switch(key[beg].what)
701  {
702  case IS_COMMENT :
703  if ( doComment(ppos) )
704  ppos->state = XML_STATE_ERROR;
705  break;
706  case IS_CDATA :
707  if ( (data = doCdata(ppos)) == NULL )
708  ppos->state = XML_STATE_ERROR;
709  else
710  {
711  if ( NULL == (node = xmlNewNode("#CDATA", ppos->lineno)) )
712  {
713  xmlError(ppos, "Can't create new node");
714  ppos->state = XML_STATE_ERROR;
715  }
716  else
717  {
718  BMSduplicateMemoryArray(&node->data, data, strlen(data)+1);
719  BMSfreeMemoryArray(&data);
720  xmlAppendChild(topPstack(ppos), node);
721  }
722  }
723  break;
724  case IS_ATTLIST :
725  case IS_ELEMENT :
726  case IS_NOTATION :
727  case IS_ENTITY :
728  case IS_DOCTYPE :
729  break;
730  default :
731  abort();
732  }
733  }
734 }
735 
736 /** Handle end tag */
737 static
739  PPOS* ppos
740  )
741 {
742  char* name;
743  int c;
744 
745  assert(ppos != NULL);
746 
747  if ( (name = getName(ppos)) == NULL )
748  xmlError(ppos, "Missing name in endtag");
749  else
750  {
751  c = skipSpace(ppos);
752 
753  if ( c != '>' )
754  {
755  xmlError(ppos, "Missing '>' in endtag");
756  ppos->state = XML_STATE_ERROR;
757  }
758  else
759  {
760  if ( strcmp(name, topPstack(ppos)->name) )
761  {
762  xmlError(ppos, "Name of endtag does not match starttag");
763  ppos->state = XML_STATE_ERROR;
764  }
765  else
766  {
767  if ( popPstack(ppos) )
768  ppos->state = XML_STATE_PCDATA;
769  else
770  ppos->state = XML_STATE_ERROR;
771  }
772  BMSfreeMemoryArray(&name);
773  }
774  }
775 }
776 
777 /** Handle start tag */
778 static
780  PPOS* ppos
781  )
782 {
783  XML_NODE* node;
784  char* name;
785 
786  assert(ppos != NULL);
787 
788  name = getName(ppos);
789  if ( name == NULL )
790  {
791  xmlError(ppos, "Missing name in tagstart");
792  ppos->state = XML_STATE_ERROR;
793  }
794  else
795  {
796  node = xmlNewNode(name, ppos->lineno);
797  if ( node == NULL )
798  {
799  xmlError(ppos, "Can't create new node");
800  ppos->state = XML_STATE_ERROR;
801  }
802  else
803  {
804  xmlAppendChild(topPstack(ppos), node);
805 
806  if ( pushPstack(ppos, node) )
807  ppos->state = XML_STATE_IN_TAG;
808  else
809  ppos->state = XML_STATE_ERROR;
810  }
811  BMSfreeMemoryArray(&name);
812  }
813 }
814 
815 /** Checks for next tag */
816 static
818  PPOS* ppos /**< input stream position */
819  )
820 {
821  int c;
822 
823  assert(ppos != NULL);
824  assert(ppos->state == XML_STATE_BEFORE);
825 
826  c = skipSpace(ppos);
827 
828  if ( c != '<' )
829  {
830  xmlError(ppos, "Expecting '<'");
831  ppos->state = XML_STATE_ERROR;
832  }
833  else
834  {
835  c = getsymbol(ppos);
836 
837  switch(c)
838  {
839  case EOF :
840  xmlError(ppos, "Unexpected EOF");
841  ppos->state = XML_STATE_ERROR;
842  break;
843  case '!' :
844  handleDecl(ppos);
845  break;
846  case '?' :
847  handlePi(ppos);
848  break;
849  case '/' :
850  handleEndtag(ppos);
851  break;
852  default :
853  ungetsymbol(ppos, c);
854  handleStarttag(ppos);
855  break;
856  }
857  }
858 }
859 
860 /** Process tag */
861 static
863  PPOS* ppos /**< input stream position */
864  )
865 {
866  XML_ATTR* attr;
867  int c;
868  XML_Bool empty = FALSE;
869  char* name;
870  char* value;
871 
872  assert(ppos != NULL);
873  assert(ppos->state == XML_STATE_IN_TAG);
874 
875  c = skipSpace(ppos);
876 
877  if ( (c == '/') || (c == '>') || (c == EOF) )
878  {
879  if ( c == '/' )
880  {
881  empty = TRUE;
882  c = getsymbol(ppos);
883  }
884 
885  if ( c == EOF )
886  {
887  xmlError(ppos, "Unexpected EOF while in a tag");
888  ppos->state = XML_STATE_ERROR;
889  }
890 
891  if ( c == '>' )
892  {
893  ppos->state = XML_STATE_PCDATA;
894 
895  if (empty && ! popPstack(ppos))
896  ppos->state = XML_STATE_ERROR;
897  }
898  else
899  {
900  xmlError(ppos, "Expected tag end marker '>'");
901  ppos->state = XML_STATE_ERROR;
902  }
903  }
904  else
905  {
906  ungetsymbol(ppos, c);
907 
908  name = getName(ppos);
909  if ( name == NULL )
910  {
911  xmlError(ppos, "No name for attribute");
912  ppos->state = XML_STATE_ERROR;
913  }
914  else
915  {
916  c = skipSpace(ppos);
917 
918  if ( (c != '=') || ((value = getAttrval(ppos)) == NULL) )
919  {
920  xmlError(ppos, "Missing attribute value");
921  ppos->state = XML_STATE_ERROR;
922  BMSfreeMemoryArray(&name);
923  }
924  else
925  {
926  attr = xmlNewAttr(name, value);
927  if ( attr == NULL )
928  {
929  xmlError(ppos, "Can't create new attribute");
930  ppos->state = XML_STATE_ERROR;
931  }
932  else
933  {
934  xmlAddAttr(topPstack(ppos), attr);
935  }
936  BMSfreeMemoryArray(&name);
937  BMSfreeMemoryArray(&value);
938  }
939  }
940  }
941 }
942 
943 /* Handles PCDATA */
944 static
946  PPOS* ppos /**< input stream position */
947  )
948 {
949  XML_NODE* node;
950  char* data = NULL;
951  size_t size = 0;
952  size_t len = 0;
953  int c;
954 
955  assert(ppos != NULL);
956  assert(ppos->state == XML_STATE_PCDATA);
957 
958 #ifndef SPEC_LIKE_SPACE_HANDLING
959  c = skipSpace(ppos);
960  if ( c != EOF )
961  ungetsymbol(ppos, c);
962 #endif
963  c = getsymbol(ppos);
964 
965  while ( (c != EOF) && (c != '<') )
966  {
967  if ( len + 1 >= size ) /* leave space for terminating '\0' */
968  {
969  size += DATA_EXT_SIZE;
970 
971  if ( data == NULL )
972  {
973  ALLOC_ABORT( BMSallocMemoryArray(&data, size) );
974  }
975  else
976  {
977  ALLOC_ABORT( BMSreallocMemoryArray(&data, size) );
978  }
979  }
980  assert(data != NULL);
981  assert(size > len + 1);
982 
983  data[len++] = (char)c;
984 
985  c = getsymbol(ppos);
986  }
987  if ( data == NULL )
988  {
989  if ( c == EOF )
990  ppos->state = XML_STATE_EOF;
991  else if ( c == '<' )
992  {
993  ppos->state = XML_STATE_BEFORE;
994  ungetsymbol(ppos, c);
995  }
996  else
997  {
998  ppos->state = XML_STATE_ERROR;
999  }
1000  }
1001  else
1002  {
1003  assert(len < size);
1004  data[len] = '\0';
1005 
1006  if ( c == EOF )
1007  ppos->state = XML_STATE_ERROR;
1008  else
1009  {
1010  ungetsymbol(ppos, c);
1011 
1012  node = xmlNewNode("#PCDATA", ppos->lineno);
1013  if ( node == NULL )
1014  {
1015  xmlError(ppos, "Can't create new node");
1016  ppos->state = XML_STATE_ERROR;
1017  }
1018  else
1019  {
1020  BMSduplicateMemoryArray(&node->data, data, strlen(data)+1);
1021  xmlAppendChild(topPstack(ppos), node);
1022  ppos->state = XML_STATE_BEFORE;
1023  }
1024  BMSfreeMemoryArray(&data);
1025  }
1026  }
1027 }
1028 
1029 /** Parse input stream */
1030 static
1032  PPOS* ppos /**< input stream position */
1033  )
1034 {
1035  XML_Bool ok = TRUE;
1036 
1037  while (ok)
1038  {
1039  debugMessage("state=%d\n", ppos->state);
1040 
1041  switch (ppos->state)
1042  {
1043  case XML_STATE_BEFORE :
1044  procBefore(ppos);
1045  break;
1046  case XML_STATE_IN_TAG :
1047  procInTag(ppos);
1048  break;
1049  case XML_STATE_PCDATA :
1050  procPcdata(ppos);
1051  break;
1052  case XML_STATE_EOF :
1053  ok = FALSE;
1054  break;
1055  case XML_STATE_ERROR :
1056  ok = FALSE;
1057  break;
1058  default :
1059  xmlError(ppos, "Internal Error, illegal state");
1060  ok = FALSE;
1061  }
1062  }
1063  return (ppos->state == XML_STATE_EOF);
1064 }
1065 
1066 /** Parse file */
1068  const char* filename /**< XML file name */
1069  )
1070 {
1071  PPOS ppos;
1072  XML_NODE* node = NULL;
1073  XML_ATTR* attr;
1074  XML_Bool result = FALSE;
1075  char* myfilename;
1076  size_t filenamelen;
1077 
1078  /* allocate space and copy filename (possibly modified below) in two steps in order to satisfy valgrind */
1079  assert( filename != NULL );
1080  filenamelen = strlen(filename);
1081  if ( BMSallocMemoryArray(&myfilename, filenamelen + 5) == NULL )
1082  return NULL;
1083  BMScopyMemoryArray(myfilename, filename, filenamelen + 1);
1084 
1085 #ifdef WITH_ZLIB
1086  if ( access(filename, R_OK) != 0 )
1087  {
1088  strcat(myfilename, ".gz");
1089 
1090  /* If .gz also does not work, revert to the old name
1091  * to get a better error message.
1092  */
1093  if ( access(myfilename, R_OK) != 0 )
1094  strcpy(myfilename, filename);
1095  }
1096 #endif
1097  ppos.fp = FOPEN(myfilename, "r");
1098  if ( ppos.fp == NULL )
1099  perror(myfilename);
1100  else
1101  {
1102  ppos.filename = myfilename;
1103  ppos.buf[0] = '\0';
1104  ppos.pos = 0;
1105  ppos.lineno = 1;
1106  ppos.nextsym = 0;
1107  ppos.lastsym = 0;
1108  ppos.state = XML_STATE_BEFORE;
1109  ppos.top = NULL;
1110 
1111  node = xmlNewNode("#ROOT", ppos.lineno);
1112  if ( node == NULL )
1113  {
1114  xmlError(&ppos, "Can't create new node");
1115  }
1116  else
1117  {
1118  attr = xmlNewAttr("filename", myfilename);
1119  if ( attr == NULL )
1120  xmlError(&ppos, "Can't create new attribute");
1121  else
1122  {
1123  xmlAddAttr(node, attr);
1124 
1125  /* push root node on stack and start to process */
1126  if ( pushPstack(&ppos, node) )
1127  {
1128  result = xmlParse(&ppos);
1129 
1130  clearPstack(&ppos);
1131  }
1132  }
1133  }
1134 
1135  if ( ! result && (node != NULL) )
1136  {
1137  xmlErrmsg(&ppos, "Parsing error, processing stopped", TRUE, __FILE__, __LINE__);
1138  xmlFreeNode(node);
1139  node = NULL;
1140  }
1141  if ( FCLOSE(ppos.fp) )
1142  perror(myfilename);
1143  }
1144  BMSfreeMemoryArray(&myfilename);
1145 
1146  return node;
1147 }
1148 
1149 
1150 
1151 
1152 
1153 
1154 /*----------------------------------------------------------------------------------------------*/
1155 
1156 
1157 /** create new node */
1159  const char* name,
1160  int lineno
1161  )
1162 {
1163  XML_NODE* n = NULL;
1164 
1165  assert(name != NULL);
1166 
1167  if ( BMSallocMemory(&n) != NULL )
1168  {
1169  BMSclearMemory(n);
1170  BMSduplicateMemoryArray(&n->name, name, strlen(name)+1);
1171  n->lineno = lineno;
1172  }
1173  return n;
1174 }
1175 
1176 /** create new attribute */
1178  const char* name,
1179  const char* value
1180  )
1181 {
1182  XML_ATTR* a = NULL;
1183 
1184  assert(name != NULL);
1185  assert(value != NULL);
1186 
1187  if ( BMSallocMemory(&a) != NULL )
1188  {
1189  BMSclearMemory(a);
1190  BMSduplicateMemoryArray(&a->name, name, strlen(name)+1);
1191  BMSduplicateMemoryArray(&a->value, value, strlen(value)+1);
1192  }
1193  return a;
1194 }
1195 
1196 /** add attribute */
1198  XML_NODE* n,
1199  XML_ATTR* a
1200  )
1201 {
1202  assert(n != NULL);
1203  assert(a != NULL);
1204 
1205  a->next = n->attrlist;
1206  n->attrlist = a;
1207 }
1208 
1209 /** append child node */
1211  XML_NODE* parent,
1212  XML_NODE* child
1213  )
1214 {
1215  assert(parent != NULL);
1216  assert(child != NULL);
1217 
1218  child->parent = parent;
1219  child->prevsibl = parent->lastchild;
1220  child->nextsibl = NULL;
1221  parent->lastchild = child;
1222 
1223  if ( child->prevsibl != NULL )
1224  child->prevsibl->nextsibl = child;
1225 
1226  if ( parent->firstchild == NULL )
1227  parent->firstchild = child;
1228 }
1229 
1230 /** free attribute */
1231 static
1233  XML_ATTR* attr
1234  )
1235 {
1236  XML_ATTR* a;
1237 
1238  /* Note: use an iterative implementation instead of a recursive one; the latter is much slower for large instances
1239  * and might overflow the stack. */
1240  a = attr;
1241  while (a != NULL)
1242  {
1243  XML_ATTR* b;
1244  b = a->next;
1245 
1246  assert(a->name != NULL);
1247  assert(a->value != NULL);
1248 
1249  BMSfreeMemoryArray(&a->name);
1250  BMSfreeMemoryArray(&a->value);
1251  BMSfreeMemory(&a);
1252  a = b;
1253  }
1254 }
1255 
1256 /** free node */
1258  XML_NODE* node
1259  )
1260 {
1261  XML_NODE* n;
1262 
1263  if ( node == NULL )
1264  return;
1265 
1266  /* Free data from back to front (because free is faster this way). */
1267  /* Note: use an iterative implementation instead of a recursive one; the latter is much slower for large instances
1268  * and might overflow the stack. */
1269  n = node->lastchild;
1270  while ( n != NULL )
1271  {
1272  XML_NODE* m;
1273  m = n->prevsibl;
1274  xmlFreeNode(n);
1275  n = m;
1276  }
1277 
1278  xmlFreeAttr(node->attrlist);
1279 
1280  if ( node->data != NULL )
1281  {
1282  BMSfreeMemoryArray(&node->data);
1283  }
1284  assert(node->name != NULL);
1285 
1286  BMSfreeMemoryArray(&node->name);
1287  BMSfreeMemory(&node);
1288 }
1289 
1290 /** output node */
1292  const XML_NODE* root
1293  )
1294 {
1295  const XML_NODE* n;
1296  const XML_ATTR* a;
1297 
1298  assert(root != NULL);
1299 
1300  for (n = root; n != NULL; n = n->nextsibl)
1301  {
1302  infoMessage("Name: %s\n", n->name);
1303  infoMessage("Line: %d\n", n->lineno);
1304  infoMessage("Data: %s\n", (n->data != NULL) ? n->data : "***");
1305 
1306  for (a = n->attrlist; a != NULL; a = a->next)
1307  infoMessage("Attr: %s = [%s]\n", a->name, a->value);
1308 
1309  if ( n->firstchild != NULL )
1310  {
1311  infoMessage("->\n");
1312  xmlShowNode(n->firstchild);
1313  infoMessage("<-\n");
1314  }
1315  }
1316 }
1317 
1318 /** get attribute value */
1319 const char* xmlGetAttrval(
1320  const XML_NODE* node,
1321  const char* name
1322  )
1323 {
1324  XML_ATTR* a;
1325 
1326  assert(node != NULL);
1327  assert(name != NULL);
1328 
1329  for (a = node->attrlist; a != NULL; a = a->next)
1330  {
1331  if ( ! strcmp(name, a->name) )
1332  break;
1333  }
1334 
1335 #ifdef SCIP_DEBUG
1336  if (a == NULL)
1337  infoMessage("Error: Attribute %s in TAG <%s> not found\n", name, node->name);
1338 #endif
1339 
1340  return (a == NULL) ? NULL : a->value;
1341 }
1342 
1343 /** return first node */
1345  const XML_NODE* node,
1346  const char* name
1347  )
1348 {
1349  const XML_NODE* n;
1350 
1351  assert(node != NULL);
1352  assert(name != NULL);
1353 
1354  for (n = node; n != NULL; n = n->nextsibl)
1355  {
1356  if ( ! strcmp(name, n->name) )
1357  break;
1358  }
1359 
1360  return n;
1361 }
1362 
1363 /** return next node */
1365  const XML_NODE* node,
1366  const char* name
1367  )
1368 {
1369  assert(node != NULL);
1370  assert(name != NULL);
1371 
1372  return (node->nextsibl == NULL) ? NULL : xmlFirstNode(node->nextsibl, name);
1373 }
1374 
1375 /** find node */
1377  const XML_NODE* node,
1378  const char* name
1379  )
1380 {
1381  const XML_NODE* n;
1382  const XML_NODE* r;
1383 
1384  assert(node != NULL);
1385  assert(name != NULL);
1386 
1387  if ( ! strcmp(name, node->name) )
1388  return node;
1389 
1390  for (n = node->firstchild; n != NULL; n = n->nextsibl)
1391  {
1392  r = xmlFindNode(n, name);
1393  if ( r != NULL )
1394  return r;
1395  }
1396 
1397  return NULL;
1398 }
1399 
1400 /** find node with bound on the depth */
1402  const XML_NODE* node, /**< current node - use start node to begin */
1403  const char* name, /**< name of tag to search for */
1404  int depth, /**< current depth - start with 0 for root */
1405  int maxdepth /**< maximal depth */
1406  )
1407 {
1408  const XML_NODE* n;
1409  const XML_NODE* r;
1410 
1411  assert(node != NULL);
1412  assert(name != NULL);
1413 
1414  if ( ! strcmp(name, node->name) )
1415  return node;
1416 
1417  if ( depth < maxdepth )
1418  {
1419  for (n = node->firstchild; n != NULL; n = n->nextsibl)
1420  {
1421  r = xmlFindNodeMaxdepth(n, name, depth+1, maxdepth);
1422  if ( r != NULL )
1423  return r;
1424  }
1425  }
1426 
1427  return NULL;
1428 }
1429 
1430 /** return next sibling */
1432  const XML_NODE* node
1433  )
1434 {
1435  assert(node != NULL);
1436 
1437  return node->nextsibl;
1438 }
1439 
1440 /** return previous sibling */
1442  const XML_NODE* node
1443  )
1444 {
1445  assert(node != NULL);
1446 
1447  return node->prevsibl;
1448 }
1449 
1450 /** return first child */
1452  const XML_NODE* node
1453  )
1454 {
1455  assert(node != NULL);
1456 
1457  return node->firstchild;
1458 }
1459 
1460 /** return last child */
1462  const XML_NODE* node
1463  )
1464 {
1465  assert(node != NULL);
1466 
1467  return node->lastchild;
1468 }
1469 
1470 /** return name of node */
1471 const char* xmlGetName(
1472  const XML_NODE* node
1473  )
1474 {
1475  assert(node != NULL);
1476 
1477  return node->name;
1478 }
1479 
1480 /** get line number */
1482  const XML_NODE* node
1483  )
1484 {
1485  assert(node != NULL);
1486 
1487  return node->lineno;
1488 }
1489 
1490 /** get data */
1491 const char* xmlGetData(
1492  const XML_NODE* node
1493  )
1494 {
1495  assert(node != NULL);
1496 
1497  return node->data;
1498 }
1499 
1500 /** find PCDATA */
1501 const char* xmlFindPcdata(
1502  const XML_NODE* node,
1503  const char* name
1504  )
1505 {
1506  const XML_NODE* n;
1507 
1508  assert(node != NULL);
1509  assert(name != NULL);
1510 
1511  n = xmlFindNode(node, name);
1512  if ( n == NULL )
1513  return NULL;
1514 
1515  if ( ! strcmp(n->firstchild->name, "#PCDATA") )
1516  return n->firstchild->data;
1517 
1518  return NULL;
1519 }
#define XML_Bool
Definition: xmldef.h:33
#define LINE_BUF_SIZE
Definition: xmlparse.c:50
const XML_NODE * xmlFirstNode(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1344
static void handleDecl(PPOS *ppos)
Definition: xmlparse.c:636
PSTACK * next
Definition: xmlparse.c:74
const char * xmlFindPcdata(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1501
void xmlFreeNode(XML_NODE *node)
Definition: xmlparse.c:1257
#define FREAD(buf, len, fp)
Definition: xmldef.h:54
#define NULL
Definition: lpi_spx.cpp:129
static int getsymbol(PPOS *ppos)
Definition: xmlparse.c:304
#define ALLOC_ABORT(x)
Definition: tclique_def.h:35
#define FCLOSE(fp)
Definition: xmldef.h:52
struct XML_ATTR_struct XML_ATTR
Definition: xml.h:32
static char * doCdata(PPOS *ppos)
Definition: xmlparse.c:541
static void xmlFreeAttr(XML_ATTR *attr)
Definition: xmlparse.c:1232
#define FALSE
Definition: def.h:52
XML_NODE * xmlNewNode(const char *name, int lineno)
Definition: xmlparse.c:1158
#define TRUE
Definition: def.h:51
const char * xmlGetName(const XML_NODE *node)
Definition: xmlparse.c:1471
#define BMSallocMemoryArray(ptr, num)
Definition: memory.h:76
#define DATA_EXT_SIZE
Definition: xmlparse.c:49
enum parse_state_enum PSTATE
Definition: xmlparse.c:68
#define ATTR_EXT_SIZE
Definition: xmlparse.c:48
XML_ATTR * xmlNewAttr(const char *name, const char *value)
Definition: xmlparse.c:1177
#define BMSfreeMemory(ptr)
Definition: memory.h:117
void xmlShowNode(const XML_NODE *root)
Definition: xmlparse.c:1291
static void procInTag(PPOS *ppos)
Definition: xmlparse.c:862
const char * xmlGetData(const XML_NODE *node)
Definition: xmlparse.c:1491
#define debugMessage
Definition: tclique_def.h:65
static void xmlErrmsg(PPOS *ppos, const char *msg, XML_Bool msg_only, const char *file, int line)
Definition: xmlparse.c:93
static void procPcdata(PPOS *ppos)
Definition: xmlparse.c:945
static int mygetc(PPOS *ppos)
Definition: xmlparse.c:230
PSTACK * top
Definition: xmlparse.c:88
static void ungetsymbol(PPOS *ppos, int c)
Definition: xmlparse.c:340
#define BMSfreeMemoryArray(ptr)
Definition: memory.h:120
parse_state_enum
Definition: xmlparse.c:60
static XML_Bool popPstack(PPOS *ppos)
Definition: xmlparse.c:190
struct XML_NODE_struct XML_NODE
Definition: xml.h:41
static XML_Bool doComment(PPOS *ppos)
Definition: xmlparse.c:505
#define ALLOC_FALSE(x)
Definition: tclique_def.h:47
const XML_NODE * xmlFindNode(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1376
const char * xmlGetAttrval(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1319
const XML_NODE * xmlNextNode(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1364
static XML_Bool xmlParse(PPOS *ppos)
Definition: xmlparse.c:1031
#define BMSduplicateMemoryArray(ptr, source, num)
Definition: memory.h:111
const XML_NODE * xmlFirstChild(const XML_NODE *node)
Definition: xmlparse.c:1451
XML_NODE * xmlProcess(const char *filename)
Definition: xmlparse.c:1067
static int skipSpace(PPOS *ppos)
Definition: xmlparse.c:353
static XML_NODE * topPstack(const PPOS *ppos)
Definition: xmlparse.c:175
static char * getAttrval(PPOS *ppos)
Definition: xmlparse.c:441
void xmlAddAttr(XML_NODE *n, XML_ATTR *a)
Definition: xmlparse.c:1197
#define FGETS(buf, len, fp)
Definition: xmldef.h:53
#define BMScopyMemoryArray(ptr, source, num)
Definition: memory.h:102
#define FOPEN(file, mode)
Definition: xmldef.h:51
#define infoMessage
Definition: tclique_def.h:71
#define FPTYPE
Definition: xmldef.h:55
#define xmlError(a, b)
Definition: xmlparse.c:52
#define BMSclearMemory(ptr)
Definition: memory.h:97
static char * getName(PPOS *ppos)
Definition: xmlparse.c:376
XML_NODE * node
Definition: xmlparse.c:73
const XML_NODE * xmlNextSibl(const XML_NODE *node)
Definition: xmlparse.c:1431
char buf[LINE_BUF_SIZE]
Definition: xmlparse.c:82
const char * filename
Definition: xmlparse.c:80
static XML_Bool pushPstack(PPOS *ppos, XML_NODE *node)
Definition: xmlparse.c:152
#define BMSallocMemory(ptr)
Definition: memory.h:92
#define BMSreallocMemoryArray(ptr, num)
Definition: memory.h:80
declarations for XML parsing
#define NAME_EXT_SIZE
Definition: xmlparse.c:47
const XML_NODE * xmlPrevSibl(const XML_NODE *node)
Definition: xmlparse.c:1441
const XML_NODE * xmlLastChild(const XML_NODE *node)
Definition: xmlparse.c:1461
const XML_NODE * xmlFindNodeMaxdepth(const XML_NODE *node, const char *name, int depth, int maxdepth)
Definition: xmlparse.c:1401
static void handlePi(PPOS *ppos)
Definition: xmlparse.c:607
int xmlGetLine(const XML_NODE *node)
Definition: xmlparse.c:1481
void xmlAppendChild(XML_NODE *parent, XML_NODE *child)
Definition: xmlparse.c:1210
static void handleStarttag(PPOS *ppos)
Definition: xmlparse.c:779
PSTATE state
Definition: xmlparse.c:87
static void procBefore(PPOS *ppos)
Definition: xmlparse.c:817
static void handleEndtag(PPOS *ppos)
Definition: xmlparse.c:738
static void clearPstack(PPOS *ppos)
Definition: xmlparse.c:218
memory allocation routines
definitions for XML parsing