Scippy

SCIP

Solving Constraint Integer Programs

xmlparse.c
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* */
3 /* This file is part of the program and library */
4 /* SCIP --- Solving Constraint Integer Programs */
5 /* */
6 /* Copyright (c) 2002-2024 Zuse Institute Berlin (ZIB) */
7 /* */
8 /* Licensed under the Apache License, Version 2.0 (the "License"); */
9 /* you may not use this file except in compliance with the License. */
10 /* You may obtain a copy of the License at */
11 /* */
12 /* http://www.apache.org/licenses/LICENSE-2.0 */
13 /* */
14 /* Unless required by applicable law or agreed to in writing, software */
15 /* distributed under the License is distributed on an "AS IS" BASIS, */
16 /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
17 /* See the License for the specific language governing permissions and */
18 /* limitations under the License. */
19 /* */
20 /* You should have received a copy of the Apache-2.0 license */
21 /* along with SCIP; see the file LICENSE. If not visit scipopt.org. */
22 /* */
23 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 
25 /**@file xmldef.h
26  * @ingroup OTHER_CFILES
27  * @brief declarations for XML parsing
28  * @author Thorsten Koch
29  * @author Marc Pfetsch
30  *
31  * If SPEC_LIKE_SPACE_HANDLING is not defined, all LF,CR will be changed into spaces and from a
32  * sequence of spaces only one will be used.
33  *
34  * @todo Implement possibility to avoid the construction of parsing information for certain tags
35  * (and their children). For solution files this would avoid parsing the constraints section.
36  */
37 
38 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
39 
40 #include <blockmemshell/memory.h>
41 
42 #include "xml.h"
43 #include "xmldef.h"
44 #include "scip/misc.h"
45 
46 
47 #include <sys/types.h>
48 #ifdef SCIP_WITH_ZLIB
49 #if defined(_WIN32) || defined(_WIN64)
50 #define R_OK _A_RDONLY
51 #define access _access
52 #include <io.h>
53 #else
54 #include <unistd.h>
55 #endif
56 #endif
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <assert.h>
60 #include <ctype.h>
61 #include <string.h>
62 
63 
64 #define NAME_EXT_SIZE 128
65 #define ATTR_EXT_SIZE 4096
66 #define DATA_EXT_SIZE 4096
67 #define LINE_BUF_SIZE 8192
68 
69 #define xmlError(a, b) xmlErrmsg(a, b, FALSE, __FILE__, __LINE__)
70 
71 
72 /* forward declarations */
73 typedef struct parse_stack_struct PSTACK;
74 typedef struct parse_pos_struct PPOS;
75 
76 /** state of the parser */
78 {
84 };
85 typedef enum parse_state_enum PSTATE;
86 
87 /** Stack as a (singly) linked list. The top element is the current node. */
89 {
92 };
93 
94 /** Store the current position in the file and the state of the parser. */
96 {
97  const char* filename;
99  char buf[LINE_BUF_SIZE];
100  int pos;
101  int lineno;
102  int nextsym;
103  int lastsym;
106 };
107 
108 
109 /** output error message with corresponding line and position */
110 static void xmlErrmsg(
111  PPOS* ppos,
112  const char* msg,
113  XML_Bool msg_only,
114  const char* file,
115  int line
116  )
117 {
118 #ifndef NDEBUG
119  int ret;
120  assert( ppos != NULL );
121 
122  if ( ! msg_only )
123  {
124  ret = fprintf(stderr, "%s(%d) Error in file %s line %d\n", file, line, ppos->filename, ppos->lineno);
125  assert(ret >= 0);
126 
127  ret = fprintf(stderr, "%s", ppos->buf);
128  assert(ret >= 0);
129 
130  if ( strchr(ppos->buf, '\n') == NULL )
131  {
132  int retc;
133 
134  retc = fputc('\n', stderr);
135  assert(retc != EOF);
136  }
137 
138  ret = fprintf(stderr, "%*s\n", ppos->pos, "^");
139  assert(ret >= 0);
140  }
141  ret = fprintf(stderr, "%s\n\n", msg);
142  assert(ret >= 0);
143 
144 #else
145 
146  if ( ! msg_only )
147  {
148  (void) fprintf(stderr, "%s(%d) Error in file %s line %d\n", file, line, ppos->filename, ppos->lineno);
149 
150  (void) fprintf(stderr, "%s", ppos->buf);
151 
152  if ( strchr(ppos->buf, '\n') == NULL )
153  {
154  (void) fputc('\n', stderr);
155  }
156 
157  (void) fprintf(stderr, "%*s\n", ppos->pos, "^");
158  }
159  (void) fprintf(stderr, "%s\n\n", msg);
160 #endif
161 }
162 
163 
164 /** Push new element on the parse stack.
165  *
166  * TRUE if it worked, FAILURE otherwise.
167  */
168 static
170  PPOS* ppos,
171  XML_NODE* node
172  )
173 {
174  PSTACK* p;
175 
176  assert(ppos != NULL);
177  assert(node != NULL);
178 
179  debugMessage("Pushing %s\n", node->name);
180 
182  assert(p != NULL);
183 
184  p->node = node;
185  p->next = ppos->top;
186  ppos->top = p;
187 
188  return TRUE;
189 }
190 
191 /** returns top element on stack (which has to be present) */
193  const PPOS* ppos
194  )
195 {
196  assert(ppos != NULL);
197  assert(ppos->top != NULL);
198 
199  return ppos->top->node;
200 }
201 
202 /** remove top element from stack and deletes it
203  *
204  * TRUE if ok, FALSE otherwise
205  */
206 static
208  PPOS* ppos /**< input stream position */
209  )
210 {
211  PSTACK* p;
212  XML_Bool result;
213 
214  assert(ppos != NULL);
215 
216  if ( ppos->top == NULL )
217  {
218  xmlError(ppos, "Stack underflow");
219  result = FALSE;
220  }
221  else
222  {
223  result = TRUE;
224  p = ppos->top;
225  ppos->top = p->next;
226 
227  debugMessage("Poping %s\n", p->node->name);
228  BMSfreeMemory(&p);
229  }
230  return result;
231 }
232 
233 /** remove complete stack */
234 static
236  PPOS* ppos
237  )
238 {
239  assert(ppos != NULL);
240 
241  while ( ppos->top != NULL )
242  (void) popPstack(ppos);
243 }
244 
245 /** Returns the next character from the input buffer and fills the buffer if it is empty (similar to fgetc()). */
246 static
247 int mygetc(
248  PPOS* ppos
249  )
250 {
251  assert(ppos != NULL);
252  assert(ppos->fp != NULL);
253  assert(ppos->pos < LINE_BUF_SIZE);
254 
255  if ( ppos->buf[ppos->pos] == '\0' )
256  {
257 #ifdef SCIP_DISABLED_CODE
258  /* the low level function gzread/fread used below seem to be faster */
259  if ( NULL == FGETS(ppos->buf, sizeof(ppos->buf), ppos->fp) )
260  return EOF;
261 #else
262  size_t len = (size_t) FREAD(ppos->buf, sizeof(ppos->buf) - 1, ppos->fp); /*lint !e571 !e747*/
263 
264  if( len == 0 || len > sizeof(ppos->buf) - 1 )
265  return EOF;
266 
267  ppos->buf[len] = '\0';
268 #endif
269  ppos->pos = 0;
270  }
271  return (unsigned char)ppos->buf[ppos->pos++];
272 }
273 
274 
275 #ifdef SPEC_LIKE_SPACE_HANDLING
276 /** Read input from fp_in.
277  *
278  * If there is a LF, CR, CR/LF, or LF/CR it returns exactly on LF. Also counts the number of
279  * characters.
280  */
281 static
282 int getsymbol(
283  PPOS* ppos
284  )
285 {
286  int c;
287 
288  assert(ppos != NULL);
289 
290  if ( ppos->nextsym == 0 )
291  c = mygetc(ppos);
292  else
293  {
294  c = ppos->nextsym;
295  ppos->nextsym = 0;
296  }
297  assert(ppos->nextsym == 0);
298 
299  if (((c == '\n') && (ppos->lastsym == '\r')) || ((c == '\r') && (ppos->lastsym == '\n')))
300  c = mygetc(ppos);
301 
302  ppos->lastsym = c;
303 
304  if ( c == '\r' )
305  c = '\n';
306 
307  if ( c == '\n' )
308  ++ppos->lineno;
309 
310  return c;
311 }
312 #else
313 /** Read input from fp_in (variant).
314  *
315  * Here we convert all LF or CR into SPACE and return maximally one SPACE after the other.
316  *
317  * @note This function counts lines differently. On systems that have only one '\\r' as line feed
318  * (MAC) it does not count correctly.
319  */
320 static
322  PPOS* ppos
323  )
324 {
325  int c;
326 
327  assert(ppos != NULL);
328 
329  do
330  {
331  if ( ppos->nextsym == 0 )
332  c = mygetc(ppos);
333  else
334  {
335  c = ppos->nextsym;
336  ppos->nextsym = 0;
337  }
338  assert(ppos->nextsym == 0);
339 
340  if ( c == '\n' )
341  ++ppos->lineno;
342 
343  if ((c == '\n') || (c == '\r'))
344  c = ' ';
345  } while((c == ' ') && (ppos->lastsym == c));
346 
347  ppos->lastsym = c;
348 
349  debugMessage("[%c]\n", c);
350 
351  return c;
352 }
353 #endif
354 
355 /** Reinserts a character into the input stream */
356 static
358  PPOS* ppos,
359  int c
360  )
361 {
362  assert(ppos != NULL);
363  assert(ppos->nextsym == 0);
364 
365  ppos->nextsym = c;
366 }
367 
368 /** Skip all spaces and return the next non-space character or EOF */
369 static
371  PPOS* ppos
372  )
373 {
374  int c;
375 
376  assert(ppos != NULL);
377 
378  do
379  {
380  c = getsymbol(ppos);
381  }
382  while(isspace(c));
383 
384  return c;
385 }
386 
387 /** Get name of a TAG or attribute from the input stream.
388  *
389  * Either it returns a pointer to allocated memory which contains the name or it returns NULL if
390  * there is some error.
391  */
392 static
393 char* getName(
394  PPOS* ppos
395  )
396 {
397  char* name = NULL;
398  size_t size = 0;
399  size_t len = 0;
400  int c;
401 
402  assert(ppos != NULL);
403 
404  c = getsymbol(ppos);
405 
406  if ( ! isalpha(c) && (c != '_') && (c != ':') )
407  {
408  xmlError(ppos, "Name starting with illegal charater");
409  return NULL;
410  }
411 
412  /* The following is wrong: Here almost all characters that we casted to unicode are feasible */
413  while ( isalnum(c) || (c == '_') || (c == ':') || (c == '.') || (c == '-') )
414  {
415  if ( len + 1 >= size )
416  {
417  size += NAME_EXT_SIZE;
418 
419  if ( name == NULL )
420  {
421  ALLOC_ABORT( BMSallocMemoryArray(&name, size) );
422  }
423  else
424  {
425  ALLOC_ABORT( BMSreallocMemoryArray(&name, size) );
426  }
427  }
428  assert(name != NULL);
429  assert(size > len);
430 
431  name[len++] = (char)c;
432 
433  c = getsymbol(ppos);
434  }
435  if ( c != EOF )
436  ungetsymbol(ppos, c);
437 
438  assert(name != NULL);
439 
440  if ( len == 0 )
441  {
442  BMSfreeMemoryArray(&name);
443  name = NULL;
444  }
445  else
446  name[len] = '\0';
447 
448  return name;
449 }
450 
451 /** Read the value of an attribute from the input stream.
452  *
453  * The value has to be between two " or ' (the other character is then valid as well). The function
454  * returns a pointer to allocated memory containing the value or it returns NULL in case of an
455  * error.
456  */
457 static
459  PPOS* ppos
460  )
461 {
462  char* attr = NULL;
463  int c;
464  int stop;
465  size_t len = 0;
466  size_t size = 0;
467 
468  assert(ppos != NULL);
469 
470  /* The following is not allowed according to the specification (the value has to be directly
471  * after the equation sign). */
472  c = skipSpace(ppos);
473 
474  if ( (c != '"') && (c != '\'') )
475  {
476  xmlError(ppos, "Atribute value does not start with \" or \'");
477  return NULL;
478  }
479  stop = c;
480 
481  for(;;)
482  {
483  if ( len == size )
484  {
485  size += ATTR_EXT_SIZE;
486 
487  if ( attr == NULL )
488  {
489  ALLOC_ABORT( BMSallocMemoryArray(&attr, size) );
490  }
491  else
492  {
493  ALLOC_ABORT( BMSreallocMemoryArray(&attr, size) );
494  }
495  }
496  assert(attr != NULL);
497  assert(size > len);
498 
499  c = getsymbol(ppos);
500 
501  if ( (c == stop) || (c == EOF) )
502  break;
503 
504  attr[len++] = (char)c;
505  }
506 
507  if ( c != EOF )
508  attr[len] = '\0';
509  else
510  {
511  BMSfreeMemoryArray(&attr);
512  attr = NULL;
513  }
514  return attr;
515 }
516 
517 /** Skip comment
518  *
519  * Return FALSE if an error occurs.
520  */
521 static
523  PPOS* ppos
524  )
525 {
526  XML_Bool result = TRUE;
527  int c;
528  int state = 0;
529 
530  assert(ppos != NULL);
531 
532  for(;;)
533  {
534  c = getsymbol(ppos);
535 
536  if ( c == EOF )
537  break;
538 
539  if ( (c == '>') && (state >= 2) )
540  break;
541 
542  state = (c == '-') ? state + 1 : 0;
543  }
544  if ( c == EOF )
545  {
546  xmlError(ppos, "Unexpected EOF in comment");
547  result = FALSE;
548  }
549  return result;
550 }
551 
552 /** Handles a CDATA section.
553  *
554  * Returns a pointer to allocated memory containing the data of this section or NULL in case of an
555  * error.
556  */
557 static
558 char* doCdata(
559  PPOS* ppos
560  )
561 {
562  char* data = NULL;
563  size_t size = 0;
564  size_t len = 0;
565  int state = 0;
566  int c;
567 
568  assert(ppos != NULL);
569 
570  for(;;)
571  {
572  c = getsymbol(ppos);
573 
574  if ( c == EOF )
575  break;
576 
577  if ( c == ']' )
578  state++;
579  else
580  if ( (c == '>') && (state >= 2) )
581  break;
582  else
583  state = 0;
584 
585  if ( len == size )
586  {
587  size += DATA_EXT_SIZE;
588 
589  if ( data == NULL )
590  {
591  ALLOC_ABORT( BMSallocMemoryArray(&data, size) );
592  }
593  else
594  {
595  ALLOC_ABORT( BMSreallocMemoryArray(&data, size) );
596  }
597  }
598  assert(data != NULL);
599  assert(size > len);
600 
601  data[len++] = (char)c;
602  }
603  assert(data != NULL);
604 
605  /*lint --e{527}*/
606  if ( c != EOF )
607  {
608  assert(len >= 2);
609  assert(data != NULL);
610 
611  data[len - 2] = '\0'; /*lint !e413*/
612  }
613  else
614  {
615  BMSfreeMemoryArray(&data);
616  data = NULL;
617  xmlError(ppos, "Unexpected EOF in CDATA");
618  }
619  return data;
620 }
621 
622 /** Handle processing instructions (skipping) */
623 static
624 void handlePi(
625  PPOS* ppos
626  )
627 {
628  int c;
629 
630  assert(ppos != NULL);
631  assert(ppos->state == XML_STATE_BEFORE);
632 
633  do
634  {
635  c = getsymbol(ppos);
636  }
637  while ( (c != EOF) && (c != '>') );
638 
639  if ( c != EOF )
640  ppos->state = XML_STATE_PCDATA;
641  else
642  {
643  xmlError(ppos, "Unexpected EOF in PI");
644  ppos->state = XML_STATE_ERROR;
645  }
646 }
647 
648 /** Handles declarations that start with a <!.
649  *
650  * This includes comments. Does currenlty not work very well, because of DTDs.
651  */
652 static
654  PPOS* ppos
655  )
656 {
657  enum XmlSection
658  {
659  IS_COMMENT,
660  IS_ATTLIST,
661  IS_DOCTYPE,
662  IS_ELEMENT,
663  IS_ENTITY,
664  IS_NOTATION,
665  IS_CDATA
666  };
667  typedef enum XmlSection XMLSECTION;
668 
669  static struct
670  {
671  const char* name;
672  XMLSECTION what;
673  } key[] =
674  {
675  { "--", IS_COMMENT },
676  { "ATTLIST", IS_ATTLIST },
677  { "DOCTYPE", IS_DOCTYPE },
678  { "ELEMENT", IS_ELEMENT },
679  { "ENTITY", IS_ENTITY },
680  { "NOTATION", IS_NOTATION },
681  { "[CDATA[", IS_CDATA }
682  };
683  XML_NODE* node;
684  char* data;
685  int c;
686  int k = 0;
687  int beg = 0;
688  int end;
689 
690  assert(ppos != NULL);
691  assert(ppos->state == XML_STATE_BEFORE);
692 
693  end = (int) (sizeof(key) / sizeof(key[0])) - 1;
694  do
695  {
696  c = getsymbol(ppos);
697 
698  for(; (beg <= end) && (c != key[beg].name[k]); beg++)
699  ;
700  for(; (end >= beg) && (c != key[end].name[k]); end--)
701  ;
702  k++;
703  }
704  while(beg < end);
705 
706  if ( beg != end )
707  {
708  xmlError(ppos, "Unknown declaration");
709 
710  while ( (c != EOF) && (c != '>') )
711  c = getsymbol(ppos);
712  }
713  else
714  {
715  assert(beg == end);
716  assert(beg < (int)(sizeof(key) / sizeof(*key)));
717  assert(beg >= 0);
718 
719  switch(key[beg].what)
720  {
721  case IS_COMMENT :
722  if ( ! doComment(ppos) )
723  ppos->state = XML_STATE_ERROR;
724  break;
725  case IS_CDATA :
726  if ( (data = doCdata(ppos)) == NULL )
727  ppos->state = XML_STATE_ERROR;
728  else
729  {
730  if ( NULL == (node = xmlNewNode("#CDATA", ppos->lineno)) )
731  {
732  xmlError(ppos, "Can't create new node");
733  ppos->state = XML_STATE_ERROR;
734  }
735  else
736  {
737  BMSduplicateMemoryArray(&node->data, data, strlen(data)+1);
738  BMSfreeMemoryArray(&data);
739  xmlAppendChild(topPstack(ppos), node);
740  }
741  }
742  break;
743  case IS_ATTLIST :
744  case IS_ELEMENT :
745  case IS_NOTATION :
746  case IS_ENTITY :
747  case IS_DOCTYPE :
748  break;
749  default :
750  abort();
751  }
752  }
753 }
754 
755 /** Handle end tag */
756 static
758  PPOS* ppos
759  )
760 {
761  char* name;
762  int c;
763 
764  assert(ppos != NULL);
765 
766  if ( (name = getName(ppos)) == NULL )
767  xmlError(ppos, "Missing name in endtag");
768  else
769  {
770  c = skipSpace(ppos);
771 
772  if ( c != '>' )
773  {
774  xmlError(ppos, "Missing '>' in endtag");
775  ppos->state = XML_STATE_ERROR;
776  }
777  else
778  {
779  if ( strcmp(name, topPstack(ppos)->name) )
780  {
781  xmlError(ppos, "Name of endtag does not match starttag");
782  ppos->state = XML_STATE_ERROR;
783  }
784  else
785  {
786  if ( popPstack(ppos) )
787  ppos->state = XML_STATE_PCDATA;
788  else
789  ppos->state = XML_STATE_ERROR;
790  }
791  }
792 
793  BMSfreeMemoryArray(&name);
794  }
795 }
796 
797 /** Handle start tag */
798 static
800  PPOS* ppos
801  )
802 {
803  XML_NODE* node;
804  char* name;
805 
806  assert(ppos != NULL);
807 
808  name = getName(ppos);
809  if ( name == NULL )
810  {
811  xmlError(ppos, "Missing name in tagstart");
812  ppos->state = XML_STATE_ERROR;
813  }
814  else
815  {
816  node = xmlNewNode(name, ppos->lineno);
817  if ( node == NULL )
818  {
819  xmlError(ppos, "Can't create new node");
820  ppos->state = XML_STATE_ERROR;
821  }
822  else
823  {
824  xmlAppendChild(topPstack(ppos), node);
825 
826  if ( pushPstack(ppos, node) )
827  ppos->state = XML_STATE_IN_TAG;
828  else
829  ppos->state = XML_STATE_ERROR;
830  }
831  BMSfreeMemoryArray(&name);
832  }
833 }
834 
835 /** Checks for next tag */
836 static
838  PPOS* ppos /**< input stream position */
839  )
840 {
841  int c;
842 
843  assert(ppos != NULL);
844  assert(ppos->state == XML_STATE_BEFORE);
845 
846  c = skipSpace(ppos);
847 
848  if ( c != '<' )
849  {
850  xmlError(ppos, "Expecting '<'");
851  ppos->state = XML_STATE_ERROR;
852  }
853  else
854  {
855  c = getsymbol(ppos);
856 
857  switch(c)
858  {
859  case EOF :
860  xmlError(ppos, "Unexpected EOF");
861  ppos->state = XML_STATE_ERROR;
862  break;
863  case '!' :
864  handleDecl(ppos);
865  break;
866  case '?' :
867  handlePi(ppos);
868  break;
869  case '/' :
870  handleEndtag(ppos);
871  break;
872  default :
873  ungetsymbol(ppos, c);
874  handleStarttag(ppos);
875  break;
876  }
877  }
878 }
879 
880 /** Process tag */
881 static
883  PPOS* ppos /**< input stream position */
884  )
885 {
886  XML_ATTR* attr;
887  int c;
888  XML_Bool empty = FALSE;
889  char* name;
890  char* value;
891 
892  assert(ppos != NULL);
893  assert(ppos->state == XML_STATE_IN_TAG);
894 
895  c = skipSpace(ppos);
896 
897  if ( (c == '/') || (c == '>') || (c == EOF) )
898  {
899  if ( c == '/' )
900  {
901  empty = TRUE;
902  c = getsymbol(ppos);
903  }
904 
905  if ( c == EOF )
906  {
907  xmlError(ppos, "Unexpected EOF while in a tag");
908  ppos->state = XML_STATE_ERROR;
909  }
910 
911  if ( c == '>' )
912  {
913  ppos->state = XML_STATE_PCDATA;
914 
915  if (empty && ! popPstack(ppos))
916  ppos->state = XML_STATE_ERROR;
917  }
918  else
919  {
920  xmlError(ppos, "Expected tag end marker '>'");
921  ppos->state = XML_STATE_ERROR;
922  }
923  }
924  else
925  {
926  ungetsymbol(ppos, c);
927 
928  name = getName(ppos);
929  if ( name == NULL )
930  {
931  xmlError(ppos, "No name for attribute");
932  ppos->state = XML_STATE_ERROR;
933  }
934  else
935  {
936  c = skipSpace(ppos);
937 
938  if ( (c != '=') || ((value = getAttrval(ppos)) == NULL) )
939  {
940  xmlError(ppos, "Missing attribute value");
941  ppos->state = XML_STATE_ERROR;
942  BMSfreeMemoryArray(&name);
943  }
944  else
945  {
946  attr = xmlNewAttr(name, value);
947  if ( attr == NULL )
948  {
949  xmlError(ppos, "Can't create new attribute");
950  ppos->state = XML_STATE_ERROR;
951  }
952  else
953  {
954  xmlAddAttr(topPstack(ppos), attr);
955  }
956  BMSfreeMemoryArray(&name);
957  BMSfreeMemoryArray(&value);
958  }
959  }
960  }
961 }
962 
963 /* Handles PCDATA */
964 static
966  PPOS* ppos /**< input stream position */
967  )
968 {
969  XML_NODE* node;
970  char* data = NULL;
971  size_t size = 0;
972  size_t len = 0;
973  int c;
974 
975  assert(ppos != NULL);
976  assert(ppos->state == XML_STATE_PCDATA);
977 
978 #ifndef SPEC_LIKE_SPACE_HANDLING
979  c = skipSpace(ppos);
980  if ( c != EOF )
981  ungetsymbol(ppos, c);
982 #endif
983  c = getsymbol(ppos);
984 
985  while ( (c != EOF) && (c != '<') )
986  {
987  if ( len + 1 >= size ) /* leave space for terminating '\0' */
988  {
989  size += DATA_EXT_SIZE;
990 
991  if ( data == NULL )
992  {
993  ALLOC_ABORT( BMSallocMemoryArray(&data, size) );
994  }
995  else
996  {
997  ALLOC_ABORT( BMSreallocMemoryArray(&data, size) );
998  }
999  }
1000  assert(data != NULL);
1001  assert(size > len + 1);
1002 
1003  data[len++] = (char)c;
1004 
1005  c = getsymbol(ppos);
1006  }
1007  if ( data == NULL )
1008  {
1009  if ( c == EOF )
1010  ppos->state = XML_STATE_EOF;
1011  else
1012  {
1013  assert(c == '<');
1014  ppos->state = XML_STATE_BEFORE;
1015  ungetsymbol(ppos, c);
1016  }
1017  }
1018  else
1019  {
1020  assert(len < size);
1021  data[len] = '\0';
1022 
1023  if ( c == EOF )
1024  ppos->state = XML_STATE_ERROR;
1025  else
1026  {
1027  ungetsymbol(ppos, c);
1028 
1029  node = xmlNewNode("#PCDATA", ppos->lineno);
1030  if ( node == NULL )
1031  {
1032  xmlError(ppos, "Can't create new node");
1033  ppos->state = XML_STATE_ERROR;
1034  }
1035  else
1036  {
1037  BMSduplicateMemoryArray(&node->data, data, strlen(data)+1);
1038  xmlAppendChild(topPstack(ppos), node);
1039  ppos->state = XML_STATE_BEFORE;
1040  }
1041  }
1042 
1043  BMSfreeMemoryArray(&data);
1044  }
1045 }
1046 
1047 /** Parse input stream */
1048 static
1050  PPOS* ppos /**< input stream position */
1051  )
1052 {
1053  XML_Bool ok = TRUE;
1054 
1055  while (ok)
1056  {
1057  debugMessage("state=%d\n", ppos->state);
1058 
1059  switch (ppos->state)
1060  {
1061  case XML_STATE_BEFORE :
1062  procBefore(ppos);
1063  break;
1064  case XML_STATE_IN_TAG :
1065  procInTag(ppos);
1066  break;
1067  case XML_STATE_PCDATA :
1068  procPcdata(ppos);
1069  break;
1070  case XML_STATE_EOF :
1071  ok = FALSE;
1072  break;
1073  case XML_STATE_ERROR :
1074  ok = FALSE;
1075  break;
1076  default :
1077  xmlError(ppos, "Internal Error, illegal state");
1078  ok = FALSE;
1079  }
1080  }
1081  return (ppos->state == XML_STATE_EOF);
1082 }
1083 
1084 /** Parse file */
1086  const char* filename /**< XML file name */
1087  )
1088 {
1089  PPOS ppos;
1090  XML_NODE* node = NULL;
1091  XML_ATTR* attr;
1092  XML_Bool result = FALSE;
1093  char* myfilename;
1094  size_t filenamelen;
1095 
1096  /* allocate space and copy filename (possibly modified below) in two steps in order to satisfy valgrind */
1097  assert( filename != NULL );
1098  filenamelen = strlen(filename);
1099  if ( BMSallocMemoryArray(&myfilename, filenamelen + 5) == NULL )
1100  return NULL;
1101  BMScopyMemoryArray(myfilename, filename, filenamelen + 1);
1102 
1103 #ifdef SCIP_WITH_ZLIB
1104  if ( access(filename, R_OK) != 0 )
1105  {
1106  strcat(myfilename, ".gz");
1107 
1108  /* If .gz also does not work, revert to the old name
1109  * to get a better error message.
1110  */
1111  if ( access(myfilename, R_OK) != 0 )
1112  (void)SCIPstrncpy(myfilename, filename, (int)filenamelen + 5);
1113  }
1114 #endif
1115  ppos.fp = FOPEN(myfilename, "r");
1116  if ( ppos.fp == NULL )
1117  perror(myfilename);
1118  else
1119  {
1120  ppos.filename = myfilename;
1121  ppos.buf[0] = '\0';
1122  ppos.pos = 0;
1123  ppos.lineno = 1;
1124  ppos.nextsym = 0;
1125  ppos.lastsym = 0;
1126  ppos.state = XML_STATE_BEFORE;
1127  ppos.top = NULL;
1128 
1129  node = xmlNewNode("#ROOT", ppos.lineno);
1130  if ( node == NULL )
1131  {
1132  xmlError(&ppos, "Can't create new node");
1133  }
1134  else
1135  {
1136  attr = xmlNewAttr("filename", myfilename);
1137  if ( attr == NULL )
1138  xmlError(&ppos, "Can't create new attribute");
1139  else
1140  {
1141  xmlAddAttr(node, attr);
1142 
1143  /* push root node on stack and start to process */
1144  if ( pushPstack(&ppos, node) )
1145  {
1146  result = xmlParse(&ppos);
1147 
1148  clearPstack(&ppos);
1149  }
1150  }
1151  }
1152 
1153  if ( ! result && (node != NULL) )
1154  {
1155  xmlErrmsg(&ppos, "Parsing error, processing stopped", TRUE, __FILE__, __LINE__);
1156  xmlFreeNode(node);
1157  node = NULL;
1158  }
1159  if ( FCLOSE(ppos.fp) )
1160  perror(myfilename);
1161  }
1162  BMSfreeMemoryArray(&myfilename);
1163 
1164  return node;
1165 }
1166 
1167 
1168 
1169 
1170 
1171 
1172 /*----------------------------------------------------------------------------------------------*/
1173 
1174 
1175 /** create new node */
1177  const char* name,
1178  int lineno
1179  )
1180 {
1181  XML_NODE* n = NULL;
1182 
1183  assert(name != NULL);
1184 
1185  if ( BMSallocMemory(&n) != NULL )
1186  {
1187  BMSclearMemory(n);
1188  BMSduplicateMemoryArray(&n->name, name, strlen(name)+1);
1189  n->lineno = lineno;
1190  }
1191  return n;
1192 }
1193 
1194 /** create new attribute */
1196  const char* name,
1197  const char* value
1198  )
1199 {
1200  XML_ATTR* a = NULL;
1201 
1202  assert(name != NULL);
1203  assert(value != NULL);
1204 
1205  if ( BMSallocMemory(&a) != NULL )
1206  {
1207  BMSclearMemory(a);
1208  BMSduplicateMemoryArray(&a->name, name, strlen(name)+1);
1209  BMSduplicateMemoryArray(&a->value, value, strlen(value)+1);
1210  }
1211  return a;
1212 }
1213 
1214 /** add attribute */
1216  XML_NODE* n,
1217  XML_ATTR* a
1218  )
1219 {
1220  assert(n != NULL);
1221  assert(a != NULL);
1222 
1223  a->next = n->attrlist;
1224  n->attrlist = a;
1225 }
1226 
1227 /** append child node */
1229  XML_NODE* parent,
1230  XML_NODE* child
1231  )
1232 {
1233  assert(parent != NULL);
1234  assert(child != NULL);
1235 
1236  child->parent = parent;
1237  child->prevsibl = parent->lastchild;
1238  child->nextsibl = NULL;
1239  parent->lastchild = child;
1240 
1241  if ( child->prevsibl != NULL )
1242  child->prevsibl->nextsibl = child;
1243 
1244  if ( parent->firstchild == NULL )
1245  parent->firstchild = child;
1246 }
1247 
1248 /** free attribute */
1249 static
1251  XML_ATTR* attr
1252  )
1253 {
1254  XML_ATTR* a;
1255 
1256  /* Note: use an iterative implementation instead of a recursive one; the latter is much slower for large instances
1257  * and might overflow the heap. */
1258  a = attr;
1259  while (a != NULL)
1260  {
1261  XML_ATTR* b;
1262  b = a->next;
1263 
1264  assert(a->name != NULL);
1265  assert(a->value != NULL);
1266 
1267  BMSfreeMemoryArray(&a->name);
1268  BMSfreeMemoryArray(&a->value);
1269  BMSfreeMemory(&a);
1270  a = b;
1271  }
1272 }
1273 
1274 /** free node */
1276  XML_NODE* node
1277  )
1278 {
1279  XML_NODE* n;
1280 
1281  if ( node == NULL )
1282  return;
1283 
1284  /* Free data from back to front (because free is faster this way). */
1285  /* Note: use an iterative implementation instead of a recursive one; the latter is much slower for large instances
1286  * and might overflow the heap. */
1287  n = node->lastchild;
1288  while ( n != NULL )
1289  {
1290  XML_NODE* m;
1291  m = n->prevsibl;
1292  xmlFreeNode(n);
1293  n = m;
1294  }
1295 
1296  xmlFreeAttr(node->attrlist);
1297 
1298  if ( node->data != NULL )
1299  {
1300  BMSfreeMemoryArray(&node->data);
1301  }
1302  assert(node->name != NULL);
1303 
1304  BMSfreeMemoryArray(&node->name);
1305  BMSfreeMemory(&node);
1306 }
1307 
1308 /** output node */
1310  const XML_NODE* root
1311  )
1312 {
1313  const XML_NODE* n;
1314  const XML_ATTR* a;
1315 
1316  assert(root != NULL);
1317 
1318  for (n = root; n != NULL; n = n->nextsibl)
1319  {
1320  infoMessage("Name: %s\n", n->name);
1321  infoMessage("Line: %d\n", n->lineno);
1322  infoMessage("Data: %s\n", (n->data != NULL) ? n->data : "***");
1323 
1324  for (a = n->attrlist; a != NULL; a = a->next)
1325  infoMessage("Attr: %s = [%s]\n", a->name, a->value);
1326 
1327  if ( n->firstchild != NULL )
1328  {
1329  infoMessage("->\n");
1330  xmlShowNode(n->firstchild);
1331  infoMessage("<-\n");
1332  }
1333  }
1334 }
1335 
1336 /** get attribute value */
1337 const char* xmlGetAttrval(
1338  const XML_NODE* node,
1339  const char* name
1340  )
1341 {
1342  XML_ATTR* a;
1343 
1344  assert(node != NULL);
1345  assert(name != NULL);
1346 
1347  for (a = node->attrlist; a != NULL; a = a->next)
1348  {
1349  if ( ! strcmp(name, a->name) )
1350  break;
1351  }
1352 
1353 #ifdef SCIP_DEBUG
1354  if (a == NULL)
1355  infoMessage("Error: Attribute %s in TAG <%s> not found\n", name, node->name);
1356 #endif
1357 
1358  return (a == NULL) ? NULL : a->value;
1359 }
1360 
1361 /** return first node */
1363  const XML_NODE* node,
1364  const char* name
1365  )
1366 {
1367  const XML_NODE* n;
1368 
1369  assert(node != NULL);
1370  assert(name != NULL);
1371 
1372  for (n = node; n != NULL; n = n->nextsibl)
1373  {
1374  if ( ! strcmp(name, n->name) )
1375  break;
1376  }
1377 
1378  return n;
1379 }
1380 
1381 /** return next node */
1383  const XML_NODE* node,
1384  const char* name
1385  )
1386 {
1387  assert(node != NULL);
1388  assert(name != NULL);
1389 
1390  return (node->nextsibl == NULL) ? NULL : xmlFirstNode(node->nextsibl, name);
1391 }
1392 
1393 /** find node */
1395  const XML_NODE* node,
1396  const char* name
1397  )
1398 {
1399  const XML_NODE* n;
1400  const XML_NODE* r;
1401 
1402  assert(node != NULL);
1403  assert(name != NULL);
1404 
1405  if ( ! strcmp(name, node->name) )
1406  return node;
1407 
1408  for (n = node->firstchild; n != NULL; n = n->nextsibl)
1409  {
1410  r = xmlFindNode(n, name);
1411  if ( r != NULL )
1412  return r;
1413  }
1414 
1415  return NULL;
1416 }
1417 
1418 /** find node with bound on the depth */
1420  const XML_NODE* node, /**< current node - use start node to begin */
1421  const char* name, /**< name of tag to search for */
1422  int depth, /**< current depth - start with 0 for root */
1423  int maxdepth /**< maximal depth */
1424  )
1425 {
1426  const XML_NODE* n;
1427  const XML_NODE* r;
1428 
1429  assert(node != NULL);
1430  assert(name != NULL);
1431 
1432  if ( ! strcmp(name, node->name) )
1433  return node;
1434 
1435  if ( depth < maxdepth )
1436  {
1437  for (n = node->firstchild; n != NULL; n = n->nextsibl)
1438  {
1439  r = xmlFindNodeMaxdepth(n, name, depth+1, maxdepth);
1440  if ( r != NULL )
1441  return r;
1442  }
1443  }
1444 
1445  return NULL;
1446 }
1447 
1448 /** return next sibling */
1450  const XML_NODE* node
1451  )
1452 {
1453  assert(node != NULL);
1454 
1455  return node->nextsibl;
1456 }
1457 
1458 /** return previous sibling */
1460  const XML_NODE* node
1461  )
1462 {
1463  assert(node != NULL);
1464 
1465  return node->prevsibl;
1466 }
1467 
1468 /** return first child */
1470  const XML_NODE* node
1471  )
1472 {
1473  assert(node != NULL);
1474 
1475  return node->firstchild;
1476 }
1477 
1478 /** return last child */
1480  const XML_NODE* node
1481  )
1482 {
1483  assert(node != NULL);
1484 
1485  return node->lastchild;
1486 }
1487 
1488 /** return name of node */
1489 const char* xmlGetName(
1490  const XML_NODE* node
1491  )
1492 {
1493  assert(node != NULL);
1494 
1495  return node->name;
1496 }
1497 
1498 /** get line number */
1500  const XML_NODE* node
1501  )
1502 {
1503  assert(node != NULL);
1504 
1505  return node->lineno;
1506 }
1507 
1508 /** get data */
1509 const char* xmlGetData(
1510  const XML_NODE* node
1511  )
1512 {
1513  assert(node != NULL);
1514 
1515  return node->data;
1516 }
1517 
1518 /** find PCDATA */
1519 const char* xmlFindPcdata(
1520  const XML_NODE* node,
1521  const char* name
1522  )
1523 {
1524  const XML_NODE* n;
1525 
1526  assert(node != NULL);
1527  assert(name != NULL);
1528 
1529  n = xmlFindNode(node, name);
1530  if ( n == NULL )
1531  return NULL;
1532 
1533  if ( ! strcmp(n->firstchild->name, "#PCDATA") )
1534  return n->firstchild->data;
1535 
1536  return NULL;
1537 }
#define XML_Bool
Definition: xmldef.h:42
#define NULL
Definition: def.h:267
#define LINE_BUF_SIZE
Definition: xmlparse.c:67
const XML_NODE * xmlFirstNode(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1362
static void handleDecl(PPOS *ppos)
Definition: xmlparse.c:653
PSTACK * next
Definition: xmlparse.c:91
const char * xmlFindPcdata(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1519
void xmlFreeNode(XML_NODE *node)
Definition: xmlparse.c:1275
#define FREAD(buf, len, fp)
Definition: xmldef.h:63
static int getsymbol(PPOS *ppos)
Definition: xmlparse.c:321
#define ALLOC_ABORT(x)
Definition: tclique_def.h:50
#define FCLOSE(fp)
Definition: xmldef.h:61
struct XML_ATTR_struct XML_ATTR
Definition: xml.h:41
static char * doCdata(PPOS *ppos)
Definition: xmlparse.c:558
static void xmlFreeAttr(XML_ATTR *attr)
Definition: xmlparse.c:1250
#define FALSE
Definition: def.h:94
XML_NODE * xmlNewNode(const char *name, int lineno)
Definition: xmlparse.c:1176
#define TRUE
Definition: def.h:93
const char * xmlGetName(const XML_NODE *node)
Definition: xmlparse.c:1489
#define BMSallocMemoryArray(ptr, num)
Definition: memory.h:123
#define DATA_EXT_SIZE
Definition: xmlparse.c:66
enum parse_state_enum PSTATE
Definition: xmlparse.c:85
#define ATTR_EXT_SIZE
Definition: xmlparse.c:65
XML_ATTR * xmlNewAttr(const char *name, const char *value)
Definition: xmlparse.c:1195
#define BMSfreeMemory(ptr)
Definition: memory.h:145
void xmlShowNode(const XML_NODE *root)
Definition: xmlparse.c:1309
int SCIPstrncpy(char *t, const char *s, int size)
Definition: misc.c:10919
static void procInTag(PPOS *ppos)
Definition: xmlparse.c:882
const char * xmlGetData(const XML_NODE *node)
Definition: xmlparse.c:1509
#define debugMessage
Definition: tclique_def.h:80
static void xmlErrmsg(PPOS *ppos, const char *msg, XML_Bool msg_only, const char *file, int line)
Definition: xmlparse.c:110
static void procPcdata(PPOS *ppos)
Definition: xmlparse.c:965
static int mygetc(PPOS *ppos)
Definition: xmlparse.c:247
PSTACK * top
Definition: xmlparse.c:105
static void ungetsymbol(PPOS *ppos, int c)
Definition: xmlparse.c:357
#define BMSfreeMemoryArray(ptr)
Definition: memory.h:147
parse_state_enum
Definition: xmlparse.c:77
static XML_Bool popPstack(PPOS *ppos)
Definition: xmlparse.c:207
struct XML_NODE_struct XML_NODE
Definition: xml.h:50
static XML_Bool doComment(PPOS *ppos)
Definition: xmlparse.c:522
#define ALLOC_FALSE(x)
Definition: tclique_def.h:62
internal miscellaneous methods
const XML_NODE * xmlFindNode(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1394
const char * xmlGetAttrval(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1337
const XML_NODE * xmlNextNode(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1382
static XML_Bool xmlParse(PPOS *ppos)
Definition: xmlparse.c:1049
#define BMSduplicateMemoryArray(ptr, source, num)
Definition: memory.h:143
const XML_NODE * xmlFirstChild(const XML_NODE *node)
Definition: xmlparse.c:1469
XML_NODE * xmlProcess(const char *filename)
Definition: xmlparse.c:1085
static int skipSpace(PPOS *ppos)
Definition: xmlparse.c:370
static XML_NODE * topPstack(const PPOS *ppos)
Definition: xmlparse.c:192
static char * getAttrval(PPOS *ppos)
Definition: xmlparse.c:458
void xmlAddAttr(XML_NODE *n, XML_ATTR *a)
Definition: xmlparse.c:1215
#define FGETS(buf, len, fp)
Definition: xmldef.h:62
#define BMScopyMemoryArray(ptr, source, num)
Definition: memory.h:134
#define FOPEN(file, mode)
Definition: xmldef.h:60
#define infoMessage
Definition: tclique_def.h:86
#define FPTYPE
Definition: xmldef.h:64
#define xmlError(a, b)
Definition: xmlparse.c:69
#define BMSclearMemory(ptr)
Definition: memory.h:129
static char * getName(PPOS *ppos)
Definition: xmlparse.c:393
XML_NODE * node
Definition: xmlparse.c:90
SCIP_Real * r
Definition: circlepacking.c:59
SCIP_VAR ** b
Definition: circlepacking.c:65
const XML_NODE * xmlNextSibl(const XML_NODE *node)
Definition: xmlparse.c:1449
char buf[LINE_BUF_SIZE]
Definition: xmlparse.c:99
const char * filename
Definition: xmlparse.c:97
SCIP_VAR * a
Definition: circlepacking.c:66
static XML_Bool pushPstack(PPOS *ppos, XML_NODE *node)
Definition: xmlparse.c:169
#define BMSallocMemory(ptr)
Definition: memory.h:118
#define BMSreallocMemoryArray(ptr, num)
Definition: memory.h:127
declarations for XML parsing
#define NAME_EXT_SIZE
Definition: xmlparse.c:64
const XML_NODE * xmlPrevSibl(const XML_NODE *node)
Definition: xmlparse.c:1459
const XML_NODE * xmlLastChild(const XML_NODE *node)
Definition: xmlparse.c:1479
const XML_NODE * xmlFindNodeMaxdepth(const XML_NODE *node, const char *name, int depth, int maxdepth)
Definition: xmlparse.c:1419
static void handlePi(PPOS *ppos)
Definition: xmlparse.c:624
int xmlGetLine(const XML_NODE *node)
Definition: xmlparse.c:1499
void xmlAppendChild(XML_NODE *parent, XML_NODE *child)
Definition: xmlparse.c:1228
static void handleStarttag(PPOS *ppos)
Definition: xmlparse.c:799
static void procBefore(PPOS *ppos)
Definition: xmlparse.c:837
static void handleEndtag(PPOS *ppos)
Definition: xmlparse.c:757
static void clearPstack(PPOS *ppos)
Definition: xmlparse.c:235
memory allocation routines
definitions for XML parsing