token.c

Go to the documentation of this file.
00001 /*
00002 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00003 %                                                                             %
00004 %                                                                             %
00005 %                                                                             %
00006 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
00007 %                      T    O   O  K  K   E      NN  N                        %
00008 %                      T    O   O  KKK    EEE    N N N                        %
00009 %                      T    O   O  K  K   E      N  NN                        %
00010 %                      T     OOO   K   K  EEEEE  N   N                        %
00011 %                                                                             %
00012 %                                                                             %
00013 %                         MagickCore Token Methods                            %
00014 %                                                                             %
00015 %                             Software Design                                 %
00016 %                               John Cristy                                   %
00017 %                              January 1993                                   %
00018 %                                                                             %
00019 %                                                                             %
00020 %  Copyright 1999-2008 ImageMagick Studio LLC, a non-profit organization      %
00021 %  dedicated to making software imaging solutions freely available.           %
00022 %                                                                             %
00023 %  You may not use this file except in compliance with the License.  You may  %
00024 %  obtain a copy of the License at                                            %
00025 %                                                                             %
00026 %    http://www.imagemagick.org/script/license.php                            %
00027 %                                                                             %
00028 %  Unless required by applicable law or agreed to in writing, software        %
00029 %  distributed under the License is distributed on an "AS IS" BASIS,          %
00030 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
00031 %  See the License for the specific language governing permissions and        %
00032 %  limitations under the License.                                             %
00033 %                                                                             %
00034 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00035 %
00036 %
00037 %
00038 */
00039 
00040 /*
00041   Include declarations.
00042 */
00043 #include "magick/studio.h"
00044 #include "magick/exception.h"
00045 #include "magick/exception-private.h"
00046 #include "magick/image.h"
00047 #include "magick/memory_.h"
00048 #include "magick/string_.h"
00049 #include "magick/token.h"
00050 #include "magick/utility.h"
00051 
00052 /*
00053   Typedef declarations.
00054 */
00055 struct _TokenInfo
00056 {
00057   int
00058     state;
00059 
00060   MagickStatusType
00061     flag;
00062 
00063   long
00064     offset;
00065 
00066   char
00067     quote;
00068 
00069   unsigned long
00070     signature;
00071 };
00072 
00073 /*
00074 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00075 %                                                                             %
00076 %                                                                             %
00077 %                                                                             %
00078 %   A c q u i r e T o k e n I n f o                                           %
00079 %                                                                             %
00080 %                                                                             %
00081 %                                                                             %
00082 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00083 %
00084 %  AcquireTokenInfo() allocates the TokenInfo structure.
00085 %
00086 %  The format of the AcquireTokenInfo method is:
00087 %
00088 %      TokenInfo *AcquireTokenInfo()
00089 %
00090 */
00091 MagickExport TokenInfo *AcquireTokenInfo(void)
00092 {
00093   TokenInfo
00094     *token_info;
00095 
00096   token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
00097   if (token_info == (TokenInfo *) NULL)
00098     ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
00099   token_info->signature=MagickSignature;
00100   return(token_info);
00101 }
00102 
00103 /*
00104 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00105 %                                                                             %
00106 %                                                                             %
00107 %                                                                             %
00108 %   D e s t r o y T o k e n I n f o                                           %
00109 %                                                                             %
00110 %                                                                             %
00111 %                                                                             %
00112 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00113 %
00114 %  DestroyTokenInfo() deallocates memory associated with a TokenInfo
00115 %  structure.
00116 %
00117 %  The format of the DestroyTokenInfo method is:
00118 %
00119 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
00120 %
00121 %  A description of each parameter follows:
00122 %
00123 %    o token_info: Specifies a pointer to a TokenInfo structure.
00124 %
00125 */
00126 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
00127 {
00128   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
00129   assert(token_info != (TokenInfo *) NULL);
00130   assert(token_info->signature == MagickSignature);
00131   token_info->signature=(~MagickSignature);
00132   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
00133   return(token_info);
00134 }
00135 
00136 /*
00137 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00138 %                                                                             %
00139 %                                                                             %
00140 %                                                                             %
00141 +   G e t M a g i c k T o k e n                                               %
00142 %                                                                             %
00143 %                                                                             %
00144 %                                                                             %
00145 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00146 %
00147 %  GetMagickToken() gets a token from the token stream.  A token is defined as a
00148 %  sequence of characters delimited by whitespace (e.g. clip-path), a sequence
00149 %  delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
00150 %  parenthesis (e.g. rgb(0,0,0)).
00151 %
00152 %  The format of the GetMagickToken method is:
00153 %
00154 %      void GetMagickToken(const char *start,const char **end,char *token)
00155 %
00156 %  A description of each parameter follows:
00157 %
00158 %    o start: the start of the token sequence.
00159 %
00160 %    o end: point to the end of the token sequence.
00161 %
00162 %    o token: copy the token to this buffer.
00163 %
00164 */
00165 MagickExport void GetMagickToken(const char *start,const char **end,char *token)
00166 {
00167   register const char
00168     *p;
00169 
00170   register long
00171     i;
00172 
00173   i=0;
00174   for (p=start; *p != '\0'; )
00175   {
00176     while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
00177       p++;
00178     if (*p == '\0')
00179       break;
00180     switch (*p)
00181     {
00182       case '"':
00183       case '\'':
00184       case '`':
00185       case '{':
00186       {
00187         register char
00188           escape;
00189 
00190         switch (*p)
00191         {
00192           case '"': escape='"'; break;
00193           case '\'': escape='\''; break;
00194           case '`': escape='\''; break;
00195           case '{': escape='}'; break;
00196           default: escape=(*p); break;
00197         }
00198         for (p++; *p != '\0'; p++)
00199         {
00200           if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
00201             p++;
00202           else
00203             if (*p == escape)
00204               {
00205                 p++;
00206                 break;
00207               }
00208           token[i++]=(*p);
00209         }
00210         break;
00211       }
00212       case '/':
00213       {
00214         token[i++]=(*p++);
00215         if ((*p == '>') || (*p == '/'))
00216           token[i++]=(*p++);
00217         break;
00218       }
00219       default:
00220       {
00221         char
00222           *q;
00223 
00224         (void) strtod(p,&q);
00225         if (p != q)
00226           {
00227             for ( ; p < q; p++)
00228               token[i++]=(*p);
00229             if (*p == '%')
00230               token[i++]=(*p++);
00231             break;
00232           }
00233         if ((isalpha((int) ((unsigned char) *p)) == 0) &&
00234             (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
00235           {
00236             token[i++]=(*p++);
00237             break;
00238           }
00239         for ( ; *p != '\0'; p++)
00240         {
00241           if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
00242               (*p == ',') || (*p == ':')) && (*(p-1) != '\\'))
00243             break;
00244           if ((i > 0) && (*p == '<'))
00245             break;
00246           token[i++]=(*p);
00247           if (*p == '>')
00248             break;
00249           if (*p == '(')
00250             for (p++; *p != '\0'; p++)
00251             {
00252               token[i++]=(*p);
00253               if ((*p == ')') && (*(p-1) != '\\'))
00254                 break;
00255             }
00256         }
00257         break;
00258       }
00259     }
00260     break;
00261   }
00262   token[i]='\0';
00263   if (LocaleNCompare(token,"url(",4) == 0)
00264     {
00265       ssize_t
00266         offset;
00267 
00268       offset=4;
00269       if (token[offset] == '#')
00270         offset++;
00271       i=(long) strlen(token);
00272       (void) CopyMagickString(token,token+offset,MaxTextExtent);
00273       token[i-offset-1]='\0';
00274     }
00275   while (isspace((int) ((unsigned char) *p)) != 0)
00276     p++;
00277   if (end != (const char **) NULL)
00278     *end=(const char *) p;
00279 }
00280 
00281 /*
00282 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00283 %                                                                             %
00284 %                                                                             %
00285 %                                                                             %
00286 %   G l o b E x p r e s s i o n                                               %
00287 %                                                                             %
00288 %                                                                             %
00289 %                                                                             %
00290 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00291 %
00292 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
00293 %
00294 %  The format of the GlobExpression function is:
00295 %
00296 %      MagickBooleanType GlobExpression(const char *expression,
00297 %        const char *pattern,const MagickBooleanType case_insensitive)
00298 %
00299 %  A description of each parameter follows:
00300 %
00301 %    o expression: Specifies a pointer to a text string containing a file name.
00302 %
00303 %    o pattern: Specifies a pointer to a text string containing a pattern.
00304 %
00305 %    o case_insensitive: set to MagickTrue to ignore the case when matching
00306 %      an expression.
00307 %
00308 */
00309 MagickExport MagickBooleanType GlobExpression(const char *expression,
00310   const char *pattern,const MagickBooleanType case_insensitive)
00311 {
00312   MagickBooleanType
00313     done,
00314     match;
00315 
00316   register const char
00317     *p;
00318 
00319   /*
00320     Return on empty pattern or '*'.
00321   */
00322   if (pattern == (char *) NULL)
00323     return(MagickTrue);
00324   if (*pattern == '\0')
00325     return(MagickTrue);
00326   if (LocaleCompare(pattern,"*") == 0)
00327     return(MagickTrue);
00328   p=pattern+strlen(pattern)-1;
00329   if ((*p == ']') && (strchr(pattern,'[') != (char *) NULL))
00330     {
00331       ExceptionInfo
00332         *exception;
00333 
00334       ImageInfo
00335         *image_info;
00336 
00337       /*
00338         Determine if pattern is a scene, i.e. img0001.pcd[2].
00339       */
00340       image_info=AcquireImageInfo();
00341       (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
00342       exception=AcquireExceptionInfo();
00343       (void) SetImageInfo(image_info,MagickTrue,exception);
00344       exception=DestroyExceptionInfo(exception);
00345       if (LocaleCompare(image_info->filename,pattern) != 0)
00346         {
00347           image_info=DestroyImageInfo(image_info);
00348           return(MagickFalse);
00349         }
00350       image_info=DestroyImageInfo(image_info);
00351     }
00352   /*
00353     Evaluate glob expression.
00354   */
00355   done=MagickFalse;
00356   while ((*pattern != '\0') && (done == MagickFalse))
00357   {
00358     if (*expression == '\0')
00359       if ((*pattern != '{') && (*pattern != '*'))
00360         break;
00361     switch (*pattern)
00362     {
00363       case '\\':
00364       {
00365         pattern++;
00366         if (*pattern != '\0')
00367           pattern++;
00368         break;
00369       }
00370       case '*':
00371       {
00372         MagickBooleanType
00373           status;
00374 
00375         pattern++;
00376         status=MagickFalse;
00377         while ((*expression != '\0') && (status == MagickFalse))
00378           status=GlobExpression(expression++,pattern,case_insensitive);
00379         if (status != MagickFalse)
00380           {
00381             while (*expression != '\0')
00382               expression++;
00383             while (*pattern != '\0')
00384               pattern++;
00385           }
00386         break;
00387       }
00388       case '[':
00389       {
00390         char
00391           c;
00392 
00393         pattern++;
00394         for ( ; ; )
00395         {
00396           if ((*pattern == '\0') || (*pattern == ']'))
00397             {
00398               done=MagickTrue;
00399               break;
00400             }
00401           if (*pattern == '\\')
00402             {
00403               pattern++;
00404               if (*pattern == '\0')
00405                 {
00406                   done=MagickTrue;
00407                   break;
00408                 }
00409              }
00410           if (*(pattern+1) == '-')
00411             {
00412               c=(*pattern);
00413               pattern+=2;
00414               if (*pattern == ']')
00415                 {
00416                   done=MagickTrue;
00417                   break;
00418                 }
00419               if (*pattern == '\\')
00420                 {
00421                   pattern++;
00422                   if (*pattern == '\0')
00423                     {
00424                       done=MagickTrue;
00425                       break;
00426                     }
00427                 }
00428               if ((*expression < c) || (*expression > *pattern))
00429                 {
00430                   pattern++;
00431                   continue;
00432                 }
00433             }
00434           else
00435             if (*pattern != *expression)
00436               {
00437                 pattern++;
00438                 continue;
00439               }
00440           pattern++;
00441           while ((*pattern != ']') && (*pattern != '\0'))
00442           {
00443             if ((*pattern == '\\') && (*(pattern+1) != '\0'))
00444               pattern++;
00445             pattern++;
00446           }
00447           if (*pattern != '\0')
00448             {
00449               pattern++;
00450               expression++;
00451             }
00452           break;
00453         }
00454         break;
00455       }
00456       case '?':
00457       {
00458         pattern++;
00459         expression++;
00460         break;
00461       }
00462       case '{':
00463       {
00464         register const char
00465           *p;
00466 
00467         pattern++;
00468         while ((*pattern != '}') && (*pattern != '\0'))
00469         {
00470           p=expression;
00471           match=MagickTrue;
00472           while ((*p != '\0') && (*pattern != '\0') &&
00473                  (*pattern != ',') && (*pattern != '}') &&
00474                  (match != MagickFalse))
00475           {
00476             if (*pattern == '\\')
00477               pattern++;
00478             match=(*pattern == *p) ? MagickTrue : MagickFalse;
00479             p++;
00480             pattern++;
00481           }
00482           if (*pattern == '\0')
00483             {
00484               match=MagickFalse;
00485               done=MagickTrue;
00486               break;
00487             }
00488           else
00489             if (match != MagickFalse)
00490               {
00491                 expression=p;
00492                 while ((*pattern != '}') && (*pattern != '\0'))
00493                 {
00494                   pattern++;
00495                   if (*pattern == '\\')
00496                     {
00497                       pattern++;
00498                       if (*pattern == '}')
00499                         pattern++;
00500                     }
00501                 }
00502               }
00503             else
00504               {
00505                 while ((*pattern != '}') && (*pattern != ',') &&
00506                        (*pattern != '\0'))
00507                 {
00508                   pattern++;
00509                   if (*pattern == '\\')
00510                     {
00511                       pattern++;
00512                       if ((*pattern == '}') || (*pattern == ','))
00513                         pattern++;
00514                     }
00515                 }
00516               }
00517             if (*pattern != '\0')
00518               pattern++;
00519           }
00520         break;
00521       }
00522       default:
00523       {
00524         if (case_insensitive != MagickFalse)
00525           {
00526             if (tolower((int) ((unsigned char) *expression)) !=
00527                 tolower((int) ((unsigned char) *pattern)))
00528               {
00529                 done=MagickTrue;
00530                 break;
00531               }
00532           }
00533         else
00534           if (*expression != *pattern)
00535             {
00536               done=MagickTrue;
00537               break;
00538             }
00539         expression++;
00540         pattern++;
00541       }
00542     }
00543   }
00544   while (*pattern == '*') 
00545     pattern++;
00546   match=(*expression == '\0') && (*pattern == '\0') ? MagickTrue : MagickFalse;
00547   return(match);
00548 }
00549 
00550 /*
00551 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00552 %                                                                             %
00553 %                                                                             %
00554 %                                                                             %
00555 +     I s G l o b                                                             %
00556 %                                                                             %
00557 %                                                                             %
00558 %                                                                             %
00559 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00560 %
00561 %  IsGlob() returns MagickTrue if the path specification contains a globbing
00562 %  pattern.
00563 %
00564 %  The format of the IsGlob method is:
00565 %
00566 %      MagickBooleanType IsGlob(const char *path)
00567 %
00568 %  A description of each parameter follows:
00569 %
00570 %    o path: the path.
00571 %
00572 */
00573 MagickExport MagickBooleanType IsGlob(const char *path)
00574 {
00575   MagickBooleanType
00576     status;
00577 
00578   if (IsPathAccessible(path) != MagickFalse)
00579     return(MagickFalse);
00580   status=(strchr(path,'*') != (char *) NULL) ||
00581     (strchr(path,'?') != (char *) NULL) ||
00582     (strchr(path,'{') != (char *) NULL) ||
00583     (strchr(path,'}') != (char *) NULL) ||
00584     (strchr(path,'[') != (char *) NULL) ||
00585     (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
00586   return(status);
00587 }
00588 
00589 /*
00590 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00591 %                                                                             %
00592 %                                                                             %
00593 %                                                                             %
00594 %   T o k e n i z e r                                                         %
00595 %                                                                             %
00596 %                                                                             %
00597 %                                                                             %
00598 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00599 %
00600 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
00601 %  one at a time from a string of characters.  The characters used for white
00602 %  space, for break characters, and for quotes can be specified.  Also,
00603 %  characters in the string can be preceded by a specifiable escape character
00604 %  which removes any special meaning the character may have.
00605 %
00606 %  Here is some terminology:
00607 %
00608 %    o token: A single unit of information in the form of a group of
00609 %      characters.
00610 %
00611 %    o white space: Space that gets ignored (except within quotes or when
00612 %      escaped), like blanks and tabs. In addition, white space terminates a
00613 %      non-quoted token.
00614 %
00615 %    o break set: One or more characters that separates non-quoted tokens.
00616 %      Commas are a common break character. The usage of break characters to
00617 %      signal the end of a token is the same as that of white space, except
00618 %      multiple break characters with nothing or only white space between
00619 %      generate a null token for each two break characters together.
00620 %
00621 %      For example, if blank is set to be the white space and comma is set to
00622 %      be the break character, the line
00623 %
00624 %        A, B, C ,  , DEF
00625 %
00626 %        ... consists of 5 tokens:
00627 %
00628 %        1)  "A"
00629 %        2)  "B"
00630 %        3)  "C"
00631 %        4)  "" (the null string)
00632 %        5)  "DEF"
00633 %
00634 %    o Quote character: A character that, when surrounding a group of other
00635 %      characters, causes the group of characters to be treated as a single
00636 %      token, no matter how many white spaces or break characters exist in
00637 %      the group. Also, a token always terminates after the closing quote.
00638 %      For example, if ' is the quote character, blank is white space, and
00639 %      comma is the break character, the following string
00640 %
00641 %        A, ' B, CD'EF GHI
00642 %
00643 %        ... consists of 4 tokens:
00644 %
00645 %        1)  "A"
00646 %        2)  " B, CD" (note the blanks & comma)
00647 %        3)  "EF"
00648 %        4)  "GHI"
00649 %
00650 %      The quote characters themselves do not appear in the resultant
00651 %      tokens.  The double quotes are delimiters I use here for
00652 %      documentation purposes only.
00653 %
00654 %    o Escape character: A character which itself is ignored but which
00655 %      causes the next character to be used as is.  ^ and \ are often used
00656 %      as escape characters. An escape in the last position of the string
00657 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
00658 %      and non-escape) character. For example, assume white space, break
00659 %      character, and quote are the same as in the above examples, and
00660 %      further, assume that ^ is the escape character. Then, in the string
00661 %
00662 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
00663 %
00664 %        ... there are 7 tokens:
00665 %
00666 %        1)  "ABC"
00667 %        2)  " DEF ' GH"
00668 %        3)  "I"
00669 %        4)  " "     (a lone blank)
00670 %        5)  "J"
00671 %        6)  "K L"
00672 %        7)  "^"     (passed as is at end of line)
00673 %
00674 %  The format of the Tokenizer method is:
00675 %
00676 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
00677 %        const size_t max_token_length,const char *line,const char *white,
00678 %        const char *break_set,const char *quote,const char escape,
00679 %        char *breaker,int *next,char *quoted)
00680 %
00681 %  A description of each parameter follows:
00682 %
00683 %    o flag: right now, only the low order 2 bits are used.
00684 %
00685 %        1 => convert non-quoted tokens to upper case
00686 %        2 => convert non-quoted tokens to lower case
00687 %        0 => do not convert non-quoted tokens
00688 %
00689 %    o token: a character string containing the returned next token
00690 %
00691 %    o max_token_length: the maximum size of "token".  Characters beyond
00692 %      "max_token_length" are truncated.
00693 %
00694 %    o line: the string to be parsed.
00695 %
00696 %    o white: a string of the valid white spaces.  example:
00697 %
00698 %        char whitesp[]={" \t"};
00699 %
00700 %      blank and tab will be valid white space.
00701 %
00702 %    o break_set: a string of the valid break characters. example:
00703 %
00704 %        char breakch[]={";,"};
00705 %
00706 %      semicolon and comma will be valid break characters.
00707 %
00708 %    o quote: a string of the valid quote characters. An example would be
00709 %
00710 %        char quotech[]={"'\""};
00711 %
00712 %      (this causes single and double quotes to be valid) Note that a
00713 %      token starting with one of these characters needs the same quote
00714 %      character to terminate it.
00715 %
00716 %      for example:
00717 %
00718 %        "ABC '
00719 %
00720 %      is unterminated, but
00721 %
00722 %        "DEF" and 'GHI'
00723 %
00724 %      are properly terminated.  Note that different quote characters
00725 %      can appear on the same line; only for a given token do the quote
00726 %      characters have to be the same.
00727 %
00728 %    o escape: the escape character (NOT a string ... only one
00729 %      allowed). Use zero if none is desired.
00730 %
00731 %    o breaker: the break character used to terminate the current
00732 %      token.  If the token was quoted, this will be the quote used.  If
00733 %      the token is the last one on the line, this will be zero.
00734 %
00735 %    o next: this variable points to the first character of the
00736 %      next token.  it gets reset by "tokenizer" as it steps through the
00737 %      string.  Set it to 0 upon initialization, and leave it alone
00738 %      after that.  You can change it if you want to jump around in the
00739 %      string or re-parse from the beginning, but be careful.
00740 %
00741 %    o quoted: set to MagickTrue if the token was quoted and MagickFalse
00742 %      if not.  You may need this information (for example:  in C, a
00743 %      string with quotes around it is a character string, while one
00744 %      without is an identifier).
00745 %
00746 %    o result: 0 if we haven't reached EOS (end of string), and 1
00747 %      if we have.
00748 %
00749 */
00750 
/*
  Parser states kept in TokenInfo.state by Tokenizer().
*/
00751 #define IN_WHITE 0  /* before the token: leading white space is skipped */
00752 #define IN_TOKEN 1  /* collecting the characters of an unquoted token */
00753 #define IN_QUOTE 2  /* inside a quoted token: white and break characters are stored */
00754 #define IN_OZONE 3  /* after the closing quote, or after white space that followed a token: a break character is still consumed as the breaker, any other non-white character ends the token */
00755 
/*
  Return the zero-based position of the first occurrence of c in string, or
  -1 if c does not occur.  The terminating NUL is never matched.
*/
static long sindex(int c,const char *string)
{
  long
    position;

  for (position=0; string[position] != '\0'; position++)
  {
    if ((int) string[position] == c)
      return(position);
  }
  return(-1);
}
00766 
00767 static void StoreToken(TokenInfo *token_info,char *string,
00768   size_t max_token_length,int c)
00769 {
00770   register long
00771     i;
00772 
00773   if ((token_info->offset < 0) ||
00774       ((size_t) token_info->offset >= (max_token_length-1)))
00775     return;
00776   i=token_info->offset++;
00777   string[i]=(char) c;
00778   if (token_info->state == IN_QUOTE)
00779     return;
00780   switch (token_info->flag & 0x03)
00781   {
00782     case 1:
00783     {
00784       string[i]=(char) toupper(c);
00785       break;
00786     }
00787     case 2:
00788     {
00789       string[i]=(char) tolower(c);
00790       break;
00791     }
00792     default:
00793       break;
00794   }
00795 }
00796 
/*
  Extract the next token of line, starting at index *next, into token.
  Returns 1 when line[*next] is already the end of the string on entry and
  0 otherwise.  *next is left at the position from which the following call
  continues; *breaker receives the character that ended the token (0 if none)
  and *quoted is set to MagickTrue when the token started with a quote.
  Each character is classified in this order: break character, quote
  character, white space, escape character, ordinary character.
*/
00797 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
00798   char *token,const size_t max_token_length,const char *line,const char *white,
00799   const char *break_set,const char *quote,const char escape,char *breaker,
00800   int *next,char *quoted)
00801 {
00802   int
00803     c;
00804 
00805   register long
00806     i;
00807 
00808   *breaker='\0';
00809   *quoted='\0';
00810   if (line[*next] == '\0')
00811     return(1);
        /* Every call starts a fresh token: reset the complete parser state. */
00812   token_info->state=IN_WHITE;
00813   token_info->quote=(char) MagickFalse;
00814   token_info->flag=flag;
00815   for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
00816   {
00817     c=(int) line[*next];
          /* Break character. */
00818     i=sindex(c,break_set);
00819     if (i >= 0)
00820       {
00821         switch (token_info->state)
00822         {
00823           case IN_WHITE:
00824           case IN_TOKEN:
00825           case IN_OZONE:
00826           {
                  /*
                    Outside of quotes a break character ends the token: it is
                    consumed and reported through *breaker.
                  */
00827             (*next)++;
00828             *breaker=break_set[i];
00829             token[token_info->offset]='\0';
00830             return(0);
00831           }
00832           case IN_QUOTE:
00833           {
                  /* Inside quotes a break character is ordinary text. */
00834             StoreToken(token_info,token,max_token_length,c);
00835             break;
00836           }
00837         }
00838         continue;
00839       }
          /* Quote character. */
00840     i=sindex(c,quote);
00841     if (i >= 0)
00842       {
00843         switch (token_info->state)
00844         {
00845           case IN_WHITE:
00846           {
                  /* Opening quote: remember which quote character was used. */
00847             token_info->state=IN_QUOTE;
00848             token_info->quote=quote[i];
00849             *quoted=(char) MagickTrue;
00850             break;
00851           }
00852           case IN_QUOTE:
00853           {
                  /*
                    Only the quote character that opened the token closes it;
                    any other quote character is stored as text.
                  */
00854             if (quote[i] != token_info->quote)
00855               StoreToken(token_info,token,max_token_length,c);
00856             else
00857               {
00858                 token_info->state=IN_OZONE;
00859                 token_info->quote='\0';
00860               }
00861             break;
00862           }
00863           case IN_TOKEN:
00864           case IN_OZONE:
00865           {
                  /*
                    A quote after a token ends that token.  *next is not
                    advanced, so the quote is read again by the next call.
                  */
00866             *breaker=(char) c;
00867             token[token_info->offset]='\0';
00868             return(0);
00869           }
00870         }
00871         continue;
00872       }
          /* White space. */
00873     i=sindex(c,white);
00874     if (i >= 0)
00875       {
00876         switch (token_info->state)
00877         {
00878           case IN_WHITE:
00879           case IN_OZONE:
00880             break;
00881           case IN_TOKEN:
00882           {
                  /*
                    White space after an unquoted token: keep scanning in
                    IN_OZONE so a following break character is still consumed.
                  */
00883             token_info->state=IN_OZONE;
00884             break;
00885           }
00886           case IN_QUOTE:
00887           {
00888             StoreToken(token_info,token,max_token_length,c);
00889             break;
00890           }
00891         }
00892         continue;
00893       }
          /* Escape character. */
00894     if (c == (int) escape)
00895       {
00896         if (line[(*next)+1] == '\0')
00897           {
                  /*
                    An escape in the last position of the line has nothing to
                    escape: it is stored as is and the token ends.
                  */
00898             *breaker='\0';
00899             StoreToken(token_info,token,max_token_length,c);
00900             (*next)++;
00901             token[token_info->offset]='\0';
00902             return(0);
00903           }
00904         switch (token_info->state)
00905         {
00906           case IN_WHITE:
00907           {
                  /*
                    Step back one position so that the loop increment re-reads
                    this escape character, now in state IN_TOKEN.
                  */
00908             (*next)--;
00909             token_info->state=IN_TOKEN;
00910             break;
00911           }
00912           case IN_TOKEN:
00913           case IN_QUOTE:
00914           {
                  /*
                    Drop the escape and store the character after it without
                    classifying it.
                  */
00915             (*next)++;
00916             c=(int) line[*next];
00917             StoreToken(token_info,token,max_token_length,c);
00918             break;
00919           }
00920           case IN_OZONE:
00921           {
                  /* The escape belongs to the next token; leave it unread. */
00922             token[token_info->offset]='\0';
00923             return(0);
00924           }
00925         }
00926         continue;
00927       }
          /* Ordinary character. */
00928     switch (token_info->state)
00929     {
00930       case IN_WHITE:
00931         token_info->state=IN_TOKEN;
              /* fall through: the first character of a token is stored too */
00932       case IN_TOKEN:
00933       case IN_QUOTE:
00934       {
00935         StoreToken(token_info,token,max_token_length,c);
00936         break;
00937       }
00938       case IN_OZONE:
00939       {
                /* Start of the next token: finish this one without consuming. */
00940         token[token_info->offset]='\0';
00941         return(0);
00942       }
00943     }
00944   }
        /* End of line reached: terminate whatever has been collected. */
00945   token[token_info->offset]='\0';
00946   return(0);
00947 }

Generated on Sat Nov 22 23:45:18 2008 for MagickCore by  doxygen 1.5.7.1