token.c

Go to the documentation of this file.
00001 /*
00002 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00003 %                                                                             %
00004 %                                                                             %
00005 %                                                                             %
00006 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
00007 %                      T    O   O  K  K   E      NN  N                        %
00008 %                      T    O   O  KKK    EEE    N N N                        %
00009 %                      T    O   O  K  K   E      N  NN                        %
00010 %                      T     OOO   K   K  EEEEE  N   N                        %
00011 %                                                                             %
00012 %                                                                             %
00013 %                         MagickCore Token Methods                            %
00014 %                                                                             %
00015 %                             Software Design                                 %
00016 %                               John Cristy                                   %
00017 %                              January 1993                                   %
00018 %                                                                             %
00019 %                                                                             %
00020 %  Copyright 1999-2009 ImageMagick Studio LLC, a non-profit organization      %
00021 %  dedicated to making software imaging solutions freely available.           %
00022 %                                                                             %
00023 %  You may not use this file except in compliance with the License.  You may  %
00024 %  obtain a copy of the License at                                            %
00025 %                                                                             %
00026 %    http://www.imagemagick.org/script/license.php                            %
00027 %                                                                             %
00028 %  Unless required by applicable law or agreed to in writing, software        %
00029 %  distributed under the License is distributed on an "AS IS" BASIS,          %
00030 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
00031 %  See the License for the specific language governing permissions and        %
00032 %  limitations under the License.                                             %
00033 %                                                                             %
00034 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00035 %
00036 %
00037 %
00038 */
00039 
00040 /*
00041   Include declarations.
00042 */
00043 #include "magick/studio.h"
00044 #include "magick/exception.h"
00045 #include "magick/exception-private.h"
00046 #include "magick/image.h"
00047 #include "magick/memory_.h"
00048 #include "magick/string_.h"
00049 #include "magick/token.h"
00050 #include "magick/token-private.h"
00051 #include "magick/utility.h"
00052 
00053 /*
00054   Typedef declarations.
00055 */
00056 struct _TokenInfo
00057 {
00058   int
00059     state;
00060 
00061   MagickStatusType
00062     flag;
00063 
00064   long
00065     offset;
00066 
00067   char
00068     quote;
00069 
00070   unsigned long
00071     signature;
00072 };
00073 
00074 /*
00075 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00076 %                                                                             %
00077 %                                                                             %
00078 %                                                                             %
00079 %   A c q u i r e T o k e n I n f o                                           %
00080 %                                                                             %
00081 %                                                                             %
00082 %                                                                             %
00083 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00084 %
00085 %  AcquireTokenInfo() allocates the TokenInfo structure.
00086 %
00087 %  The format of the AcquireTokenInfo method is:
00088 %
00089 %      TokenInfo *AcquireTokenInfo()
00090 %
00091 */
00092 MagickExport TokenInfo *AcquireTokenInfo(void)
00093 {
00094   TokenInfo
00095     *token_info;
00096 
00097   token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
00098   if (token_info == (TokenInfo *) NULL)
00099     ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
00100   token_info->signature=MagickSignature;
00101   return(token_info);
00102 }
00103 
00104 /*
00105 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00106 %                                                                             %
00107 %                                                                             %
00108 %                                                                             %
00109 %   D e s t r o y T o k e n I n f o                                           %
00110 %                                                                             %
00111 %                                                                             %
00112 %                                                                             %
00113 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00114 %
00115 %  DestroyTokenInfo() deallocates memory associated with a TokenInfo
00116 %  structure.
00117 %
00118 %  The format of the DestroyTokenInfo method is:
00119 %
00120 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
00121 %
00122 %  A description of each parameter follows:
00123 %
00124 %    o token_info: Specifies a pointer to a TokenInfo structure.
00125 %
00126 */
00127 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
00128 {
00129   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
00130   assert(token_info != (TokenInfo *) NULL);
00131   assert(token_info->signature == MagickSignature);
00132   token_info->signature=(~MagickSignature);
00133   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
00134   return(token_info);
00135 }
00136 
00137 /*
00138 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00139 %                                                                             %
00140 %                                                                             %
00141 %                                                                             %
00142 +   G e t M a g i c k T o k e n                                               %
00143 %                                                                             %
00144 %                                                                             %
00145 %                                                                             %
00146 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00147 %
00148 %  GetMagickToken() gets a token from the token stream.  A token is defined as a
00149 %  sequence of characters delimited by whitespace (e.g. clip-path), a sequence
00150 %  delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
00151 %  parenthesis (e.g. rgb(0,0,0)).
00152 %
00153 %  The format of the GetMagickToken method is:
00154 %
00155 %      void GetMagickToken(const char *start,const char **end,char *token)
00156 %
00157 %  A description of each parameter follows:
00158 %
00159 %    o start: the start of the token sequence.
00160 %
00161 %    o end: point to the end of the token sequence.
00162 %
00163 %    o token: copy the token to this buffer.
00164 %
00165 */
00166 MagickExport void GetMagickToken(const char *start,const char **end,char *token)
00167 {
00168   double
00169     value;
00170 
00171   register const char
00172     *p;
00173 
00174   register long
00175     i;
00176 
00177   i=0;
00178   for (p=start; *p != '\0'; )
00179   {
00180     while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
00181       p++;
00182     if (*p == '\0')
00183       break;
00184     switch (*p)
00185     {
00186       case '"':
00187       case '\'':
00188       case '`':
00189       case '{':
00190       {
00191         register char
00192           escape;
00193 
00194         switch (*p)
00195         {
00196           case '"': escape='"'; break;
00197           case '\'': escape='\''; break;
00198           case '`': escape='\''; break;
00199           case '{': escape='}'; break;
00200           default: escape=(*p); break;
00201         }
00202         for (p++; *p != '\0'; p++)
00203         {
00204           if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
00205             p++;
00206           else
00207             if (*p == escape)
00208               {
00209                 p++;
00210                 break;
00211               }
00212           token[i++]=(*p);
00213         }
00214         break;
00215       }
00216       case '/':
00217       {
00218         token[i++]=(*p++);
00219         if ((*p == '>') || (*p == '/'))
00220           token[i++]=(*p++);
00221         break;
00222       }
00223       default:
00224       {
00225         char
00226           *q;
00227 
00228         value=strtod(p,&q);
00229         if ((p != q) && (*p != ','))
00230           {
00231             for ( ; (p < q) && (*p != ','); p++)
00232               token[i++]=(*p);
00233             if (*p == '%')
00234               token[i++]=(*p++);
00235             break;
00236           }
00237         if ((isalpha((int) ((unsigned char) *p)) == 0) &&
00238             (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
00239           {
00240             token[i++]=(*p++);
00241             break;
00242           }
00243         for ( ; *p != '\0'; p++)
00244         {
00245           if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
00246               (*p == ',') || (*p == ':')) && (*(p-1) != '\\'))
00247             break;
00248           if ((i > 0) && (*p == '<'))
00249             break;
00250           token[i++]=(*p);
00251           if (*p == '>')
00252             break;
00253           if (*p == '(')
00254             for (p++; *p != '\0'; p++)
00255             {
00256               token[i++]=(*p);
00257               if ((*p == ')') && (*(p-1) != '\\'))
00258                 break;
00259             }
00260         }
00261         break;
00262       }
00263     }
00264     break;
00265   }
00266   token[i]='\0';
00267   if (LocaleNCompare(token,"url(",4) == 0)
00268     {
00269       ssize_t
00270         offset;
00271 
00272       offset=4;
00273       if (token[offset] == '#')
00274         offset++;
00275       i=(long) strlen(token);
00276       (void) CopyMagickString(token,token+offset,MaxTextExtent);
00277       token[i-offset-1]='\0';
00278     }
00279   while (isspace((int) ((unsigned char) *p)) != 0)
00280     p++;
00281   if (end != (const char **) NULL)
00282     *end=(const char *) p;
00283 }
00284 
00285 /*
00286 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00287 %                                                                             %
00288 %                                                                             %
00289 %                                                                             %
00290 %   G l o b E x p r e s s i o n                                               %
00291 %                                                                             %
00292 %                                                                             %
00293 %                                                                             %
00294 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00295 %
00296 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
00297 %
00298 %  The format of the GlobExpression function is:
00299 %
00300 %      MagickBooleanType GlobExpression(const char *expression,
00301 %        const char *pattern,const MagickBooleanType case_insensitive)
00302 %
00303 %  A description of each parameter follows:
00304 %
00305 %    o expression: Specifies a pointer to a text string containing a file name.
00306 %
00307 %    o pattern: Specifies a pointer to a text string containing a pattern.
00308 %
00309 %    o case_insensitive: set to MagickTrue to ignore the case when matching
00310 %      an expression.
00311 %
00312 */
00313 MagickExport MagickBooleanType GlobExpression(const char *expression,
00314   const char *pattern,const MagickBooleanType case_insensitive)
00315 {
00316   MagickBooleanType
00317     done,
00318     match;
00319 
00320   register const char
00321     *p;
00322 
00323   /*
00324     Return on empty pattern or '*'.
00325   */
00326   if (pattern == (char *) NULL)
00327     return(MagickTrue);
00328   if (GetUTFCode(pattern) == 0)
00329     return(MagickTrue);
00330   if (LocaleCompare(pattern,"*") == 0)
00331     return(MagickTrue);
00332   p=pattern+strlen(pattern)-1;
00333   if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
00334     {
00335       ExceptionInfo
00336         *exception;
00337 
00338       ImageInfo
00339         *image_info;
00340 
00341       /*
00342         Determine if pattern is a scene, i.e. img0001.pcd[2].
00343       */
00344       image_info=AcquireImageInfo();
00345       (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
00346       exception=AcquireExceptionInfo();
00347       (void) SetImageInfo(image_info,MagickTrue,exception);
00348       exception=DestroyExceptionInfo(exception);
00349       if (LocaleCompare(image_info->filename,pattern) != 0)
00350         {
00351           image_info=DestroyImageInfo(image_info);
00352           return(MagickFalse);
00353         }
00354       image_info=DestroyImageInfo(image_info);
00355     }
00356   /*
00357     Evaluate glob expression.
00358   */
00359   done=MagickFalse;
00360   while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
00361   {
00362     if (GetUTFCode(expression) == 0)
00363       if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
00364         break;
00365     switch (GetUTFCode(pattern))
00366     {
00367       case '\\':
00368       {
00369         pattern+=GetUTFOctets(pattern);
00370         if (GetUTFCode(pattern) != 0)
00371           pattern+=GetUTFOctets(pattern);
00372         break;
00373       }
00374       case '*':
00375       {
00376         MagickBooleanType
00377           status;
00378 
00379         status=MagickFalse;
00380         pattern+=GetUTFOctets(pattern);
00381         while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
00382         {
00383           status=GlobExpression(expression,pattern,case_insensitive);
00384           expression+=GetUTFOctets(expression);
00385         }
00386         if (status != MagickFalse)
00387           {
00388             while (GetUTFCode(expression) != 0)
00389               expression+=GetUTFOctets(expression);
00390             while (GetUTFCode(pattern) != 0)
00391               pattern+=GetUTFOctets(pattern);
00392           }
00393         break;
00394       }
00395       case '[':
00396       {
00397         unsigned long
00398           c;
00399 
00400         pattern+=GetUTFOctets(pattern);
00401         for ( ; ; )
00402         {
00403           if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
00404             {
00405               done=MagickTrue;
00406               break;
00407             }
00408           if (GetUTFCode(pattern) == '\\')
00409             {
00410               pattern+=GetUTFOctets(pattern);
00411               if (GetUTFCode(pattern) == 0)
00412                 {
00413                   done=MagickTrue;
00414                   break;
00415                 }
00416              }
00417           if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
00418             {
00419               c=GetUTFCode(pattern);
00420               pattern+=GetUTFOctets(pattern);
00421               pattern+=GetUTFOctets(pattern);
00422               if (GetUTFCode(pattern) == ']')
00423                 {
00424                   done=MagickTrue;
00425                   break;
00426                 }
00427               if (GetUTFCode(pattern) == '\\')
00428                 {
00429                   pattern+=GetUTFOctets(pattern);
00430                   if (GetUTFCode(pattern) == 0)
00431                     {
00432                       done=MagickTrue;
00433                       break;
00434                     }
00435                 }
00436               if ((GetUTFCode(expression) < c) ||
00437                   (GetUTFCode(expression) > GetUTFCode(pattern)))
00438                 {
00439                   pattern+=GetUTFOctets(pattern);
00440                   continue;
00441                 }
00442             }
00443           else
00444             if (GetUTFCode(pattern) != GetUTFCode(expression))
00445               {
00446                 pattern+=GetUTFOctets(pattern);
00447                 continue;
00448               }
00449           pattern+=GetUTFOctets(pattern);
00450           while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
00451           {
00452             if ((GetUTFCode(pattern) == '\\') &&
00453                 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
00454               pattern+=GetUTFOctets(pattern);
00455             pattern+=GetUTFOctets(pattern);
00456           }
00457           if (GetUTFCode(pattern) != 0)
00458             {
00459               pattern+=GetUTFOctets(pattern);
00460               expression+=GetUTFOctets(expression);
00461             }
00462           break;
00463         }
00464         break;
00465       }
00466       case '?':
00467       {
00468         pattern+=GetUTFOctets(pattern);
00469         expression+=GetUTFOctets(expression);
00470         break;
00471       }
00472       case '{':
00473       {
00474         register const char
00475           *p;
00476 
00477         pattern+=GetUTFOctets(pattern);
00478         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
00479         {
00480           p=expression;
00481           match=MagickTrue;
00482           while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
00483                  (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
00484                  (match != MagickFalse))
00485           {
00486             if (GetUTFCode(pattern) == '\\')
00487               pattern+=GetUTFOctets(pattern);
00488             match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
00489               MagickFalse;
00490             p+=GetUTFOctets(p);
00491             pattern+=GetUTFOctets(pattern);
00492           }
00493           if (GetUTFCode(pattern) == 0)
00494             {
00495               match=MagickFalse;
00496               done=MagickTrue;
00497               break;
00498             }
00499           else
00500             if (match != MagickFalse)
00501               {
00502                 expression=p;
00503                 while ((GetUTFCode(pattern) != '}') &&
00504                        (GetUTFCode(pattern) != 0))
00505                 {
00506                   pattern+=GetUTFOctets(pattern);
00507                   if (GetUTFCode(pattern) == '\\')
00508                     {
00509                       pattern+=GetUTFOctets(pattern);
00510                       if (GetUTFCode(pattern) == '}')
00511                         pattern+=GetUTFOctets(pattern);
00512                     }
00513                 }
00514               }
00515             else
00516               {
00517                 while ((GetUTFCode(pattern) != '}') &&
00518                        (GetUTFCode(pattern) != ',') &&
00519                        (GetUTFCode(pattern) != 0))
00520                 {
00521                   pattern+=GetUTFOctets(pattern);
00522                   if (GetUTFCode(pattern) == '\\')
00523                     {
00524                       pattern+=GetUTFOctets(pattern);
00525                       if ((GetUTFCode(pattern) == '}') ||
00526                           (GetUTFCode(pattern) == ','))
00527                         pattern+=GetUTFOctets(pattern);
00528                     }
00529                 }
00530               }
00531             if (GetUTFCode(pattern) != 0)
00532               pattern+=GetUTFOctets(pattern);
00533           }
00534         break;
00535       }
00536       default:
00537       {
00538         if (case_insensitive != MagickFalse)
00539           {
00540             if (tolower((int) GetUTFCode(expression)) !=
00541                 tolower((int) GetUTFCode(pattern)))
00542               {
00543                 done=MagickTrue;
00544                 break;
00545               }
00546           }
00547         else
00548           if (GetUTFCode(expression) != GetUTFCode(pattern))
00549             {
00550               done=MagickTrue;
00551               break;
00552             }
00553         expression+=GetUTFOctets(expression);
00554         pattern+=GetUTFOctets(pattern);
00555       }
00556     }
00557   }
00558   while (GetUTFCode(pattern) == '*')
00559     pattern+=GetUTFOctets(pattern);
00560   match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
00561     MagickTrue : MagickFalse;
00562   return(match);
00563 }
00564 
00565 /*
00566 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00567 %                                                                             %
00568 %                                                                             %
00569 %                                                                             %
00570 +     I s G l o b                                                             %
00571 %                                                                             %
00572 %                                                                             %
00573 %                                                                             %
00574 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00575 %
00576 %  IsGlob() returns MagickTrue if the path specification contains a globbing
00577 %  pattern.
00578 %
00579 %  The format of the IsGlob method is:
00580 %
00581 %      MagickBooleanType IsGlob(const char *path)
00582 %
00583 %  A description of each parameter follows:
00584 %
00585 %    o path: the path.
00586 %
00587 */
00588 MagickExport MagickBooleanType IsGlob(const char *path)
00589 {
00590   MagickBooleanType
00591     status;
00592 
00593   if (IsPathAccessible(path) != MagickFalse)
00594     return(MagickFalse);
00595   status=(strchr(path,'*') != (char *) NULL) ||
00596     (strchr(path,'?') != (char *) NULL) ||
00597     (strchr(path,'{') != (char *) NULL) ||
00598     (strchr(path,'}') != (char *) NULL) ||
00599     (strchr(path,'[') != (char *) NULL) ||
00600     (strchr(path,']') != (char *) NULL) ? MagickTrue : MagickFalse;
00601   return(status);
00602 }
00603 
00604 /*
00605 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00606 %                                                                             %
00607 %                                                                             %
00608 %                                                                             %
00609 %   T o k e n i z e r                                                         %
00610 %                                                                             %
00611 %                                                                             %
00612 %                                                                             %
00613 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
00614 %
00615 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
00616 %  one at a time from a string of characters.  The characters used for white
00617 %  space, for break characters, and for quotes can be specified.  Also,
00618 %  characters in the string can be preceded by a specifiable escape character
00619 %  which removes any special meaning the character may have.
00620 %
00621 %  Here is some terminology:
00622 %
00623 %    o token: A single unit of information in the form of a group of
00624 %      characters.
00625 %
00626 %    o white space: Space that gets ignored (except within quotes or when
00627 %      escaped), like blanks and tabs. in addition, white space terminates a
00628 %      non-quoted token.
00629 %
00630 %    o break set: One or more characters that separates non-quoted tokens.
00631 %      Commas are a common break character. The usage of break characters to
00632 %      signal the end of a token is the same as that of white space, except
00633 %      multiple break characters with nothing or only white space between
00634 %      generate a null token for each two break characters together.
00635 %
00636 %      For example, if blank is set to be the white space and comma is set to
00637 %      be the break character, the line
00638 %
00639 %        A, B, C ,  , DEF
00640 %
00641 %        ... consists of 5 tokens:
00642 %
00643 %        1)  "A"
00644 %        2)  "B"
00645 %        3)  "C"
00646 %        4)  "" (the null string)
00647 %        5)  "DEF"
00648 %
00649 %    o Quote character: A character that, when surrounding a group of other
00650 %      characters, causes the group of characters to be treated as a single
00651 %      token, no matter how many white spaces or break characters exist in
00652 %      the group. Also, a token always terminates after the closing quote.
00653 %      For example, if ' is the quote character, blank is white space, and
00654 %      comma is the break character, the following string
00655 %
00656 %        A, ' B, CD'EF GHI
00657 %
00658 %        ... consists of 4 tokens:
00659 %
00660 %        1)  "A"
00661 %        2)  " B, CD" (note the blanks & comma)
00662 %        3)  "EF"
00663 %        4)  "GHI"
00664 %
00665 %      The quote characters themselves do not appear in the resultant
00666 %      tokens.  The double quotes are delimiters i use here for
00667 %      documentation purposes only.
00668 %
00669 %    o Escape character: A character which itself is ignored but which
00670 %      causes the next character to be used as is.  ^ and \ are often used
00671 %      as escape characters. An escape in the last position of the string
00672 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
00673 %      and non-escape) character. For example, assume white space, break
00674 %      character, and quote are the same as in the above examples, and
00675 %      further, assume that ^ is the escape character. Then, in the string
00676 %
00677 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
00678 %
00679 %        ... there are 7 tokens:
00680 %
00681 %        1)  "ABC"
00682 %        2)  " DEF ' GH"
00683 %        3)  "I"
00684 %        4)  " "     (a lone blank)
00685 %        5)  "J"
00686 %        6)  "K L"
00687 %        7)  "^"     (passed as is at end of line)
00688 %
00689 %  The format of the Tokenizer method is:
00690 %
00691 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
00692 %        const size_t max_token_length,const char *line,const char *white,
00693 %        const char *break_set,const char *quote,const char escape,
00694 %        char *breaker,int *next,char *quoted)
00695 %
00696 %  A description of each parameter follows:
00697 %
00698 %    o flag: right now, only the low order 3 bits are used.
00699 %
00700 %        1 => convert non-quoted tokens to upper case
00701 %        2 => convert non-quoted tokens to lower case
00702 %        0 => do not convert non-quoted tokens
00703 %
00704 %    o token: a character string containing the returned next token
00705 %
00706 %    o max_token_length: the maximum size of "token".  Characters beyond
00707 %      "max_token_length" are truncated.
00708 %
00709 %    o line: the string to be parsed.
00710 %
00711 %    o white: a string of the valid white spaces.  example:
00712 %
00713 %        char whitesp[]={" \t"};
00714 %
00715 %      blank and tab will be valid white space.
00716 %
00717 %    o break: a string of the valid break characters. example:
00718 %
00719 %        char breakch[]={";,"};
00720 %
00721 %      semicolon and comma will be valid break characters.
00722 %
00723 %    o quote: a string of the valid quote characters. An example would be
00724 %
00725 %        char quote[]={"'\""};
00726 %
00727 %      (this causes single and double quotes to be valid) Note that a
00728 %      token starting with one of these characters needs the same quote
00729 %      character to terminate it.
00730 %
00731 %      for example:
00732 %
00733 %        "ABC '
00734 %
00735 %      is unterminated, but
00736 %
00737 %        "DEF" and 'GHI'
00738 %
00739 %      are properly terminated.  Note that different quote characters
00740 %      can appear on the same line; only for a given token do the quote
00741 %      characters have to be the same.
00742 %
00743 %    o escape: the escape character (NOT a string ... only one
00744 %      allowed). Use zero if none is desired.
00745 %
00746 %    o breaker: the break character used to terminate the current
00747 %      token.  If the token was quoted, this will be the quote used.  If
00748 %      the token is the last one on the line, this will be zero.
00749 %
00750 %    o next: this variable points to the first character of the
00751 %      next token.  it gets reset by "tokenizer" as it steps through the
00752 %      string.  Set it to 0 upon initialization, and leave it alone
00753 %      after that.  You can change it if you want to jump around in the
00754 %      string or re-parse from the beginning, but be careful.
00755 %
00756 %    o quoted: set to MagickTrue if the token was quoted and MagickFalse
00757 %      if not.  You may need this information (for example:  in C, a
00758 %      string with quotes around it is a character string, while one
00759 %      without is an identifier).
00760 %
00761 %    o result: 0 if we haven't reached EOS (end of string), and 1
00762 %      if we have.
00763 %
00764 */
00765 
00766 #define IN_WHITE 0  /* state at the start of each Tokenizer() call; a quote character seen here opens a quoted token */
00767 #define IN_TOKEN 1  /* presumably inside an unquoted token -- its transitions are outside this excerpt, TODO confirm */
00768 #define IN_QUOTE 2  /* inside a quoted token: break characters are stored and StoreToken() skips case conversion */
00769 #define IN_OZONE 3  /* entered when the quote that opened the token is seen again */
00770 
/*
  Return the zero-based position of the first character in `string` equal to
  `c`, or -1 when there is none.  The terminating NUL is never matched.
*/
static long sindex(int c,const char *string)
{
  long
    position;

  position=0;
  while (string[position] != '\0')
  {
    if ((int) string[position] == c)
      return(position);
    position++;
  }
  return(-1);
}
00781 
00782 static void StoreToken(TokenInfo *token_info,char *string,
00783   size_t max_token_length,int c)
00784 {
00785   register long
00786     i;
00787 
00788   if ((token_info->offset < 0) ||
00789       ((size_t) token_info->offset >= (max_token_length-1)))
00790     return;
00791   i=token_info->offset++;
00792   string[i]=(char) c;
00793   if (token_info->state == IN_QUOTE)
00794     return;
00795   switch (token_info->flag & 0x03)
00796   {
00797     case 1:
00798     {
00799       string[i]=(char) toupper(c);
00800       break;
00801     }
00802     case 2:
00803     {
00804       string[i]=(char) tolower(c);
00805       break;
00806     }
00807     default:
00808       break;
00809   }
00810 }
00811 
00812 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
00813   char *token,const size_t max_token_length,const char *line,const char *white,
00814   const char *break_set,const char *quote,const char escape,char *breaker,
00815   int *next,char *quoted)
00816 {
00817   int
00818     c;
00819 
00820   register long
00821     i;
00822 
00823   *breaker='\0';
00824   *quoted='\0';
00825   if (line[*next] == '\0')
00826     return(1);
00827   token_info->state=IN_WHITE;
00828   token_info->quote=(char) MagickFalse;
00829   token_info->flag=flag;
00830   for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
00831   {
00832     c=(int) line[*next];
00833     i=sindex(c,break_set);
00834     if (i >= 0)
00835       {
00836         switch (token_info->state)
00837         {
00838           case IN_WHITE:
00839           case IN_TOKEN:
00840           case IN_OZONE:
00841           {
00842             (*next)++;
00843             *breaker=break_set[i];
00844             token[token_info->offset]='\0';
00845             return(0);
00846           }
00847           case IN_QUOTE:
00848           {
00849             StoreToken(token_info,token,max_token_length,c);
00850             break;
00851           }
00852         }
00853         continue;
00854       }
00855     i=sindex(c,quote);
00856     if (i >= 0)
00857       {
00858         switch (token_info->state)
00859         {
00860           case IN_WHITE:
00861           {
00862             token_info->state=IN_QUOTE;
00863             token_info->quote=quote[i];
00864             *quoted=(char) MagickTrue;
00865             break;
00866           }
00867           case IN_QUOTE:
00868           {
00869             if (quote[i] != token_info->quote)
00870               StoreToken(token_info,token,max_token_length,c);
00871             else
00872               {
00873                 token_info->state=IN_OZONE;
00874                 token_info->quote='\0';
00875               }
00876             break;
00877           }
00878           case IN_TOKEN:
00879           case IN_OZONE:
00880           {
00881             *breaker=(char) c;
00882             token[token_info->offset]='\0';
00883             return(0);
00884           }
00885         }
00886         continue;
00887       }
00888     i=sindex(c,white);
00889     if (i >= 0)
00890       {
00891         switch (token_info->state)
00892         {
00893           case IN_WHITE:
00894           case IN_OZONE:
00895             break;
00896           case IN_TOKEN:
00897           {
00898             token_info->state=IN_OZONE;
00899             break;
00900           }
00901           case IN_QUOTE:
00902           {
00903             StoreToken(token_info,token,max_token_length,c);
00904             break;
00905           }
00906         }
00907         continue;
00908       }
00909     if (c == (int) escape)
00910       {
00911         if (line[(*next)+1] == '\0')
00912           {
00913             *breaker='\0';
00914             StoreToken(token_info,token,max_token_length,c);
00915             (*next)++;
00916             token[token_info->offset]='\0';
00917             return(0);
00918           }
00919         switch (token_info->state)
00920         {
00921           case IN_WHITE:
00922           {
00923             (*next)--;
00924             token_info->state=IN_TOKEN;
00925             break;
00926           }
00927           case IN_TOKEN:
00928           case IN_QUOTE:
00929           {
00930             (*next)++;
00931             c=(int) line[*next];
00932             StoreToken(token_info,token,max_token_length,c);
00933             break;
00934           }
00935           case IN_OZONE:
00936           {
00937             token[token_info->offset]='\0';
00938             return(0);
00939           }
00940         }
00941         continue;
00942       }
00943     switch (token_info->state)
00944     {
00945       case IN_WHITE:
00946         token_info->state=IN_TOKEN;
00947       case IN_TOKEN:
00948       case IN_QUOTE:
00949       {
00950         StoreToken(token_info,token,max_token_length,c);
00951         break;
00952       }
00953       case IN_OZONE:
00954       {
00955         token[token_info->offset]='\0';
00956         return(0);
00957       }
00958     }
00959   }
00960   token[token_info->offset]='\0';
00961   return(0);
00962 }

Generated on 19 Nov 2009 for MagickCore by  doxygen 1.6.1