#include #include #include #include #include "utils.h" /* * Local function protypes: */ static int match_single(char *start_ptr, char *end_ptr, char ch, int *was_error); static char get_next_char(char *string, char **tail, int *was_escaped); /*....................................................................... * This is the top level matching routine. It returns 1 if the string * regexp matches the string, string - otherwise it returns 0. * The final argument is an error flag. It is normally 0 but when there * has been a syntax error it becomes 1. */ int match(char *regexp, char *string, int *was_error) { int was_escaped; /* Returned by get_next_char() to report that the */ /* character that it returned was escaped */ char *start_ptr; /* Start pointer of regexp for match_single() */ char *end_ptr; /* End pointer of regexp for match_single() */ char *tail; /* Pointer to tail part of some string not yet processed */ char new_char; /* Current char returned by get_next_char() */ int match_test; /* 1 or 0 depending on whether success is measured */ /* by a match or by no match */ /* * No error yet. */ *was_error=0; /* * Parse regexp. */ for(;;) { new_char=get_next_char(regexp, ®exp, &was_escaped); /* * Interpret new regexp character. */ if(!was_escaped) { switch (new_char) { /* * Stop at end of regexp string. If the string is also exhausted then * the match has succeded. */ case '\0': return (*string == '\0') ? 1:0; break; /* * Start of a single character regexp. */ case '[': /* * Keep a record of where the inside of the [] regexp started. */ start_ptr = regexp; /* * Locate the matching (un-escaped) ']'. */ for(;;) { new_char=get_next_char(regexp, ®exp, &was_escaped); if(new_char== '\0' || (new_char==']' && !was_escaped) ) break; }; /* * If not found, report error and return. */ if(new_char == '\0') { lprintf(stderr, "Syntax error: \'[\' not matched in regexp\n"); *was_error=1; return 0; }; /* * Record the pointer to the last char in the [] regexp. */ end_ptr = regexp-2; /* * If the first character of the regexp is an un-escaped '^' then * the result of the one-char regexp match is to be complemented. */ new_char=get_next_char(start_ptr, &tail, &was_escaped); if(new_char == '^' && !was_escaped) { start_ptr=tail; match_test=1; } else { match_test=0; }; /* * Make sure that the [] expression contains something. */ if(end_ptr < start_ptr) { lprintf(stderr, "Syntax error: Empty [] regexp encounterred\n"); *was_error=1; return 0; }; /* * If the character following the closing ']' is an unescaped '*', then * the single-character regexp must match 0 or more characters. Otherwise * it should match exactly one. */ new_char=get_next_char(regexp, &tail, &was_escaped); if(new_char=='*' && !was_escaped) { regexp=tail; for(;;) { if(match(regexp, string, was_error) == 1) return 1; if(*was_error) return 0; if(*string == '\0') return 0; if(match_single(start_ptr, end_ptr, *(string++), was_error) == match_test) return 0; }; } else { /* * A '+' following a [] expression says that it should match exactly * one character. This is only really required when one wants to * a following '*' not to be associated with the []. */ if(new_char=='+' && !was_escaped) regexp=tail; if(*string == '\0') return 0; if(match_single(start_ptr, end_ptr, *(string++), was_error) == match_test) return 0; }; continue; /* * '.' matches any character in string. */ case '.': if(*string == '\0') return 0; string++; continue; /* * Zero or more characters in string. */ case '*': /* * Further '*'s are redundant. */ while(get_next_char(regexp, &tail, &was_escaped)=='*' && !was_escaped) regexp=tail; /* * End of regexp string? If so the rest of string definitely matches. */ if(*regexp == '\0') return 1; /* * Now we don't know where the next part of the regexp will continue * in string, so keep recursively calling match() for each subsequent * character in string until we hit the end of string (in which case * the match has failed) or match() returns success. */ while(*string != '\0') { if(match(regexp, string++, was_error)==1) return 1; }; return 0; /* * The character is not a special one - nor are the escaped characters * that didn't get into this switch, they will all be handled together * off the bottom of the switch expression. */ default: break; }; }; /* * Simple character in regular expression must match that in string. * If the character is a \ however it is assumed to escape the following * character - which in turn must match the character in the string. */ if(*string == new_char) { string++; } else { return 0; }; }; } /*....................................................................... * Given an input string, return either the first character or if that * first character is a \ return the corresponding escaped character. * If there is an error return '\0'. The pointer into the input string * will be returned via the second argument, incremented to the point * just after the last character used. The flag, 'was_escaped' is returned * as 1 if the returned character was an escape sequence, and 0 otherwise. */ static char get_next_char(char *string, char **tail, int *was_escaped) { char ch; /* Will hold the character that is to be returned */ /* * Check for escape sequences. */ switch (*string) { case '\\': string++; *was_escaped=1; switch (*string) { /* * Standard escape equivalents for control characters. */ case 'n': ch = '\n'; break; case 'r': ch = '\r'; break; case 't': ch = '\t'; break; case 'f': ch = '\f'; break; /* * Octal escape sequence. */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': ch = (char) strtol(string, &string, 8); break; /* * Hex escape sequence. */ case 'x': case 'X': if(!isxdigit((int) string[1])) { lprintf(stderr, "Incomplete \\x.. hexadecimal escape sequence\n"); *tail=string; return '\0'; }; ch = (char) strtol(string, &string, 16); break; /* * No char to escape - whoops - error. */ case '\0': lprintf(stderr, "Incomplete escape sequence at end of string\n"); *tail = string; return '\0'; break; /* * Any other character gets passed unscathed. */ default: ch = *string; }; break; default: ch = *string; *was_escaped=0; break; }; /* * Return the result. */ *tail = ++string; return ch; } /*....................................................................... * Try to match a single character ch with a [] regexp character list. * Return 1 on success, 0 on failure. Normally *was_error is returned * as 0, but if there is a regexp syntax error it is returned as 1. * The regexp can be formed of any arrangement of the following: * * A-Z : Any capital letter between the two characters provided. * a-z : Any lower case letter between the two characters provided. * 1-9 : Any digit between the numbers specified. * adbc : Any charcter from the specified list. * ^ : When placed before any of the above, the match will succede * : if the character is not one of those specified. Characters * : may be escaped to remove any special meanings or to include * : control characters etc.. */ static int match_single(char *start_ptr, char *end_ptr, char ch, int *was_error) { int in_range; /* Flags when half way through processing a regexp range */ int was_escaped;/* Flags characters that were escaped when read */ char last_char;/* Keeps a record of first char of a potential regexp range */ char new_char; /* Latest char read from regexp */ char *regexp; /* Used to step through regexp */ char *ptr; /* Pointer for general usage */ /* * Strings holding sensible collating sequence for ranges. * It would be much faster to assume ASCII but not very portable. */ static char upper[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ"; static char lower[]="abcdefghijklmnopqrstuvwxyz"; static char digit[]="0123456789"; /* * No errors yet. */ *was_error=0; /* * Copy start pointer to regexp-stepping pointer. */ regexp=start_ptr; /* * Process each char list in regexp in turn until a match is found * or the ending ']' is reached. */ in_range=0; last_char='\0'; while(regexp <= end_ptr) { /* * Get the next character of the regexp, with due regard for * escape sequences. */ new_char=get_next_char(regexp, ®exp, &was_escaped); /* * Check for the range-symbol. */ if(!was_escaped && new_char == '-') { if(in_range || last_char == '\0') { lprintf(stderr, "Incomplete regexp range\n"); *was_error=1; return 0; }; in_range=1; continue; }; /* * Try to match the latest character with ch. */ if(ch==new_char) return 1; /* * Is this the final char of a range designation? */ if(!in_range) { last_char=new_char; } /* * Complete range designation recieved - process it. */ else { /* * Check types of the two characters and see if ch is between them. */ if(isdigit((int) last_char) && isdigit((int) new_char)) { ptr = strchr(digit, ch); if(strchr(digit,last_char) <= ptr && strchr(digit, new_char) >= ptr) return 1; } else if(isupper((int) last_char) && isupper((int) new_char)) { ptr = strchr(upper, ch); if(strchr(upper,last_char) <= ptr && strchr(upper, new_char) >= ptr) return 1; } else if(islower((int) last_char) && islower((int) new_char)) { ptr = strchr(lower, ch); if(strchr(lower,last_char) <= ptr && strchr(lower, new_char) >= ptr) return 1; } else { lprintf(stderr, "Syntax error in regexp character range\n"); *was_error=1; return 0; }; /* * Range parsed, but with no match - prepare for next part of * regexp. */ in_range = 0; last_char = '\0'; }; }; /* * No match found. */ return 0; }