gecko/xpcom/io/nsWildCard.cpp

471 lines
14 KiB
C++

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* vim:set ts=4 sts=4 sw=4 cin et: */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/* *
*
*
* nsWildCard.cpp: shell-like wildcard match routines
*
* See nsIZipReader.findEntries documentation in nsIZipReader.idl for
* a description of the syntax supported by the routines in this file.
*
* Rob McCool
*
*/
#include "nsWildCard.h"
#include "nsXPCOM.h"
#include "nsCRTGlue.h"
#include "nsCharTraits.h"
/* -------------------- ASCII-specific character methods ------------------- */
typedef int static_assert_character_code_arrangement['a' > 'A' ? 1 : -1];
template<class T>
static int
alpha(T c)
{
return ('a' <= c && c <= 'z') ||
('A' <= c && c <= 'Z');
}
template<class T>
static int
alphanumeric(T c)
{
return ('0' <= c && c <= '9') || alpha(c);
}
template<class T>
static int
lower(T c)
{
return ('A' <= c && c <= 'Z') ? c + ('a' - 'A') : c;
}
template<class T>
static int
upper(T c)
{
return ('a' <= c && c <= 'z') ? c - ('a' - 'A') : c;
}
/* ----------------------------- _valid_subexp ---------------------------- */
template<class T>
static int
_valid_subexp(const T *expr, T stop1, T stop2)
{
register int x;
int nsc = 0; /* Number of special characters */
int np; /* Number of pipe characters in union */
int tld = 0; /* Number of tilde characters */
for (x = 0; expr[x] && (expr[x] != stop1) && (expr[x] != stop2); ++x) {
switch(expr[x]) {
case '~':
if(tld) /* at most one exclusion */
return INVALID_SXP;
if (stop1) /* no exclusions within unions */
return INVALID_SXP;
if (!expr[x+1]) /* exclusion cannot be last character */
return INVALID_SXP;
if (!x) /* exclusion cannot be first character */
return INVALID_SXP;
++tld;
/* fall through */
case '*':
case '?':
case '$':
++nsc;
break;
case '[':
++nsc;
if((!expr[++x]) || (expr[x] == ']'))
return INVALID_SXP;
for(; expr[x] && (expr[x] != ']'); ++x) {
if(expr[x] == '\\' && !expr[++x])
return INVALID_SXP;
}
if(!expr[x])
return INVALID_SXP;
break;
case '(':
++nsc;
if (stop1) /* no nested unions */
return INVALID_SXP;
np = -1;
do {
int t = ::_valid_subexp(&expr[++x], T(')'), T('|'));
if(t == 0 || t == INVALID_SXP)
return INVALID_SXP;
x+=t;
if(!expr[x])
return INVALID_SXP;
++np;
} while (expr[x] == '|' );
if(np < 1) /* must be at least one pipe */
return INVALID_SXP;
break;
case ')':
case ']':
case '|':
return INVALID_SXP;
case '\\':
++nsc;
if(!expr[++x])
return INVALID_SXP;
break;
default:
break;
}
}
if((!stop1) && (!nsc)) /* must be at least one special character */
return NON_SXP;
return ((expr[x] == stop1 || expr[x] == stop2) ? x : INVALID_SXP);
}
template<class T>
int
NS_WildCardValid_(const T *expr)
{
int x = ::_valid_subexp(expr, T('\0'), T('\0'));
return (x < 0 ? x : VALID_SXP);
}
NS_COM int
NS_WildCardValid(const char *expr)
{
return NS_WildCardValid_(expr);
}
NS_COM int
NS_WildCardValid(const PRUnichar *expr)
{
return NS_WildCardValid_(expr);
}
/* ----------------------------- _shexp_match ----------------------------- */
#define MATCH 0
#define NOMATCH 1
#define ABORTED -1
template<class T>
static int
_shexp_match(const T *str, const T *expr, PRBool case_insensitive, unsigned int level);
/**
* Count characters until we reach a NUL character or either of the
* two delimiter characters, stop1 or stop2. If we encounter a bracketed
* expression, look only for NUL or ']' inside it. Do not look for stop1
* or stop2 inside it. Return ABORTED if bracketed expression is unterminated.
* Handle all escaping.
* Return index in input string of first stop found, or ABORTED if not found.
* If "dest" is non-NULL, copy counted characters to it and NUL terminate.
*/
template<class T>
static int
_scan_and_copy(const T *expr, T stop1, T stop2, T *dest)
{
register int sx; /* source index */
register char cc;
for (sx = 0; (cc = expr[sx]) && cc != stop1 && cc != stop2; sx++) {
if (cc == '\\') {
if (!expr[++sx])
return ABORTED; /* should be impossible */
}
else if (cc == '[') {
while ((cc = expr[++sx]) && cc != ']') {
if(cc == '\\' && !expr[++sx])
return ABORTED;
}
if (!cc)
return ABORTED; /* should be impossible */
}
}
if (dest && sx) {
/* Copy all but the closing delimiter. */
memcpy(dest, expr, sx * sizeof(T));
dest[sx] = 0;
}
return cc ? sx : ABORTED; /* index of closing delimiter */
}
/* On input, expr[0] is the opening parenthesis of a union.
* See if any of the alternatives in the union matches as a pattern.
* The strategy is to take each of the alternatives, in turn, and append
* the rest of the expression (after the closing ')' that marks the end of
* this union) to that alternative, and then see if the resultant expression
* matches the input string. Repeat this until some alternative matches,
* or we have an abort.
*/
template<class T>
static int
_handle_union(const T *str, const T *expr, PRBool case_insensitive,
unsigned int level)
{
register int sx; /* source index */
int cp; /* source index of closing parenthesis */
int count;
int ret = NOMATCH;
T *e2;
/* Find the closing parenthesis that ends this union in the expression */
cp = ::_scan_and_copy(expr, T(')'), T('\0'), static_cast<T*>(NULL));
if (cp == ABORTED || cp < 4) /* must be at least "(a|b" before ')' */
return ABORTED;
++cp; /* now index of char after closing parenthesis */
e2 = (T *) NS_Alloc((1 + nsCharTraits<T>::length(expr)) * sizeof(T));
if (!e2)
return ABORTED;
for (sx = 1; ; ++sx) {
/* Here, expr[sx] is one character past the preceeding '(' or '|'. */
/* Copy everything up to the next delimiter to e2 */
count = ::_scan_and_copy(expr + sx, T(')'), T('|'), e2);
if (count == ABORTED || !count) {
ret = ABORTED;
break;
}
sx += count;
/* Append everything after closing parenthesis to e2. This is safe. */
nsCharTraits<T>::copy(e2 + count, expr + cp, nsCharTraits<T>::length(expr + cp) + 1);
ret = ::_shexp_match(str, e2, case_insensitive, level + 1);
if (ret != NOMATCH || !expr[sx] || expr[sx] == ')')
break;
}
NS_Free(e2);
if (sx < 2)
ret = ABORTED;
return ret;
}
/* returns 1 if val is in range from start..end, case insensitive. */
static int
_is_char_in_range(unsigned char start, unsigned char end, unsigned char val)
{
char map[256];
memset(map, 0, sizeof map);
while (start <= end)
map[lower(start++)] = 1;
return map[lower(val)];
}
template<class T>
static int
_shexp_match(const T *str, const T *expr, PRBool case_insensitive,
unsigned int level)
{
register int x; /* input string index */
register int y; /* expression index */
int ret,neg;
if (level > 20) /* Don't let the stack get too deep. */
return ABORTED;
for(x = 0, y = 0; expr[y]; ++y, ++x) {
if((!str[x]) && (expr[y] != '$') && (expr[y] != '*')) {
return NOMATCH;
}
switch(expr[y]) {
case '$':
if(str[x])
return NOMATCH;
--x; /* we don't want loop to increment x */
break;
case '*':
while(expr[++y] == '*'){}
if(!expr[y])
return MATCH;
while(str[x]) {
ret = ::_shexp_match(&str[x++], &expr[y], case_insensitive,
level + 1);
switch(ret) {
case NOMATCH:
continue;
case ABORTED:
return ABORTED;
default:
return MATCH;
}
}
if((expr[y] == '$') && (expr[y+1] == '\0') && (!str[x]))
return MATCH;
else
return NOMATCH;
case '[': {
T start, end = 0;
int i;
neg = ((expr[++y] == '^') && (expr[y+1] != ']'));
if (neg)
++y;
i = y;
start = expr[i++];
if (start == '\\')
start = expr[i++];
if (alphanumeric(start) && expr[i++] == '-') {
end = expr[i++];
if (end == '\\')
end = expr[i++];
}
if (alphanumeric(end) && expr[i] == ']') {
/* This is a range form: a-b */
T val = str[x];
if (end < start) { /* swap them */
T tmp = end;
end = start;
start = tmp;
}
if (case_insensitive && alpha(val)) {
val = ::_is_char_in_range((unsigned char) start,
(unsigned char) end,
(unsigned char) val);
if (neg == val)
return NOMATCH;
}
else if (neg != ((val < start) || (val > end))) {
return NOMATCH;
}
y = i;
}
else {
/* Not range form */
int matched = 0;
for (; expr[y] != ']'; y++) {
if (expr[y] == '\\')
++y;
if(case_insensitive)
matched |= (upper(str[x]) == upper(expr[y]));
else
matched |= (str[x] == expr[y]);
}
if (neg == matched)
return NOMATCH;
}
}
break;
case '(':
if (!expr[y+1])
return ABORTED;
return ::_handle_union(&str[x], &expr[y], case_insensitive, level + 1);
case '?':
break;
case ')':
case ']':
case '|':
return ABORTED;
case '\\':
++y;
/* fall through */
default:
if(case_insensitive) {
if(upper(str[x]) != upper(expr[y]))
return NOMATCH;
}
else {
if(str[x] != expr[y])
return NOMATCH;
}
break;
}
}
return (str[x] ? NOMATCH : MATCH);
}
template<class T>
static int
ns_WildCardMatch(const T *str, const T *xp, PRBool case_insensitive)
{
T *expr = NULL;
int x, ret = MATCH;
if (!nsCharTraits<T>::find(xp, nsCharTraits<T>::length(xp), T('~')))
return _shexp_match(str, xp, case_insensitive, 0);
expr = (T *) NS_Alloc((nsCharTraits<T>::length(xp) + 1) * sizeof(T));
if(!expr)
return NOMATCH;
x = ::_scan_and_copy(expr, T('~'), T('\0'), static_cast<T*>(NULL));
if (x != ABORTED && expr[x] == '~') {
expr[x++] = '\0';
ret = ::_shexp_match(str, &expr[x], case_insensitive, 0);
switch (ret) {
case NOMATCH: ret = MATCH; break;
case MATCH: ret = NOMATCH; break;
default: break;
}
}
if (ret == MATCH)
ret = ::_shexp_match(str, expr, case_insensitive, 0);
NS_Free(expr);
return ret;
}
template<class T>
NS_COM int
NS_WildCardMatch_(const T *str, const T *expr, PRBool case_insensitive)
{
int is_valid = NS_WildCardValid(expr);
switch(is_valid) {
case INVALID_SXP:
return -1;
default:
return ns_WildCardMatch(str, expr, case_insensitive);
}
}
NS_COM int
NS_WildCardMatch(const char *str, const char *xp,
PRBool case_insensitive)
{
return NS_WildCardMatch_(str, xp, case_insensitive);
}
NS_COM int
NS_WildCardMatch(const PRUnichar *str, const PRUnichar *xp,
PRBool case_insensitive)
{
return NS_WildCardMatch_(str, xp, case_insensitive);
}