#2834: Change re module semantics, so that str and bytes mixing is forbidden,

and str (unicode) patterns get full unicode matching by default. The re.ASCII
flag is also introduced to ask for ASCII matching instead.
This commit is contained in:
Antoine Pitrou
2008-08-19 17:56:33 +00:00
parent 3ad7ba10a2
commit fd036451bf
37 changed files with 280 additions and 163 deletions

View File

@@ -11,9 +11,13 @@
This module provides regular expression matching operations similar to
those found in Perl. Both patterns and strings to be searched can be
Unicode strings as well as 8-bit strings. The :mod:`re` module is
always available.
those found in Perl. The :mod:`re` module is always available.
Both patterns and strings to be searched can be Unicode strings as well as
8-bit strings. However, Unicode strings and 8-bit strings cannot be mixed:
that is, you cannot match a Unicode string with a byte pattern or
vice-versa; similarly, when asking for a substitution, the replacement
string must be of the same type as both the pattern and the search string.
Regular expressions use the backslash character (``'\'``) to indicate
special forms or to allow special characters to be used without invoking
@@ -212,12 +216,12 @@ The special characters are:
group; ``(?P<name>...)`` is the only exception to this rule. Following are the
currently supported extensions.
``(?iLmsux)``
(One or more letters from the set ``'i'``, ``'L'``, ``'m'``, ``'s'``,
``'u'``, ``'x'``.) The group matches the empty string; the letters
set the corresponding flags: :const:`re.I` (ignore case),
:const:`re.L` (locale dependent), :const:`re.M` (multi-line),
:const:`re.S` (dot matches all), :const:`re.U` (Unicode dependent),
``(?aiLmsux)``
(One or more letters from the set ``'a'``, ``'i'``, ``'L'``, ``'m'``,
``'s'``, ``'u'``, ``'x'``.) The group matches the empty string; the
letters set the corresponding flags: :const:`re.A` (ASCII-only matching),
:const:`re.I` (ignore case), :const:`re.L` (locale dependent),
:const:`re.M` (multi-line), :const:`re.S` (dot matches all),
and :const:`re.X` (verbose), for the entire regular expression. (The
flags are described in :ref:`contents-of-module-re`.) This
is useful if you wish to include the flags as part of the regular
@@ -324,56 +328,62 @@ the second character. For example, ``\$`` matches the character ``'$'``.
word is indicated by whitespace or a non-alphanumeric, non-underscore character.
Note that ``\b`` is defined as the boundary between ``\w`` and ``\W``, so the
precise set of characters deemed to be alphanumeric depends on the values of the
``UNICODE`` and ``LOCALE`` flags. Inside a character range, ``\b`` represents
``ASCII`` and ``LOCALE`` flags. Inside a character range, ``\b`` represents
the backspace character, for compatibility with Python's string literals.
``\B``
Matches the empty string, but only when it is *not* at the beginning or end of a
word. This is just the opposite of ``\b``, so is also subject to the settings
of ``LOCALE`` and ``UNICODE``.
of ``ASCII`` and ``LOCALE``.
``\d``
When the :const:`UNICODE` flag is not specified, matches any decimal digit; this
is equivalent to the set ``[0-9]``. With :const:`UNICODE`, it will match
whatever is classified as a digit in the Unicode character properties database.
For Unicode (str) patterns:
When the :const:`ASCII` flag is specified, matches any decimal digit; this
is equivalent to the set ``[0-9]``. Otherwise, it will match whatever
is classified as a digit in the Unicode character properties database
(this includes the standard ASCII digits, so it is a superset of
``[0-9]``).
For 8-bit (bytes) patterns:
Matches any decimal digit; this is equivalent to the set ``[0-9]``.
``\D``
When the :const:`UNICODE` flag is not specified, matches any non-digit
character; this is equivalent to the set ``[^0-9]``. With :const:`UNICODE`, it
will match anything other than character marked as digits in the Unicode
character properties database.
Matches any character which is not a decimal digit. This is the
opposite of ``\d`` and is therefore similarly subject to the setting of
the ``ASCII`` flag.
``\s``
When the :const:`LOCALE` and :const:`UNICODE` flags are not specified, matches
any whitespace character; this is equivalent to the set ``[ \t\n\r\f\v]``. With
:const:`LOCALE`, it will match this set plus whatever characters are defined as
space for the current locale. If :const:`UNICODE` is set, this will match the
characters ``[ \t\n\r\f\v]`` plus whatever is classified as space in the Unicode
character properties database.
For Unicode (str) patterns:
When the :const:`ASCII` flag is specified, matches only ASCII whitespace
characters; this is equivalent to the set ``[ \t\n\r\f\v]``. Otherwise,
it will match this set plus whatever is classified as space in the Unicode
character properties database (including for example the non-breaking
spaces mandated by typography rules in many languages).
For 8-bit (bytes) patterns:
Matches characters considered whitespace in the ASCII character set;
this is equivalent to the set ``[ \t\n\r\f\v]``.
``\S``
When the :const:`LOCALE` and :const:`UNICODE` flags are not specified, matches
any non-whitespace character; this is equivalent to the set ``[^ \t\n\r\f\v]``
With :const:`LOCALE`, it will match any character not in this set, and not
defined as space in the current locale. If :const:`UNICODE` is set, this will
match anything other than ``[ \t\n\r\f\v]`` and characters marked as space in
the Unicode character properties database.
Matches any character which is not a whitespace character. This is the
opposite of ``\s`` and is therefore similarly subject to the settings of
``ASCII`` and ``LOCALE``.
``\w``
When the :const:`LOCALE` and :const:`UNICODE` flags are not specified, matches
any alphanumeric character and the underscore; this is equivalent to the set
``[a-zA-Z0-9_]``. With :const:`LOCALE`, it will match the set ``[0-9_]`` plus
whatever characters are defined as alphanumeric for the current locale. If
:const:`UNICODE` is set, this will match the characters ``[0-9_]`` plus whatever
is classified as alphanumeric in the Unicode character properties database.
For Unicode (str) patterns:
When the :const:`ASCII` flag is specified, this is equivalent to the set
``[a-zA-Z0-9_]``. Otherwise, it will match whatever is classified as
alphanumeric in the Unicode character properties database (it will
include most characters that can be part of a word in whatever language,
as well as numbers and the underscore sign).
For 8-bit (bytes) patterns:
Matches characters considered alphanumeric in the ASCII character set;
this is equivalent to the set ``[a-zA-Z0-9_]``. With :const:`LOCALE`,
it will additionally match whatever characters are defined as
alphanumeric for the current locale.
``\W``
When the :const:`LOCALE` and :const:`UNICODE` flags are not specified, matches
any non-alphanumeric character; this is equivalent to the set ``[^a-zA-Z0-9_]``.
With :const:`LOCALE`, it will match any character not in the set ``[0-9_]``, and
not defined as alphanumeric for the current locale. If :const:`UNICODE` is set,
this will match anything other than ``[0-9_]`` and characters marked as
alphanumeric in the Unicode character properties database.
Matches any character which is not an alphanumeric character. This is the
opposite of ``\w`` and is therefore similarly subject to the settings of
``ASCII`` and ``LOCALE``.
``\Z``
Matches only at the end of the string.
@@ -454,6 +464,25 @@ form.
expression at a time needn't worry about compiling regular expressions.)
.. data:: A
ASCII
Make ``\w``, ``\W``, ``\b``, ``\B``, ``\d``, ``\D``, ``\s`` and ``\S``
perform ASCII-only matching instead of full Unicode matching. This is only
meaningful for Unicode patterns, and is ignored for byte patterns.
Note that the :const:`re.U` flag still exists (as well as its synonym
:const:`re.UNICODE` and its embedded counterpart ``(?u)``), but it has
become useless in Python 3.0.
In previous Python versions, it was used to specify that
matching had to be Unicode dependent (the default was ASCII matching in
all circumstances). Starting from Python 3.0, the default is Unicode
matching for Unicode strings (which can be changed by specifying the
``'a'`` flag), and ASCII matching for 8-bit strings. Further, Unicode
dependent matching for 8-bit strings isn't allowed anymore and results
in a :exc:`ValueError`.
.. data:: I
IGNORECASE
@@ -465,7 +494,10 @@ form.
LOCALE
Make ``\w``, ``\W``, ``\b``, ``\B``, ``\s`` and ``\S`` dependent on the
current locale.
current locale. The use of this flag is discouraged as the locale mechanism
is very unreliable, and it only handles one "culture" at a time anyway;
you should use Unicode matching instead, which is the default in Python 3.0
for Unicode (str) patterns.
.. data:: M
@@ -486,13 +518,6 @@ form.
newline; without this flag, ``'.'`` will match anything *except* a newline.
.. data:: U
UNICODE
Make ``\w``, ``\W``, ``\b``, ``\B``, ``\d``, ``\D``, ``\s`` and ``\S`` dependent
on the Unicode character properties database.
.. data:: X
VERBOSE
@@ -511,6 +536,8 @@ form.
b = re.compile(r"\d+\.\d*")
.. function:: search(pattern, string[, flags])
Scan through *string* looking for a location where the regular expression

View File

@@ -14,7 +14,7 @@ import time
import locale
import calendar
from re import compile as re_compile
from re import IGNORECASE
from re import IGNORECASE, ASCII
from re import escape as re_escape
from datetime import date as datetime_date
try:
@@ -262,7 +262,7 @@ class TimeRE(dict):
def compile(self, format):
"""Return a compiled re object for the format string."""
return re_compile(self.pattern(format), IGNORECASE)
return re_compile(self.pattern(format), IGNORECASE | ASCII)
_cache_lock = _thread_allocate_lock()
# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock

View File

@@ -39,7 +39,7 @@ def _translate(s, altchars):
return s.translate(translation)
# Base64 encoding/decoding uses binascii
def b64encode(s, altchars=None):
@@ -126,7 +126,7 @@ def urlsafe_b64decode(s):
return b64decode(s, b'-_')
# Base32 encoding/decoding must be done in Python
_b32alphabet = {
0: b'A', 9: b'J', 18: b'S', 27: b'3',
@@ -225,7 +225,7 @@ def b32decode(s, casefold=False, map01=None):
# characters because this will tell us how many null bytes to remove from
# the end of the decoded string.
padchars = 0
mo = re.search('(?P<pad>[=]*)$', s)
mo = re.search(b'(?P<pad>[=]*)$', s)
if mo:
padchars = len(mo.group('pad'))
if padchars > 0:
@@ -262,7 +262,7 @@ def b32decode(s, casefold=False, map01=None):
return b''.join(parts)
# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
# lowercase. The RFC also recommends against accepting input case
# insensitively.
@@ -291,12 +291,12 @@ def b16decode(s, casefold=False):
raise TypeError("expected bytes, not %s" % s.__class__.__name__)
if casefold:
s = s.upper()
if re.search('[^0-9A-F]', s):
if re.search(b'[^0-9A-F]', s):
raise binascii.Error('Non-base16 digit found')
return binascii.unhexlify(s)
# Legacy interface. This code could be cleaned up since I don't believe
# binascii has any line length limitations. It just doesn't seem worth it
# though. The files should be opened in binary mode.
@@ -353,7 +353,7 @@ def decodestring(s):
return binascii.a2b_base64(s)
# Usable as a script...
def main():
"""Small main program"""

View File

@@ -5415,7 +5415,7 @@ ExtendedContext = Context(
# 2. For finite numbers (not infinities and NaNs) the body of the
# number between the optional sign and the optional exponent must have
# at least one decimal digit, possibly after the decimal point. The
# lookahead expression '(?=\d|\.\d)' checks this.
# lookahead expression '(?=[0-9]|\.[0-9])' checks this.
#
# Unicode matching is the default for str patterns, so [0-9] is spelled out
# explicitly to avoid any other meaning for \d than the digits [0-9].

View File

@@ -409,7 +409,7 @@ def get_versions():
out = os.popen(gcc_exe + ' -dumpversion','r')
out_string = out.read()
out.close()
result = re.search('(\d+\.\d+(\.\d+)*)',out_string)
result = re.search('(\d+\.\d+(\.\d+)*)', out_string, re.ASCII)
if result:
gcc_version = StrictVersion(result.group(1))
else:
@@ -421,7 +421,7 @@ def get_versions():
out = os.popen(ld_exe + ' -v','r')
out_string = out.read()
out.close()
result = re.search('(\d+\.\d+(\.\d+)*)',out_string)
result = re.search('(\d+\.\d+(\.\d+)*)', out_string, re.ASCII)
if result:
ld_version = StrictVersion(result.group(1))
else:
@@ -433,7 +433,7 @@ def get_versions():
out = os.popen(dllwrap_exe + ' --version','r')
out_string = out.read()
out.close()
result = re.search(' (\d+\.\d+(\.\d+)*)',out_string)
result = re.search(' (\d+\.\d+(\.\d+)*)', out_string, re.ASCII)
if result:
dllwrap_version = StrictVersion(result.group(1))
else:

View File

@@ -300,7 +300,7 @@ def get_versions():
out = os.popen(gcc_exe + ' -dumpversion','r')
out_string = out.read()
out.close()
result = re.search('(\d+\.\d+\.\d+)',out_string)
result = re.search('(\d+\.\d+\.\d+)', out_string, re.ASCII)
if result:
gcc_version = StrictVersion(result.group(1))
else:

View File

@@ -512,7 +512,7 @@ def get_config_vars(*args):
# patched up as well.
'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
flags = _config_vars[key]
flags = re.sub('-arch\s+\w+\s', ' ', flags)
flags = re.sub('-arch\s+\w+\s', ' ', flags, re.ASCII)
flags = re.sub('-isysroot [^ \t]*', ' ', flags)
_config_vars[key] = flags

View File

@@ -81,7 +81,7 @@ def get_platform ():
return "%s-%s.%s" % (osname, version, release)
elif osname[:6] == "cygwin":
osname = "cygwin"
rel_re = re.compile (r'[\d.]+')
rel_re = re.compile (r'[\d.]+', re.ASCII)
m = rel_re.match(release)
if m:
release = m.group()

View File

@@ -134,7 +134,7 @@ class StrictVersion (Version):
"""
version_re = re.compile(r'^(\d+) \. (\d+) (\. (\d+))? ([ab](\d+))?$',
re.VERBOSE)
re.VERBOSE | re.ASCII)
def parse (self, vstring):

View File

@@ -5,7 +5,8 @@ import distutils.version
import operator
re_validPackage = re.compile(r"(?i)^\s*([a-z_]\w*(?:\.[a-z_]\w*)*)(.*)")
re_validPackage = re.compile(r"(?i)^\s*([a-z_]\w*(?:\.[a-z_]\w*)*)(.*)",
re.ASCII)
# (package) (rest)
re_paren = re.compile(r"^\s*\((.*)\)\s*$") # (list) inside of parentheses
@@ -153,7 +154,8 @@ def split_provision(value):
global _provision_rx
if _provision_rx is None:
_provision_rx = re.compile(
"([a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)*)(?:\s*\(\s*([^)\s]+)\s*\))?$")
"([a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)*)(?:\s*\(\s*([^)\s]+)\s*\))?$",
re.ASCII)
value = value.strip()
m = _provision_rx.match(value)
if not m:

View File

@@ -70,7 +70,7 @@ for c in (b' !"#$%&\'()*+,-./0123456789:;<>'
_QUOPRI_BODY_MAP[c] = chr(c)
# Helpers
def header_check(octet):
"""Return True if the octet should be escaped with header quopri."""
@@ -125,7 +125,7 @@ def quote(c):
return '=%02X' % ord(c)
def header_encode(header_bytes, charset='iso-8859-1'):
"""Encode a single header line with quoted-printable (like) encoding.
@@ -149,7 +149,7 @@ def header_encode(header_bytes, charset='iso-8859-1'):
return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded))
def body_encode(body, maxlinelen=76, eol=NL):
"""Encode with quoted-printable, wrapping at maxlinelen characters.
@@ -225,7 +225,7 @@ def body_encode(body, maxlinelen=76, eol=NL):
return encoded_body
# BAW: I'm not sure if the intent was for the signature of this function to be
# the same as base64MIME.decode() or not...
def decode(encoded, eol=NL):
@@ -280,7 +280,7 @@ body_decode = decode
decodestring = decode
def _unquote_match(match):
"""Turn a match in the form =AB to the ASCII character with value 0xab"""
s = match.group(0)
@@ -296,4 +296,4 @@ def header_decode(s):
the high level email.Header class for that functionality.
"""
s = s.replace('_', ' ')
return re.sub(r'=\w{2}', _unquote_match, s)
return re.sub(r'=\w{2}', _unquote_match, s, re.ASCII)

View File

@@ -52,7 +52,7 @@ specialsre = re.compile(r'[][\\()<>@,:;".]')
escapesre = re.compile(r'[][\\()"]')
# Helpers
def formataddr(pair):
@@ -73,7 +73,7 @@ def formataddr(pair):
return address
def getaddresses(fieldvalues):
"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
all = COMMASPACE.join(fieldvalues)
@@ -81,7 +81,7 @@ def getaddresses(fieldvalues):
return a.addresslist
ecre = re.compile(r'''
=\? # literal =?
(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
@@ -93,7 +93,7 @@ ecre = re.compile(r'''
''', re.VERBOSE | re.IGNORECASE)
def formatdate(timeval=None, localtime=False, usegmt=False):
"""Returns a date string as specified by RFC 2822, e.g.:
@@ -146,7 +146,7 @@ def formatdate(timeval=None, localtime=False, usegmt=False):
zone)
def make_msgid(idstring=None):
"""Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
@@ -168,7 +168,7 @@ def make_msgid(idstring=None):
return msgid
# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions. We use this to worm
# around broken older Pythons.
@@ -202,7 +202,7 @@ def unquote(str):
return str
# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
"""Decode string according to RFC 2231"""
@@ -227,7 +227,8 @@ def encode_rfc2231(s, charset=None, language=None):
return "%s'%s'%s" % (charset, language, s)
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
re.ASCII)
def decode_params(params):
"""Decode parameters list according to RFC 2231.

View File

@@ -176,12 +176,10 @@ class Codec(codecs.Codec):
return "", 0
# IDNA allows decoding to operate on Unicode strings, too.
if isinstance(input, bytes):
labels = dots.split(input)
else:
# Force to bytes
if not isinstance(input, bytes):
# XXX obviously wrong, see #3232
input = bytes(input)
labels = input.split(b".")
labels = input.split(b".")
if labels and len(labels[-1]) == 0:
trailing_dot = '.'

View File

@@ -590,7 +590,8 @@ def parse150(resp):
global _150_re
if _150_re is None:
import re
_150_re = re.compile("150 .* \((\d+) bytes\)", re.IGNORECASE)
_150_re = re.compile(
"150 .* \((\d+) bytes\)", re.IGNORECASE | re.ASCII)
m = _150_re.match(resp)
if not m:
return None
@@ -613,7 +614,7 @@ def parse227(resp):
global _227_re
if _227_re is None:
import re
_227_re = re.compile(r'(\d+),(\d+),(\d+),(\d+),(\d+),(\d+)')
_227_re = re.compile(r'(\d+),(\d+),(\d+),(\d+),(\d+),(\d+)', re.ASCII)
m = _227_re.search(resp)
if not m:
raise error_proto(resp)

View File

@@ -385,4 +385,4 @@ class HTMLParser(_markupbase.ParserBase):
return '&'+s+';'
return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));",
replaceEntities, s)
replaceEntities, s, re.ASCII)

View File

@@ -121,7 +121,7 @@ def time2netscape(t=None):
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII)
def offset_from_tz_string(tz):
offset = None
if tz in UTC_ZONES:
@@ -191,9 +191,9 @@ def _str2time(day, mon, yr, hr, min, sec, tz):
STRICT_DATE_RE = re.compile(
r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII)
WEEKDAY_RE = re.compile(
r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII)
LOOSE_HTTP_DATE_RE = re.compile(
r"""^
(\d\d?) # day
@@ -210,7 +210,7 @@ LOOSE_HTTP_DATE_RE = re.compile(
([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
\s*
(?:\(\w+\))? # ASCII representation of timezone in parens.
\s*$""", re.X)
\s*$""", re.X | re.ASCII)
def http2time(text):
"""Returns time in seconds since epoch of time represented by a string.
@@ -282,7 +282,7 @@ ISO_DATE_RE = re.compile(
\s*
([-+]?\d\d?:?(:?\d\d)?
|Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
\s*$""", re.X)
\s*$""", re.X | re. ASCII)
def iso2time(text):
"""
As for http2time, but parses the ISO 8601 formats:
@@ -489,7 +489,7 @@ def parse_ns_headers(ns_headers):
return result
IPV4_RE = re.compile(r"\.\d+$")
IPV4_RE = re.compile(r"\.\d+$", re.ASCII)
def is_HDN(text):
"""Return True if text is a host domain name."""
# XXX
@@ -574,7 +574,7 @@ def user_domain_match(A, B):
return True
return False
cut_port_re = re.compile(r":\d+$")
cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
"""Return request-host, as defined by RFC 2965.
@@ -1207,7 +1207,7 @@ class CookieJar:
domain_re = re.compile(r"[^.]*")
dots_re = re.compile(r"^\.+")
magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII)
def __init__(self, policy=None):
if policy is None:
@@ -1856,7 +1856,7 @@ class LWPCookieJar(FileCookieJar):
def _really_load(self, f, filename, ignore_discard, ignore_expires):
magic = f.readline()
if not re.search(self.magic_re, magic):
if not self.magic_re.search(magic):
msg = ("%r does not look like a Set-Cookie3 (LWP) format "
"file" % filename)
raise LoadError(msg)
@@ -1965,7 +1965,7 @@ class MozillaCookieJar(FileCookieJar):
header by default (Mozilla can cope with that).
"""
magic_re = "#( Netscape)? HTTP Cookie File"
magic_re = re.compile("#( Netscape)? HTTP Cookie File")
header = """\
# Netscape HTTP Cookie File
# http://www.netscape.com/newsref/std/cookie_spec.html
@@ -1977,7 +1977,7 @@ class MozillaCookieJar(FileCookieJar):
now = time.time()
magic = f.readline()
if not re.search(self.magic_re, magic):
if not self.magic_re.search(magic):
f.close()
raise LoadError(
"%r does not look like a Netscape format cookies file" %

View File

@@ -445,7 +445,7 @@ _CookiePattern = re.compile(
""+ _LegalCharsPatt +"*" # Any word or empty string
r")" # End of group 'val'
r"\s*;?" # Probably ending in a semi-colon
)
, re.ASCII) # May be removed if safe.
# At long last, here is the cookie class.

View File

@@ -88,11 +88,12 @@ InternalDate = re.compile(r'.*INTERNALDATE "'
r' (?P<hour>[0-9][0-9]):(?P<min>[0-9][0-9]):(?P<sec>[0-9][0-9])'
r' (?P<zonen>[-+])(?P<zoneh>[0-9][0-9])(?P<zonem>[0-9][0-9])'
r'"')
Literal = re.compile(r'.*{(?P<size>\d+)}$')
Literal = re.compile(r'.*{(?P<size>\d+)}$', re.ASCII)
MapCRLF = re.compile(r'\r\n|\r|\n')
Response_code = re.compile(r'\[(?P<type>[A-Z-]+)( (?P<data>[^\]]*))?\]')
Untagged_response = re.compile(r'\* (?P<type>[A-Z-]+)( (?P<data>.*))?')
Untagged_status = re.compile(r'\* (?P<data>\d+) (?P<type>[A-Z-]+)( (?P<data2>.*))?')
Untagged_status = re.compile(
r'\* (?P<data>\d+) (?P<type>[A-Z-]+)( (?P<data2>.*))?', re.ASCII)
@@ -146,7 +147,7 @@ class IMAP4:
class abort(error): pass # Service errors - close and retry
class readonly(abort): pass # Mailbox status changed to READ-ONLY
mustquote = re.compile(r"[^\w!#$%&'*+,.:;<=>?^`|~-]")
mustquote = re.compile(r"[^\w!#$%&'*+,.:;<=>?^`|~-]", re.ASCII)
def __init__(self, host = '', port = IMAP4_PORT):
self.debug = Debug
@@ -168,7 +169,7 @@ class IMAP4:
self.tagpre = Int2AP(random.randint(4096, 65535))
self.tagre = re.compile(r'(?P<tag>'
+ self.tagpre
+ r'\d+) (?P<type>[A-Z]+) (?P<data>.*)')
+ r'\d+) (?P<type>[A-Z]+) (?P<data>.*)', re.ASCII)
# Get server welcome message,
# request and store CAPABILITY response.

View File

@@ -67,7 +67,7 @@ def JSONNumber(match, context):
fn = getattr(context, 'parse_int', None) or int
res = fn(integer)
return res, None
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
pattern(r'(-?(?:0|[1-9][0-9]*))(\.[0-9]+)?([eE][-+]?[0-9]+)?')(JSONNumber)
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

View File

@@ -199,7 +199,7 @@ class TimedRotatingFileHandler(BaseRotatingHandler):
else:
raise ValueError("Invalid rollover interval specified: %s" % self.when)
self.extMatch = re.compile(self.extMatch)
self.extMatch = re.compile(self.extMatch, re.ASCII)
self.interval = self.interval * interval # multiply by units requested
self.rolloverAt = currentTime + self.interval

Some files were not shown because too many files have changed in this diff Show More