# Set up these regular expressions outside the function so they only have to
# be compiled once.

# Matches an opening or closing bracket that is not escaped, i.e. one that is
# preceded by an even number (possibly zero) of backslashes. Group 1 captures
# the run of backslash pairs directly before the bracket; group 2 is the
# bracket character itself.
re_bracket = re.compile(r'(?<!\\)((?:\\\\)*)([()])') # Open or close bracket not preceded by a single slash
# Matches an unescaped '(?P' (the prefix of the named-group syntax), using the
# same escaping rule as re_bracket.
re_has_named_group = re.compile(r'(?<!\\)(?:\\\\)*\(\?P') # '(?P' not preceded by a single slash
# The type of a compiled pattern object, kept for isinstance() checks.
re_type = type(re_bracket)
| 19 | |
# Matches either a backslash followed by any single character (captured in
# group 1) or one bare regex metacharacter from the set $ ? * + ^ ( ).
re_unescape = re.compile(r'\\(.)|[$?*+^()]')


def unescape(value):
    """
    Strip regex syntax from ``value`` and return the remaining literal text.

    * A bare ``$ ? * + ^ ( )`` is removed.
    * ``\\A``, ``\\Z``, ``\\b`` and ``\\B`` are removed.
    * A backslash followed by a digit or one of ``d D s S w W`` raises
      ``NoReverseMatch``, since such notation has no single literal value.
    * Any other backslash-escaped character is replaced by the character
      itself, without the backslash.
    """
    def strip_syntax(found):
        char = found.group(1)
        if not char:
            # The bare-metacharacter alternative matched: drop it.
            return ''
        if re.match(r'[\ddDsSwW]', char):
            # These cases shouldn't ever come up - no match possible if they do.
            raise NoReverseMatch(r"Regular expression notation '\%s' was outside of a group so this regex is not reversable" % char)
        if char in 'AZbB':
            # Zero-width assertions contribute no text to the result.
            return ''
        # Any other escaped character stands for itself.
        return char

    return re_unescape.sub(strip_syntax, value)
| 35 | |
def __call__(self, match_obj):
    """
    Return the argument value that fills one parenthesised regex group.

    ``match_obj.group(1)`` must hold the contents of the parenthesis. If
    those contents have the form ``?P<name>regex`` the value is looked up
    in ``self.kwargs`` under ``name``; when the name is missing there, or
    the group is unnamed, the next unused entry of ``self.args`` is taken
    and ``self.current_arg`` is advanced.

    The value, converted with ``str()``, must match the group's regex in
    full; the converted value is returned.

    Raises ``NoReverseMatch`` if no positional argument is left when one
    is needed, or if the value does not match the group's regex.
    """
    def next_positional():
        # Consume and return the next unused positional argument.
        try:
            taken = self.args[self.current_arg]
        except IndexError:
            # The arg wasn't passed in.
            raise NoReverseMatch('Not enough positional arguments passed in')
        self.current_arg += 1
        return taken

    # First we need to figure out whether it's a named or unnamed group.
    grouped = match_obj.group(1)
    m = re.search(r'^\?P<(\w+)>(.*?)$', grouped)
    if m:  # If this was a named group...
        # m.group(1) is the name of the group
        # m.group(2) is the regex.
        test_regex = m.group(2)
        try:
            value = self.kwargs[m.group(1)]
        except KeyError:
            # It was a named group, but the arg was passed in as a
            # positional arg or not at all.
            value = next_positional()
    else:  # Otherwise, this was a positional (unnamed) group.
        test_regex = grouped
        value = next_positional()

    text = str(value)  # TODO: Unicode?
    # re.match anchors the start of the string; the trailing '$' anchors the
    # end. The pattern is wrapped in a non-capturing group so that '$'
    # applies to the whole expression: without it, 'a|b' + '$' parses as
    # 'a' or 'b$', and any value merely starting with 'a' would be accepted.
    if not re.match('(?:%s)$' % test_regex, text):
        raise NoReverseMatch("Value %r didn't match regular expression %r" % (value, test_regex))
    return text
def tokenize(text):
    """
    Recursive tokenizer for regular expression parenthesis.

    Splits the pattern ``text`` at unescaped brackets (located with the
    module-level ``re_bracket``) and returns a ``(tokens, count)`` tuple.
    Each token is either a plain string or a compiled regex built from a
    bracketed group; ``count`` is the number of regex tokens.
    """
    def parse(text, top=True, named_group=False):
        # Parses ``text`` up to the closing bracket of the current level (or
        # until no bracket is left) and returns
        # ``(bits, remaining_text, named_group)``. ``bits`` mixes strings
        # with nested lists; a nested list is a group that was not flattened
        # into its parent.
        bits = []
        m = re_bracket.search(text)
        while m:
            # m.group(1) is what re_bracket captured just ahead of the
            # bracket; it stays with the text preceding the bracket. The
            # bracket itself is consumed.
            before, text = text[:m.start()+len(m.group(1))], text[m.end():]
            if before:
                bits.append(before)
            if m.group(2) != '(':
                # Closing bracket: this level is finished.
                break
            # Opening bracket: parse the group's contents recursively. The
            # flag is passed on only below the top level, so each top-level
            # group starts with named_group=False.
            inner_bits, text, named_group = parse(text, top=False, named_group=not top and named_group)
            if inner_bits:
                # A group whose contents reported a named group is flattened
                # into this level instead of being kept as a nested list.
                inline = named_group
                first_bit = inner_bits[0]
                if isinstance(first_bit, str):
                    if first_bit.startswith('?'):
                        # Regex extension notation.
                        if first_bit.startswith('?:'):
                            # No need to parse this non-grouping parenthesis.
                            inline = True
                            inner_bits[0] = first_bit[2:]
                        elif first_bit.startswith('?P'):
                            # Named group, set variable so higher levels will flatten.
                            named_group = True
                        else:
                            # Skip all other extension notation.
                            inner_bits = None
                if inner_bits:
                    if inline:
                        bits.extend(inner_bits)
                    else:
                        bits.append(inner_bits)
            m = re_bracket.search(text)
        return bits, text, named_group
    bits, text, named_group = parse(text)
    if text:
        # Whatever follows the last bracket is a trailing string bit.
        bits.append(text)
    # Now tokenize the bits. Each token will either be a string or a regex.
    tokens = []
    count = 0
    for bit in bits:
        if isinstance(bit, list):
            # Build the regex here so it only has to be compiled once.
            # build_re is defined elsewhere in this module; its result is
            # suffixed with '$' before compiling.
            bit = re.compile('%s$' % build_re(bit))
            count += 1
        tokens.append(bit)
    return tokens, count
class ReverseRegexLookup(object):
    """
    Rebuilds a string from a regex pattern and a set of argument values.

    The pattern is tokenized once at construction time; ``check()`` then
    substitutes argument values for the regex tokens.

    Attributes:
        has_named_groups: True if the pattern contains an unescaped '(?P'.
        tokens: the token list returned by ``tokenize`` - plain strings and
            compiled regexes.
        minimum_arguments: the number of regex tokens, as counted by
            ``tokenize``.
    """

    def __init__(self, text):
        self.has_named_groups = bool(re_has_named_group.search(text))
        self.tokens, self.minimum_arguments = tokenize(text)

    def check(self, args=[], kwargs={}):
        """
        Return the string produced by filling the pattern with the arguments.

        Each regex token takes its value from ``kwargs`` under the token's
        group name when it has one and the name is present (with a value
        other than None); otherwise from the next unused item of ``args``.
        Each value is converted with ``str()`` and must match its token.
        The joined result is passed through ``unescape`` before returning.

        ``args`` and ``kwargs`` are copied before use, so the caller's
        objects (and the shared default values) are never modified.

        Raises ``NoReverseMatch`` if fewer arguments are supplied than
        ``minimum_arguments``, if a positional argument is needed but none
        is left, or if a value does not match its token's regex.
        """
        if self.minimum_arguments > len(args) + len(kwargs):
            raise NoReverseMatch('Not enough arguments passed in')
        # Work on private copies: values are consumed with .pop() below.
        args = list(args)
        kwargs = kwargs.copy()
        match = []
        for token in self.tokens:
            if not isinstance(token, re_type):  # A string token.
                match.append(token)
                continue
            # A regex token.
            value = None
            # Is it a named argument?
            if token.groupindex:
                # Take the first group name. keys() is not indexable on
                # Python 3, so iterate instead of using keys()[0].
                name = next(iter(token.groupindex))
                try:
                    value = kwargs.pop(name)
                except KeyError:
                    # It was a named group, but the arg was passed in as a
                    # positional arg or not at all.
                    pass
            if value is None:
                try:
                    value = args.pop(0)
                except IndexError:
                    # The arg wasn't passed in.
                    raise NoReverseMatch('Not enough positional arguments passed in')
            value = str(value)  # TODO: Unicode?
            if not token.match(value):
                raise NoReverseMatch("Value %r didn't match regular expression %r" % (value, token.pattern))
            match.append(value)
        match = ''.join(match)
        # Unescape special characters which could possibly be used in a URL
        # and strip unused regular expression syntax.
        match = unescape(match)
        return match
| 171 | |