fixed string literal parsing; broder will test

author Yang Zhang <y_z@mit.edu>

Sat, 1 Nov 2008 21:38:01 +0000 (17:38 -0400)

committer Yang Zhang <y_z@mit.edu>

Sat, 1 Nov 2008 21:38:01 +0000 (17:38 -0400)
author Yang Zhang <y_z@mit.edu>
Sat, 1 Nov 2008 21:38:01 +0000 (17:38 -0400)
committer Yang Zhang <y_z@mit.edu>
Sat, 1 Nov 2008 21:38:01 +0000 (17:38 -0400)
diff --git a/invirt-dns b/invirt-dns

index 4193be1..b2a9ac6 100755 (executable)
--- a/invirt-dns
+++ b/invirt-dns
@@ -121,9 +121,18 @@ class QuotingBindAuthority(authority.BindAuthority):
      This will catch double quotes as marking the start or end of a
      quoted phrase, unless the double quote is escaped by a backslash
      """
-    # Grab everything up to the first whitespace character or
-    # quotation mark not proceeded by a backslash
-    whitespace_re = re.compile(r'(.*?)([\t\n\x0b\x0c\r ]+|(?<!\\)")')
+    # Match either a double-quoted string literal or an unquoted one
+    # that runs up to whitespace or the end of line.  This yields two
+    # groups, one of which has a match and the other of which is None,
+    # depending on whether the literal was quoted or unquoted; this is
+    # what necessitates the subsequent filtering out of groups that
+    # are None.
+    string_pat = \
+            re.compile(r'"((?:[^"\\]|\\.)*)"|((?:[^\\\s]|\\.)+)(?:\s+|\s*$)')
+
+    # For interpreting escapes.
+    escape_pat = re.compile(r'\\(.)')
+
      def collapseContinuations(self, lines):
          L = []
          state = 0
@@ -142,41 +151,13 @@ class QuotingBindAuthority(authority.BindAuthority):
                      L[-1] += ' ' + line
          lines = L
          L = []
+
          for line in lines:
              in_quote = False
              split_line = []
-            while len(line) > 0:
-                match = self.whitespace_re.match(line)
-                if match is None:
-                    # If there's no match, that means that there's no
-                    # whitespace in the rest of the line, so it should
-                    # be treated as a single entity, quoted or not
-                    #
-                    # This also means that a closing quote isn't
-                    # strictly necessary if the line ends the quote
-                    substr = line
-                    end = ''
-                else:
-                    substr, end = match.groups()
-                
-                if in_quote:
-                    # If we're in the middle of the quote, the string
-                    # we just grabbed belongs at the end of the
-                    # previous string
-                    #
-                    # Including the whitespace! Unless it's not
-                    # whitespace and is actually a closequote instead
-                    split_line[-1] += substr + (end if end != '"' else '')
-                else:
-                    # If we're not in the middle of a quote, than this
-                    # is the next new string
-                    split_line.append(substr)
-                
-                if end == '"':
-                    in_quote = not in_quote
-                
-                # Then strip off what we just processed
-                line = line[len(substr + end):]
+            for m in string_pat.finditer(line):
+                [x] = [x for x in m.groups() if x is not None]
+                split_line.append(escape_pat.sub(r'\1', x))
              L.append(split_line)
          return filter(None, L)
author	Yang Zhang <y_z@mit.edu>
	Sat, 1 Nov 2008 21:38:01 +0000 (17:38 -0400)
committer	Yang Zhang <y_z@mit.edu>
	Sat, 1 Nov 2008 21:38:01 +0000 (17:38 -0400)