This will catch double quotes as marking the start or end of a
quoted phrase, unless the double quote is escaped by a backslash
"""
- # Grab everything up to the first whitespace character or
- # quotation mark not proceeded by a backslash
- whitespace_re = re.compile(r'(.*?)([\t\n\x0b\x0c\r ]+|(?<!\\)")')
+ # Match either a double-quoted string literal, or an unquoted one
+ # followed by whitespace or the end of the line. Only one branch
+ # of the alternation can match, so of the two groups one holds
+ # the text and the other is None, depending on whether the
+ # literal was quoted or unquoted; this is what necessitates the
+ # subsequent filtering out of groups that are None.
+ string_pat = \
+ re.compile(r'"((?:[^"\\]|\\.)*)"|((?:[^\\\s]|\\.)+)(?:\s+|\s*$)')
+
+ # For interpreting escapes.
+ escape_pat = re.compile(r'\\(.)')
+
def collapseContinuations(self, lines):
L = []
state = 0
L[-1] += ' ' + line
lines = L
L = []
+
for line in lines:
in_quote = False
split_line = []
- while len(line) > 0:
- match = self.whitespace_re.match(line)
- if match is None:
- # If there's no match, that means that there's no
- # whitespace in the rest of the line, so it should
- # be treated as a single entity, quoted or not
- #
- # This also means that a closing quote isn't
- # strictly necessary if the line ends the quote
- substr = line
- end = ''
- else:
- substr, end = match.groups()
-
- if in_quote:
- # If we're in the middle of the quote, the string
- # we just grabbed belongs at the end of the
- # previous string
- #
- # Including the whitespace! Unless it's not
- # whitespace and is actually a closequote instead
- split_line[-1] += substr + (end if end != '"' else '')
- else:
- # If we're not in the middle of a quote, than this
- # is the next new string
- split_line.append(substr)
-
- if end == '"':
- in_quote = not in_quote
-
- # Then strip off what we just processed
- line = line[len(substr + end):]
+ for m in string_pat.finditer(line):
+ [x] = [x for x in m.groups() if x is not None]
+ split_line.append(escape_pat.sub(r'\1', x))
L.append(split_line)
return filter(None, L)