Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Lib/test/test_textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@

import unittest

from test.support import cpython_only
from test.support.import_helper import ensure_lazy_imports

from textwrap import TextWrapper, wrap, fill, dedent, indent, shorten


Expand Down Expand Up @@ -1133,5 +1136,12 @@ def test_first_word_too_long_but_placeholder_fits(self):
self.check_shorten("Helloo", 5, "[...]")


class LazyImportTest(unittest.TestCase):
    """Import-time behaviour of the textwrap module."""

    @cpython_only
    def test_lazy_import(self):
        # Modules handed to the helper as the ones textwrap is expected to
        # import lazily.  NOTE(review): the exact check is implemented by
        # test.support.import_helper.ensure_lazy_imports -- confirm there.
        lazily_imported = {"re"}
        ensure_lazy_imports("textwrap", lazily_imported)


if __name__ == '__main__':
unittest.main()
96 changes: 52 additions & 44 deletions Lib/textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Copyright (C) 2002 Python Software Foundation.
# Written by Greg Ward <gward@python.net>

import re
lazy import re

__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten']

Expand Down Expand Up @@ -65,49 +65,56 @@ class TextWrapper:

unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), ord(' '))

# This funky little regex is just the trick for splitting
# text up into word-wrappable chunks. E.g.
# "Hello there -- you goof-ball, use the -b option!"
# splits into
# Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
# (after stripping out empty strings).
word_punct = r'[\w!"\'&.,?]'
letter = r'[^\d\W]'
whitespace = r'[%s]' % re.escape(_whitespace)
nowhitespace = '[^' + whitespace[1:]
wordsep_re = re.compile(r'''
( # any whitespace
%(ws)s+
| # em-dash between words
(?<=%(wp)s) -{2,} (?=\w)
| # word, possibly hyphenated
%(nws)s+? (?:
# hyphenated word
-(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
(?= %(lt)s -? %(lt)s)
| # end of word
(?=%(ws)s|\z)
| # em-dash
(?<=%(wp)s) (?=-{2,}\w)
)
)''' % {'wp': word_punct, 'lt': letter,
'ws': whitespace, 'nws': nowhitespace},
re.VERBOSE)
del word_punct, letter, nowhitespace

# This less funky little regex just splits on recognized spaces. E.g.
# "Hello there -- you goof-ball, use the -b option!"
# splits into
# Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
wordsep_simple_re = re.compile(r'(%s+)' % whitespace)
del whitespace

# XXX this is not locale- or charset-aware -- string.lowercase
# is US-ASCII only (and therefore English-only)
sentence_end_re = re.compile(r'[a-z]' # lowercase letter
r'[\.\!\?]' # sentence-ending punct.
r'[\"\']?' # optional end-of-quote
r'\z') # end of chunk
wordsep_re = None
wordsep_simple_re = None
sentence_end_re = None

@classmethod
def _compile_wordseps(cls):
    """Compile the default splitting regexes on first use.

    Fills in whichever of ``wordsep_re``, ``wordsep_simple_re`` and
    ``sentence_end_re`` are still ``None`` when looked up on *cls*.

    Each attribute is checked on its own rather than using ``wordsep_re``
    as a proxy for all three.  That way a subclass that overrides only
    some of the patterns keeps its overrides (they are never overwritten
    with the defaults) and still receives defaults for the ones it left
    as ``None``.
    """
    need_wordsep = cls.wordsep_re is None
    need_simple = cls.wordsep_simple_re is None
    need_sentence_end = cls.sentence_end_re is None
    if not (need_wordsep or need_simple or need_sentence_end):
        # Everything is already compiled (or overridden); nothing to do.
        return

    if need_wordsep or need_simple:
        # Character class matching exactly the recognized whitespace;
        # shared by both word-separator patterns.
        whitespace = r'[%s]' % re.escape(_whitespace)

    if need_wordsep:
        # This funky little regex is just the trick for splitting
        # text up into word-wrappable chunks.  E.g.
        #   "Hello there -- you goof-ball, use the -b option!"
        # splits into
        #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
        # (after stripping out empty strings).
        word_punct = r'[\w!"\'&.,?]'
        letter = r'[^\d\W]'
        # Negated form of the whitespace class: '[...]' -> '[^...]'.
        nowhitespace = '[^' + whitespace[1:]
        cls.wordsep_re = re.compile(r'''
            ( # any whitespace
              %(ws)s+
            | # em-dash between words
              (?<=%(wp)s) -{2,} (?=\w)
            | # word, possibly hyphenated
              %(nws)s+? (?:
                # hyphenated word
                  -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
                  (?= %(lt)s -? %(lt)s)
                | # end of word
                  (?=%(ws)s|\z)
                | # em-dash
                  (?<=%(wp)s) (?=-{2,}\w)
                )
            )''' % {'wp': word_punct, 'lt': letter,
                    'ws': whitespace, 'nws': nowhitespace},
            re.VERBOSE)

    if need_simple:
        # This less funky little regex just splits on recognized spaces. E.g.
        #   "Hello there -- you goof-ball, use the -b option!"
        # splits into
        #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
        cls.wordsep_simple_re = re.compile(r'(%s+)' % whitespace)

    if need_sentence_end:
        # XXX this is not locale- or charset-aware -- string.lowercase
        # is US-ASCII only (and therefore English-only)
        cls.sentence_end_re = re.compile(r'[a-z]'             # lowercase letter
                                         r'[\.\!\?]'          # sentence-ending punct.
                                         r'[\"\']?'           # optional end-of-quote
                                         r'\z')               # end of chunk

def __init__(self,
width=70,
Expand Down Expand Up @@ -135,6 +142,7 @@ def __init__(self,
self.tabsize = tabsize
self.max_lines = max_lines
self.placeholder = placeholder
self._compile_wordseps()


# -- Private methods -----------------------------------------------
Expand Down
Loading