Skip to content

Commit b50b541

Browse files
Author: Dave Abrahams (committed)
Merge pull request #806 from RLovelett/gyb-python-3
[gyb] Python 2 or 3 compatible Generate Your Boilerplate
2 parents feace85 + 8f22300 commit b50b541

File tree

4 files changed

+32
-18
lines changed

4 files changed

+32
-18
lines changed

lib/ClangImporter/SortedCFDatabase.def.gyb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
%{
1818

1919
import re
20+
import sys
21+
import codecs
2022

2123
prologueLines = ""
2224
epilogueLines = ""
@@ -26,7 +28,7 @@ epilogueLines = ""
2628
lineForName = {}
2729

2830
# Load the data file.
29-
with open(CFDatabaseFile, 'rb') as f:
31+
with codecs.open(CFDatabaseFile, encoding=sys.getfilesystemencoding(), errors='strict') as f:
3032
for line in f:
3133
# Pass through preprocessor directives literally.
3234
# Assume that they all fall into either a strict prologue or epilogue.

utils/GYBUnicodeDataUtils.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
##===----------------------------------------------------------------------===##
1212

1313
import re
14+
import sys
15+
import codecs
1416

1517
class UnicodeProperty(object):
1618
"""Abstract base class for Unicode properties."""
@@ -64,11 +66,11 @@ def __init__(self, grapheme_break_property_file_name):
6466
# values to symbolic values.
6567
self.symbolic_values = \
6668
[ None ] * (max(self.numeric_value_table.values()) + 1)
67-
for k,v in self.numeric_value_table.iteritems():
69+
for k,v in self.numeric_value_table.items():
6870
self.symbolic_values[v] = k
6971

7072
# Load the data file.
71-
with open(grapheme_break_property_file_name, 'rb') as f:
73+
with codecs.open(grapheme_break_property_file_name, encoding=sys.getfilesystemencoding(), errors='strict') as f:
7274
for line in f:
7375
# Strip comments.
7476
line = re.sub('#.*', '', line)
@@ -329,7 +331,10 @@ def map_index(idx):
329331
else:
330332
return idx
331333

332-
return map(map_index, indexes)
334+
# NOTE: Python 2's `map` function returns a list. Where Python 3's
335+
# `map` function returns an iterator. To work around this the
336+
# result of the `map` is explicitly converted to a `list`.
337+
return list(map(map_index, indexes))
333338

334339
# If self.BMP_data contains identical data blocks, keep the first one,
335340
# remove duplicates and change the indexes in self.BMP_lookup to point to
@@ -514,9 +519,9 @@ def _convert_line(line):
514519

515520
# Match a list of code points.
516521
for token in line.split(" "):
517-
if token == "÷":
522+
if token == u"÷":
518523
boundaries += [ curr_bytes ]
519-
elif token == "×":
524+
elif token == u"×":
520525
pass
521526
else:
522527
code_point = int(token, 16)
@@ -529,21 +534,21 @@ def _convert_line(line):
529534
# and test separately that we handle ill-formed UTF-8 sequences.
530535
if code_point >= 0xd800 and code_point <= 0xdfff:
531536
code_point = 0x200b
532-
code_point = ('\U%(cp)08x' % { 'cp': code_point }).decode('unicode_escape')
533-
as_UTF8_bytes = code_point.encode('utf8')
534-
as_UTF8_escaped = ''.join(['\\x%(byte)02x' % { 'byte': ord(byte) } for byte in as_UTF8_bytes])
537+
code_point = (b'\U%(cp)08x' % { b'cp': code_point }).decode('unicode_escape', 'strict')
538+
as_UTF8_bytes = bytearray(code_point.encode('utf8', 'strict'))
539+
as_UTF8_escaped = ''.join(['\\x%(byte)02x' % { 'byte': byte } for byte in as_UTF8_bytes])
535540
test += as_UTF8_escaped
536541
curr_bytes += len(as_UTF8_bytes)
537542

538543
return (test, boundaries)
539544

540545
# Self-test.
541-
assert(_convert_line('÷ 0903 × 0308 ÷ AC01 ÷ # abc') == ('\\xe0\\xa4\\x83\\xcc\\x88\\xea\\xb0\\x81', [ 0, 5, 8 ]))
542-
assert(_convert_line('÷ D800 ÷ # abc') == ('\\xe2\\x80\\x8b', [ 0, 3 ]))
546+
assert(_convert_line(u'÷ 0903 × 0308 ÷ AC01 ÷ # abc') == ('\\xe0\\xa4\\x83\\xcc\\x88\\xea\\xb0\\x81', [ 0, 5, 8 ]))
547+
assert(_convert_line(u'÷ D800 ÷ # abc') == ('\\xe2\\x80\\x8b', [ 0, 3 ]))
543548

544549
result = []
545550

546-
with open(grapheme_break_test_file_name, 'rb') as f:
551+
with codecs.open(grapheme_break_test_file_name, encoding=sys.getfilesystemencoding(), errors='strict') as f:
547552
for line in f:
548553
test = _convert_line(line)
549554
if test:

utils/gyb.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55
from __future__ import print_function
66

77
import re
8-
from cStringIO import StringIO
8+
try:
9+
from cStringIO import StringIO
10+
except ImportError:
11+
from io import StringIO
912
import tokenize
1013
import textwrap
1114
from bisect import bisect
@@ -135,7 +138,8 @@ def tokenizePythonToUnmatchedCloseCurly(sourceText, start, lineStarts):
135138
if nesting < 0:
136139
return tokenPosToIndex(tokenStart, start, lineStarts)
137140

138-
except tokenize.TokenError, (message, errorPos):
141+
except tokenize.TokenError as error:
142+
(message, errorPos) = error.args
139143
return tokenPosToIndex(errorPos, start, lineStarts)
140144

141145
return len(sourceText)
@@ -304,7 +308,7 @@ def splitGybLines(sourceLines):
304308
dedents = 0
305309
try:
306310
for tokenKind, tokenText, tokenStart, (tokenEndLine, tokenEndCol), lineText \
307-
in tokenize.generate_tokens(sourceLines.__iter__().next):
311+
in tokenize.generate_tokens(lambda i = iter(sourceLines): next(i)):
308312

309313
if tokenKind in (tokenize.COMMENT, tokenize.ENDMARKER):
310314
continue
@@ -324,7 +328,7 @@ def splitGybLines(sourceLines):
324328

325329
lastTokenText,lastTokenKind = tokenText,tokenKind
326330

327-
except tokenize.TokenError, (message, errorPos):
331+
except tokenize.TokenError:
328332
return [] # Let the later compile() call report the error
329333

330334
if lastTokenText == ':':
@@ -347,7 +351,7 @@ def codeStartsWithDedentKeyword(sourceLines):
347351
"""
348352
tokenText = None
349353
for tokenKind, tokenText, _, _, _ \
350-
in tokenize.generate_tokens(sourceLines.__iter__().next):
354+
in tokenize.generate_tokens(lambda i = iter(sourceLines): next(i)):
351355

352356
if tokenKind != tokenize.COMMENT and tokenText.strip() != '':
353357
break

utils/line-directive

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,10 @@ def run():
7171
sources = sys.argv[1:dashes]
7272

7373
command = subprocess.Popen(
74-
sys.argv[dashes + 1:], stderr = subprocess.STDOUT, stdout = subprocess.PIPE
74+
sys.argv[dashes + 1:],
75+
stderr = subprocess.STDOUT,
76+
stdout = subprocess.PIPE,
77+
universal_newlines = True
7578
)
7679

7780
error_pattern = re.compile(

0 commit comments

Comments (0)