Skip to content

Commit

Permalink
Merge pull request #283 from julia-vscode/sp/fix-char-parsing
Browse files Browse the repository at this point in the history
fix char parsing
  • Loading branch information
davidanthoff authored Jun 9, 2021
2 parents cb8f8d0 + 26d3bb5 commit 6154d19
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 69 deletions.
1 change: 1 addition & 0 deletions src/CSTParser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ function parse_compound(ps::ParseState, ret::EXPR)
if err isa StackOverflowError
throw(error(string(ps, "\nsize: ", ps.l.io.size)))
end
mErrorToken(ps, ret, Unknown)
end
end
ret = EXPR(:errortoken, EXPR[ret, nextarg], nothing)
Expand Down
12 changes: 8 additions & 4 deletions src/components/strings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,11 @@ function parse_string_or_cmd(ps::ParseState, prefixed=false)
ex = mErrorToken(ps, Unknown)
push!(ret, ex)
else
str = tostr(b)
str = try
tostr(b)
catch err
return mErrorToken(ps, ret, InvalidString)
end
if istrip
str = str[1:prevind(str, lastindex(str), 3)]
# only mark non-interpolated triple strings
Expand Down Expand Up @@ -299,12 +303,12 @@ function unescape_prefixed(str)
elseif c === '\"'
push!(edits, start:i)
start = -1
else
else
start = -1
end
end
end

if !isempty(edits) || start > -1
str1 = deepcopy(str)
if start > -1
Expand All @@ -316,7 +320,7 @@ function unescape_prefixed(str)
for e in reverse(edits)
n = div(length(e), 2) - 1
str1 = string(str1[1:prevind(str1, first(e))], string(repeat("\\", n), "\""), str1[nextind(str1, last(e)):lastindex(str1)])

end
return str1
end
Expand Down
9 changes: 5 additions & 4 deletions src/spec.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ const AnonFuncOp = 14
MissingColon, # We didn't get a colon (`:`) when we expected to while parsing a `?` expression.
InvalidIterator,
StringInterpolationWithTrailingWhitespace,
TooLongChar,
InvalidChar,
EmptyChar,
InvalidString,
Unknown,
SignatureOfFunctionDefIsNotACall,
MalformedMacroName)
Expand Down Expand Up @@ -87,8 +88,8 @@ end
return parse_string_or_cmd(ps)
else
v = val(ps.t, ps)
if kindof(ps.t) === Tokens.CHAR && length(v) > 3 && !(v[2] == '\\' && valid_escaped_seq(v[2:prevind(v, length(v))]))
return mErrorToken(ps, EXPR(:CHAR, ps.nt.startbyte - ps.t.startbyte, ps.t.endbyte - ps.t.startbyte + 1, string(v[1:2], '\'')), TooLongChar)
if kindof(ps.t) === Tokens.CHAR && length(v) > 3 && !(v[2] == '\\' && valid_escaped_seq(v[2:prevind(v, end)]))
return mErrorToken(ps, EXPR(:CHAR, ps.nt.startbyte - ps.t.startbyte, ps.t.endbyte - ps.t.startbyte + 1, string(v[1:2], '\'')), InvalidChar)
elseif kindof(ps.t) === Tokens.CHAR && length(v) == 2
return mErrorToken(ps, EXPR(:CHAR, ps.nt.startbyte - ps.t.startbyte, ps.t.endbyte - ps.t.startbyte + 1, string(v[1:2], '\'')), EmptyChar)
end
Expand Down Expand Up @@ -229,7 +230,7 @@ function lastchildistrivia(x::EXPR)
end
end

function Base.length(x::EXPR)
function Base.length(x::EXPR)
headof(x) === :NONSTDIDENTIFIER && return 0
headof(x) === :flatten && return length(Iterating._flatten_lhs(x))
n = x.args isa Nothing ? 0 : length(x.args)
Expand Down
103 changes: 43 additions & 60 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -457,11 +457,11 @@ end

comp(x, y) = x == y
"""
    comp(x::CSTParser.EXPR, y::CSTParser.EXPR)

Return `true` when `x` and `y` have matching `head`, `span`, `fullspan` and `val`,
the same `length`, and every pair of children `x[i]`, `y[i]` also satisfies `comp`.
"""
function comp(x::CSTParser.EXPR, y::CSTParser.EXPR)
    # Scalar fields are checked first so that a mismatch short-circuits before any
    # recursion into the children. Each condition is evaluated exactly once.
    comp(x.head, y.head) || return false
    x.span == y.span || return false
    x.fullspan == y.fullspan || return false
    x.val == y.val || return false
    length(x) == length(y) || return false
    return all(comp(x[i], y[i]) for i in 1:length(x))
end

Expand All @@ -482,7 +482,7 @@ function minimal_reparse(s0, s1, x0 = CSTParser.parse(s0, true), x1 = CSTParser.
i1, i2 = revfirstdiff(s0, s1)
(i0 > x1.fullspan || i1 > x1.fullspan || i2 > x1.fullspan) && return inds ? (1:0, 1:length(x1.args), 1:0) : x1 # Should error?
# Find unaffected expressions at start
# CST should be unaffected (and able to be copied across) up to this point,
# CST should be unaffected (and able to be copied across) up to this point,
# but we need to check.
r1 = 1:min(find_arg_at(x0, i0) - 1, length(x0.args), find_arg_at(x1, i0) - 1)
for i = 1:min(find_arg_at(x0, i0) - 1, find_arg_at(x1, i0) - 1)
Expand All @@ -493,17 +493,17 @@ function minimal_reparse(s0, s1, x0 = CSTParser.parse(s0, true), x1 = CSTParser.
r1 = 1:i
end
# we can re-use x0.args[r1]

# assume we'll just use x1.args from here on
r2 = (last(r1) + 1):length(x1.args)
r3 = 0:-1

# though we now check whether there is a sequence at the end of x0.args and
# though we now check whether there is a sequence at the end of x0.args and
# x1.args that match
offset = sizeof(s1)
for i = 0:min(length(x0.args) - last(r1), length(x0.args), length(x1.args)) - 1
if !quick_comp(x0.args[end - i], x1.args[end - i]) ||
offset <= i1 ||
if !quick_comp(x0.args[end - i], x1.args[end - i]) ||
offset <= i1 ||
length(x0.args) - i == last(r1) + 1 ||
offset - x1.args[end-i].fullspan <= i2 <= offset

Expand All @@ -525,9 +525,9 @@ end
# Quick and very dirty comparison of two EXPR, makes extra effort for :errortokens
function quick_comp(a::EXPR, b::EXPR)
a.fullspan != b.fullspan && return false
if headof(a) === :errortoken
if headof(a) === :errortoken
headof(b) !== :errortoken && return false
if a.args !== nothing
if a.args !== nothing
b.args === nothing && return false
return length(a.args) == length(b.args) && (length(a.args) == 0 || quick_comp(first(a.args), first(b.args)))
end
Expand Down Expand Up @@ -649,59 +649,42 @@ function _unescape_string(io, s::AbstractString)
end
end


"""
    valid_escaped_seq(s::AbstractString)

Return `true` if `s` (the text between the quotes of a character literal) denotes a
single character, and `false` otherwise.

Accepted forms:
- any string of length one;
- `\\x` followed by 1-2 hex digits, `\\u` by 1-4, or `\\U` by 1-8, with a value
  `<= 0x10ffff`;
- a backslash followed by 1-3 octal digits with a value `< 128`;
- a backslash followed by exactly one further character that occupies a single
  code unit.

Everything else, including the empty string, is rejected.
"""
function valid_escaped_seq(s::AbstractString)
    l = length(s)
    l == 0 && return false # zero length chars are always invalid
    l == 1 && return true # length-one chars are always valid to Julia's parser
    a = Iterators.Stateful(s)
    # Two or more characters are only acceptable as a single escape sequence.
    popfirst!(a) == '\\' || return false
    c = popfirst!(a)
    # Characters left after the backslash and `c`; derived from the string length
    # instead of asking the stateful iterator for its remaining length.
    nrest = l - 2
    if c === 'x' || c === 'u' || c === 'U'
        maxdigits = c === 'x' ? 2 : c === 'u' ? 4 : 8
        0 < nrest <= maxdigits || return false
        n = 0
        for nc in a
            if '0' <= nc <= '9'
                n = (n << 4) + (nc - '0')
            elseif 'a' <= nc <= 'f'
                n = (n << 4) + (nc - 'a' + 10)
            elseif 'A' <= nc <= 'F'
                n = (n << 4) + (nc - 'A' + 10)
            else
                return false # non-hex character inside the escape
            end
        end
        return n <= 0x10ffff
    elseif '0' <= c <= '7'
        # `c` is the first octal digit, so at most two more may follow.
        nrest <= 2 || return false
        n = c - '0'
        for nc in a
            # Validate the digit being consumed, not the leading digit `c`.
            '0' <= nc <= '7' || return false
            n = (n << 3) + (nc - '0')
        end
        return n < 128
    else
        @static if VERSION < v"1.1.0"
            c = string(c)
        end
        return ncodeunits(c) == 1 && isempty(a)
    end
end




"""
disallowednumberjuxt(ret::EXPR)
Expand Down Expand Up @@ -769,7 +752,7 @@ macro cst_str(x)
CSTParser.parse(x)
end

"""
    issuffixableliteral(ps::ParseState, x::EXPR)

Return `true` when all of the following hold:
- the next token `ps.nt` is an identifier,
- `isemptyws(ps.ws)` is true,
- `x` is a macro call, and
- the value of `x`'s first argument is a `String` ending in `"_str"` or `"_cmd"`.
"""
function issuffixableliteral(ps::ParseState, x::EXPR)
    isidentifier(ps.nt) && isemptyws(ps.ws) && ismacrocall(x) || return false
    name = valof(x.args[1])
    return name isa String && (endswith(name, "_str") || endswith(name, "_cmd"))
end

Expand All @@ -779,4 +762,4 @@ function loop_check(ps, prevpos)
else
position(ps)
end
end
end
41 changes: 40 additions & 1 deletion test/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -974,8 +974,47 @@ end""" |> test_expr
# An expression that begins with a string literal: `"a" in b && c`.
# NOTE(review): `test_expr` is defined outside this view — presumably it checks the
# parse against a reference; confirm.
@testset "toplevel strings" begin
@test test_expr(""""a" in b && c""")
end

# Inside a module: `@doc` applied to a `doc`-prefixed triple-quoted string, with `x`
# on the following line.
@testset "@doc cont" begin
@test test_expr("module a\n@doc doc\"\"\"doc\"\"\"\nx\nend")
end

# Character literals containing escape sequences.
@testset "char escape" begin
# The literals below are passed to `test_expr` and are expected to pass.
# NOTE(review): `test_expr` is defined outside this view — presumably it checks the
# parse against a reference; confirm.
@test test_expr(raw"'\$'")
@test test_expr(raw"'\a'")
@test test_expr(raw"'\3'")
@test test_expr(raw"'\000'")
@test test_expr(raw"'\033'")
@test test_expr(raw"'\177'")
@test test_expr(raw"'\u222'")
@test test_expr(raw"'\ufff'")
@test test_expr(raw"'\x2'")
@test test_expr(raw"'\x22'")
@test test_expr(raw"'\u22'")
@test test_expr(raw"'\u2222'")
@test test_expr(raw"'\U2222'")
@test test_expr(raw"'\U22222'")

# Each of these literals must parse to an expression whose head is `:errortoken`.
@test CSTParser.parse(raw"'\200'").head == :errortoken
@test CSTParser.parse(raw"'\300'").head == :errortoken
@test CSTParser.parse(raw"'\377'").head == :errortoken
@test CSTParser.parse(raw"'\600'").head == :errortoken
@test CSTParser.parse(raw"'\777'").head == :errortoken
@test CSTParser.parse(raw"'\x222'").head == :errortoken
@test CSTParser.parse(raw"'\u22222'").head == :errortoken
@test CSTParser.parse(raw"'\U222222'").head == :errortoken
@test CSTParser.parse(raw"'\asdd'").head == :errortoken
@test CSTParser.parse(raw"''").head == :errortoken
@test CSTParser.parse(raw"'sdd'").head == :errortoken
@test CSTParser.parse(raw"'\u222ää'").head == :errortoken
@test CSTParser.parse(raw"'\x222ää'").head == :errortoken
@test CSTParser.parse(raw"'\U222ää'").head == :errortoken
# Randomly drawn characters, each wrapped in single quotes.
# NOTE(review): `rand(Char, 1000)` is called without an explicit RNG, so the sampled
# characters may differ between runs unless the global RNG is seeded elsewhere.
for c in rand(Char, 1000)
@test test_expr(string("'", c, "'"))
end
end

# A string literal whose `\U` escape carries nine hex digits must parse to an
# expression whose head is `:errortoken`.
@testset "invalid char in string" begin
@test CSTParser.parse(raw"\"\U222222222\"").head == :errortoken
end
end

0 comments on commit 6154d19

Please sign in to comment.