Skip to content

Account for C string literals and format_args in HiddenUnicodeCodepoints lint #134956

New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Merged
merged 3 commits into from
Dec 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions compiler/rustc_ast/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use rustc_span::{Ident, Span, Symbol};

use crate::Expr;
use crate::ptr::P;
use crate::token::LitKind;

// Definitions:
//
Expand Down Expand Up @@ -45,6 +46,10 @@ pub struct FormatArgs {
pub span: Span,
pub template: Vec<FormatArgsPiece>,
pub arguments: FormatArguments,
/// The raw, un-split format string literal, with no escaping or processing.
///
/// Generally only useful for lints that care about the raw bytes the user wrote.
pub uncooked_fmt_str: (LitKind, Symbol),
}

/// A piece of a format template string.
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_ast/src/mut_visit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1596,7 +1596,7 @@ fn walk_inline_asm_sym<T: MutVisitor>(

fn walk_format_args<T: MutVisitor>(vis: &mut T, fmt: &mut FormatArgs) {
// FIXME: visit the template exhaustively.
let FormatArgs { span, template: _, arguments } = fmt;
let FormatArgs { span, template: _, arguments, uncooked_fmt_str: _ } = fmt;
for FormatArgument { kind, expr } in arguments.all_args_mut() {
match kind {
FormatArgumentKind::Named(ident) | FormatArgumentKind::Captured(ident) => {
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_ast/src/visit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1061,7 +1061,7 @@ pub fn walk_inline_asm_sym<'a, V: Visitor<'a>>(
}

pub fn walk_format_args<'a, V: Visitor<'a>>(visitor: &mut V, fmt: &'a FormatArgs) -> V::Result {
let FormatArgs { span: _, template: _, arguments } = fmt;
let FormatArgs { span: _, template: _, arguments, uncooked_fmt_str: _ } = fmt;
for FormatArgument { kind, expr } in arguments.all_args() {
match kind {
FormatArgumentKind::Named(ident) | FormatArgumentKind::Captured(ident) => {
Expand Down
9 changes: 7 additions & 2 deletions compiler/rustc_builtin_macros/src/asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use smallvec::smallvec;
use {rustc_ast as ast, rustc_parse_format as parse};

use crate::errors;
use crate::util::expr_to_spanned_string;
use crate::util::{ExprToSpannedString, expr_to_spanned_string};

pub struct AsmArgs {
pub templates: Vec<P<ast::Expr>>,
Expand Down Expand Up @@ -527,7 +527,12 @@ fn expand_preparsed_asm(
let msg = "asm template must be a string literal";
let template_sp = template_expr.span;
let template_is_mac_call = matches!(template_expr.kind, ast::ExprKind::MacCall(_));
let (template_str, template_style, template_span) = {
let ExprToSpannedString {
symbol: template_str,
style: template_style,
span: template_span,
..
} = {
let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, template_expr, msg) else {
return ExpandResult::Retry(());
};
Expand Down
18 changes: 14 additions & 4 deletions compiler/rustc_builtin_macros/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use rustc_parse_format as parse;
use rustc_span::{BytePos, ErrorGuaranteed, Ident, InnerSpan, Span, Symbol};

use crate::errors;
use crate::util::expr_to_spanned_string;
use crate::util::{ExprToSpannedString, expr_to_spanned_string};

// The format_args!() macro is expanded in three steps:
// 1. First, `parse_args` will parse the `(literal, arg, arg, name=arg, name=arg)` syntax,
Expand Down Expand Up @@ -166,13 +166,18 @@ fn make_format_args(

let MacroInput { fmtstr: efmt, mut args, is_direct_literal } = input;

let (fmt_str, fmt_style, fmt_span) = {
let ExprToSpannedString {
symbol: fmt_str,
span: fmt_span,
style: fmt_style,
uncooked_symbol: uncooked_fmt_str,
} = {
let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, efmt.clone(), msg) else {
return ExpandResult::Retry(());
};
match mac {
Ok(mut fmt) if append_newline => {
fmt.0 = Symbol::intern(&format!("{}\n", fmt.0));
fmt.symbol = Symbol::intern(&format!("{}\n", fmt.symbol));
Comment on lines -175 to +180
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remark: this is indeed easier to follow

fmt
}
Ok(fmt) => fmt,
Expand Down Expand Up @@ -584,7 +589,12 @@ fn make_format_args(
}
}

ExpandResult::Ready(Ok(FormatArgs { span: fmt_span, template, arguments: args }))
ExpandResult::Ready(Ok(FormatArgs {
span: fmt_span,
template,
arguments: args,
uncooked_fmt_str,
}))
}

fn invalid_placeholder_type_error(
Expand Down
23 changes: 19 additions & 4 deletions compiler/rustc_builtin_macros/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,17 @@ pub(crate) fn warn_on_duplicate_attribute(ecx: &ExtCtxt<'_>, item: &Annotatable,

/// `Ok` represents successfully retrieving the string literal at the correct
/// position, e.g., `println("abc")`.
type ExprToSpannedStringResult<'a> = Result<(Symbol, ast::StrStyle, Span), UnexpectedExprKind<'a>>;
pub(crate) type ExprToSpannedStringResult<'a> = Result<ExprToSpannedString, UnexpectedExprKind<'a>>;

pub(crate) struct ExprToSpannedString {
pub symbol: Symbol,
pub style: ast::StrStyle,
pub span: Span,
/// The raw string literal, with no escaping or processing.
///
/// Generally only useful for lints that care about the raw bytes the user wrote.
pub uncooked_symbol: (ast::token::LitKind, Symbol),
}

/// - `Ok` is returned when the conversion to a string literal is unsuccessful,
/// but another type of expression is obtained instead.
Expand Down Expand Up @@ -90,7 +100,12 @@ pub(crate) fn expr_to_spanned_string<'a>(
ExpandResult::Ready(Err(match expr.kind {
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
Ok(ast::LitKind::Str(s, style)) => {
return ExpandResult::Ready(Ok((s, style, expr.span)));
return ExpandResult::Ready(Ok(ExprToSpannedString {
symbol: s,
style,
span: expr.span,
uncooked_symbol: (token_lit.kind, token_lit.symbol),
}));
}
Ok(ast::LitKind::ByteStr(..)) => {
let mut err = cx.dcx().struct_span_err(expr.span, err_msg);
Expand Down Expand Up @@ -128,7 +143,7 @@ pub(crate) fn expr_to_string(
Ok((err, _)) => err.emit(),
Err(guar) => guar,
})
.map(|(symbol, style, _)| (symbol, style))
.map(|ExprToSpannedString { symbol, style, .. }| (symbol, style))
})
}

Expand Down Expand Up @@ -183,7 +198,7 @@ pub(crate) fn get_single_str_spanned_from_tts(
Ok((err, _)) => err.emit(),
Err(guar) => guar,
})
.map(|(symbol, _style, span)| (symbol, span))
.map(|ExprToSpannedString { symbol, span, .. }| (symbol, span))
})
}

Expand Down
46 changes: 34 additions & 12 deletions compiler/rustc_lint/src/hidden_unicode_codepoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,36 @@ impl HiddenUnicodeCodepoints {
sub,
});
}

fn check_literal(
&mut self,
cx: &EarlyContext<'_>,
text: Symbol,
lit_kind: ast::token::LitKind,
span: Span,
label: &'static str,
) {
if !contains_text_flow_control_chars(text.as_str()) {
return;
}
let (padding, point_at_inner_spans) = match lit_kind {
// account for `"` or `'`
ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true),
// account for `c"`
ast::token::LitKind::CStr => (2, true),
// account for `r###"`
ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true),
// account for `cr###"`
ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
// suppress bad literals.
ast::token::LitKind::Err(_) => return,
// Be conservative just in case new literals do support these.
_ => (0, false),
};
self.lint_text_direction_codepoint(cx, text, span, padding, point_at_inner_spans, label);
}
}

impl EarlyLintPass for HiddenUnicodeCodepoints {
fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) {
if let ast::AttrKind::DocComment(_, comment) = attr.kind {
Expand All @@ -97,18 +126,11 @@ impl EarlyLintPass for HiddenUnicodeCodepoints {
// byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString`
match &expr.kind {
ast::ExprKind::Lit(token_lit) => {
let text = token_lit.symbol;
if !contains_text_flow_control_chars(text.as_str()) {
return;
}
let padding = match token_lit.kind {
// account for `"` or `'`
ast::token::LitKind::Str | ast::token::LitKind::Char => 1,
// account for `r###"`
ast::token::LitKind::StrRaw(n) => n as u32 + 2,
_ => return,
};
self.lint_text_direction_codepoint(cx, text, expr.span, padding, true, "literal");
self.check_literal(cx, token_lit.symbol, token_lit.kind, expr.span, "literal");
}
ast::ExprKind::FormatArgs(args) => {
let (lit_kind, text) = args.uncooked_fmt_str;
self.check_literal(cx, text, lit_kind, args.span, "format string");
}
_ => {}
};
Expand Down
10 changes: 10 additions & 0 deletions tests/ui/parser/unicode-control-codepoints.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//@ edition: 2021

fn main() {
// if access_level != "us‫e‪r" { // Check if admin
//~^ ERROR unicode codepoint changing visible direction of text present in comment
Expand Down Expand Up @@ -25,6 +27,14 @@ fn main() {
//~| ERROR non-ASCII character in raw byte string literal
println!("{:?}", '‮');
//~^ ERROR unicode codepoint changing visible direction of text present in literal

let _ = c"‮";
//~^ ERROR unicode codepoint changing visible direction of text present in literal
let _ = cr#"‮"#;
//~^ ERROR unicode codepoint changing visible direction of text present in literal

println!("{{‮}}");
//~^ ERROR unicode codepoint changing visible direction of text present in format string
}

//"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only */"
Expand Down
Loading
Loading