tamer: diagnose::resolver::SourceLine: {Vec<u8>=>String}

Using a byte vector just makes life more difficult with regard to preparing
the diagnostic reports.  We're already validating UTF-8 data for column
generation, which is necessary for a robust report, so let's just store it
as a String to begin with.

DEV-12151
main
Mike Gerwitz 2022-04-28 22:03:37 -04:00
parent 838db689ad
commit 3a5dcfc016
2 changed files with 20 additions and 18 deletions

View File

@ -257,8 +257,8 @@ pub struct SourceLine {
/// The [`Span`] representing the entire source line.
span: Span,
/// Source code text of the line _excluding_ the newline.
text: Vec<u8>,
/// Source code text of the line.
text: String,
}
impl SourceLine {
@ -271,7 +271,7 @@ impl SourceLine {
num: NonZeroU32,
column: Option<Column>,
span: Span,
text: Vec<u8>,
text: String,
) -> Self {
Self {
num,
@ -284,14 +284,12 @@ impl SourceLine {
impl Display for SourceLine {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
// TODO: Just store String instead of a byte vector so we're not
// validating UTF-8 twice.
let s = String::from_utf8_lossy(&self.text);
let text = &self.text;
let disp = if self.text.last() == Some(&b'\n') {
&s[0..s.len() - 1]
let disp = if text.chars().last() == Some('\n') {
&text[0..text.len() - 1]
} else {
&s[..]
&text[..]
};
f.write_str(disp)
@ -571,19 +569,15 @@ impl Line {
/// Produce formatted output for a line containing invalid UTF-8 data.
///
/// This still produces the actual line so that we can help the user
/// track down the invalid byte sequences,
/// but it is unable to resolve columns.
///
/// This is delegated to by [`Line::format_for`].
fn format_invalid_utf8_for(
&mut self,
span: Span,
) -> (Vec<u8>, Span, Option<Column>) {
) -> (String, Span, Option<Column>) {
let bytes = self.take_buf().unwrap_or(vec![]);
let span = span.context().span_or_zz(0, bytes.len());
(bytes, span, None)
(String::from_utf8_lossy(&bytes).into(), span, None)
}
/// Format the line buffer and provide an associated line [`Span`]
@ -593,7 +587,7 @@ impl Line {
/// this means that any trailing newline will be stripped unless it is
/// directly referenced by the `span` offset
/// (starts _at_ the newline).
fn format_for(&mut self, span: Span) -> (Vec<u8>, Span, Option<Column>) {
fn format_for(&mut self, span: Span) -> (String, Span, Option<Column>) {
// Trim any newline unless the span starts at the last byte,
// which would reference the newline character itself.
if span.offset() as usize != self.total_offset_read() {
@ -611,7 +605,15 @@ impl Line {
let buf = self.take_buf().unwrap_or(vec![]);
let line_span = span.context().span_or_zz(offset_start, buf.len());
(buf, line_span, Some(column))
// SAFETY: We have already verified above,
// by `line`,
// that this represents a valid UTF-8 string,
// otherwise we would not have reached this line.
// Refactoring could likely avoid the need to convert to a string
// separately from the original check.
let strbuf = unsafe { String::from_utf8_unchecked(buf) };
(strbuf, line_span, Some(column))
}
/// Determine the [`Span`] endpoint offsets relative to the line start.

View File

@ -610,7 +610,7 @@ fn invalid_unicode_no_column() {
// Make sure we're still trimming despite the
// error.
buf.pop();
buf.into()
String::from_utf8_lossy(&buf).into()
},
}])
.unwrap(),