tamer: diagnose::resolver::SourceLine:: Guarantee non-empty lines

This simplifies types and error handling since we will always have at least
one line, provided that the span is within the range of the context.  To
ensure that, this patch introduces a new error.

DEV-12151
main
Mike Gerwitz 2022-04-22 16:50:16 -04:00
parent 56b8aec9b7
commit c0ace258f0
3 changed files with 160 additions and 63 deletions

View File

@ -115,16 +115,15 @@ impl<R: SpanResolver> VisualReporter<R> {
fn render_col(to: &mut impl Write, rspan: ResolvedSpan) -> fmt::Result {
let span = rspan.span;
match rspan.col() {
match rspan.col_num() {
Some(col) => writeln!(to, ":{}", col)?,
// The column is unavailable,
// which means that the line must have contained invalid UTF-8.
// Output what we can in an attempt to help the user debug.
None => {
let rel = rspan
.first_line_span()
.and_then(|lspan| span.relative_to(lspan))
let rel = span
.relative_to(rspan.first_line_span())
.unwrap_or(UNKNOWN_SPAN);
writeln!(
@ -192,13 +191,10 @@ impl<R: SpanResolver> Reporter for VisualReporter<R> {
.into(),
)?;
}
Ok(rspan) => match rspan.line() {
Some(line) => {
write!(to, ":{}", line)?;
Self::render_col(to, rspan)?;
}
None => Self::render_fallback_span_offset(to, span)?,
},
Ok(rspan) => {
write!(to, ":{}", rspan.line_num())?;
Self::render_col(to, rspan)?;
}
}
}

View File

@ -29,6 +29,8 @@ use std::{
io::{self, BufRead, BufReader, Seek},
mem::take,
num::NonZeroU32,
ops::Deref,
slice::SliceIndex,
str::Utf8Error,
};
use unicode_width::UnicodeWidthChar;
@ -69,6 +71,35 @@ pub trait SpanResolver {
) -> Result<ResolvedSpan, SpanResolverError>;
}
/// Wrapper around a non-empty [`Vec`].
///
/// This is for correctness,
/// not performance.
/// This type is half-assed and is only what we need for this module.
#[derive(Debug, PartialEq, Eq)]
struct NonEmptyVec<T>(Vec<T>);
impl<T> NonEmptyVec<T> {
fn new(from: Vec<T>) -> Option<Self> {
if from.is_empty() {
return None;
}
Some(NonEmptyVec(from))
}
/// Returns the first element of the [`Vec`].
fn first(&self) -> &T {
self.0.first().unwrap()
}
}
impl<T> AsRef<Vec<T>> for NonEmptyVec<T> {
fn as_ref(&self) -> &Vec<T> {
&self.0
}
}
/// A [`Span`] resolved to its source location.
///
/// [`Span`] itself is optimized for size---it
@ -98,20 +129,22 @@ pub struct ResolvedSpan {
///
/// It should be the case that the [`Context`] of each [`SourceLine`] of
/// this field is equal to the [`Context`] of the `span` field.
pub lines: Vec<SourceLine>,
///
/// _This vector will always have at least one line._
lines: NonEmptyVec<SourceLine>,
}
impl ResolvedSpan {
pub fn line(&self) -> Option<NonZeroU32> {
self.lines.get(0).map(|line| line.num)
pub fn line_num(&self) -> NonZeroU32 {
self.lines.first().num
}
pub fn col(&self) -> Option<Column> {
self.lines.get(0).and_then(|line| line.column)
pub fn col_num(&self) -> Option<Column> {
self.lines.first().column
}
pub fn first_line_span(&self) -> Option<Span> {
self.lines.get(0).map(|line| line.span)
pub fn first_line_span(&self) -> Span {
self.lines.first().span
}
}
@ -285,7 +318,18 @@ impl<R: BufRead + Seek> SpanResolver for BufSpanResolver<R> {
self.line_num = self.line_num.saturating_add(1);
}
Ok(ResolvedSpan { span, lines })
// The empty check should be redundant,
// but is meant to guarantee that we actually do have lines.
let pos = self.reader.stream_position()? as usize;
let nelines = (pos >= span.endpoints_saturated().1.offset() as usize)
.then(|| NonEmptyVec::new(lines))
.flatten()
.ok_or(SpanResolverError::OutOfRange(pos - 1))?;
Ok(ResolvedSpan {
span,
lines: nelines,
})
}
}
@ -638,6 +682,13 @@ pub enum SpanResolverError {
given: Context,
expected: Context,
},
/// The [`Span`] was not in range of the provided buffer.
///
/// This means that the span's offset+len ≥ EOF.
/// The provided size is the maximum offset of the buffer
/// (seek position - 1).
OutOfRange(usize),
}
impl From<io::Error> for SpanResolverError {
@ -655,6 +706,9 @@ impl Display for SpanResolverError {
"attempted to read context {given} \
using a resolver for context {expected}"
),
Self::OutOfRange(eof_pos) => {
write!(f, "span exceeds context size of {eof_pos} bytes")
}
}
}
}

View File

@ -39,6 +39,31 @@ fn rejects_context_mismatch() {
);
}
#[test]
fn rejects_span_with_endpoint_past_eof() {
let ctx = Context::from("pasteof");
let buf = "01234";
// Intentionally the byte _directly_ after EOF.
let span_high_offset = ctx.span(5, 0);
let span_high_len = ctx.span(3, 5);
let mut sut = BufSpanResolver::new(Cursor::new(buf), ctx);
assert_eq!(
Err(SpanResolverError::OutOfRange(4)),
sut.resolve(span_high_offset),
);
assert_eq!(
Err(SpanResolverError::OutOfRange(4)),
sut.resolve(span_high_len),
);
// Sanity check just to verify that we don't have an off-by-1 error
assert!(sut.resolve(ctx.span(0, 5)).is_ok());
}
// Span starts on the first byte of the line.
//
// In particular,
@ -61,7 +86,7 @@ fn first_byte_of_line() {
assert_eq!(
Ok(ResolvedSpan {
span,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 2.unwrap_into(),
column: Some(Column::Endpoints(
1.unwrap_into(),
@ -69,7 +94,8 @@ fn first_byte_of_line() {
)),
span: ctx.span(7, 6),
text: "line 2".into(),
}],
}])
.unwrap(),
}),
sut.resolve(span),
);
@ -91,12 +117,13 @@ fn last_byte_of_line() {
assert_eq!(
Ok(ResolvedSpan {
span,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 3.unwrap_into(),
column: Some(Column::At(6.unwrap_into(),)),
span: ctx.span(14, 6),
text: "line 3".into(),
}],
}])
.unwrap(),
}),
sut.resolve(span),
);
@ -121,7 +148,7 @@ fn last_byte_of_file_no_trailing_nl() {
assert_eq!(
Ok(ResolvedSpan {
span,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 3.unwrap_into(),
column: Some(Column::Endpoints(
3.unwrap_into(),
@ -129,7 +156,8 @@ fn last_byte_of_file_no_trailing_nl() {
)),
span: ctx.span(14, 6),
text: "line 3".into(),
}],
}])
.unwrap(),
}),
sut.resolve(span),
);
@ -152,7 +180,7 @@ fn multiple_lines_first_last() {
assert_eq!(
Ok(ResolvedSpan {
span,
lines: vec![
lines: NonEmptyVec::new(vec![
SourceLine {
num: 2.unwrap_into(),
// From the point, to the end of the line.
@ -173,7 +201,8 @@ fn multiple_lines_first_last() {
span: ctx.span(20, 10),
text: "end line 3".into(),
},
],
])
.unwrap(),
}),
sut.resolve(span),
);
@ -197,7 +226,7 @@ fn multiple_lines_middle_line_endpoints() {
assert_eq!(
Ok(ResolvedSpan {
span,
lines: vec![
lines: NonEmptyVec::new(vec![
SourceLine {
num: 1.unwrap_into(),
// From the point, to the end of the line.
@ -228,7 +257,8 @@ fn multiple_lines_middle_line_endpoints() {
span: ctx.span(20, 10),
text: "end line 3".into(),
},
],
])
.unwrap(),
}),
sut.resolve(span),
);
@ -253,7 +283,7 @@ fn first_line() {
assert_eq!(
Ok(ResolvedSpan {
span,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 1.unwrap_into(),
column: Some(Column::Endpoints(
1.unwrap_into(),
@ -261,7 +291,8 @@ fn first_line() {
)),
span: ctx.span(0, 6),
text: "line 1".into(),
},],
},])
.unwrap(),
}),
sut.resolve(span),
);
@ -289,7 +320,7 @@ fn newline_between_lines() {
assert_eq!(
Ok(ResolvedSpan {
span,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 2.unwrap_into(),
column: Some(Column::At(7.unwrap_into())),
// Trailing newline _is not_ stripped since it was
@ -298,7 +329,8 @@ fn newline_between_lines() {
// requested span.
span: ctx.span(7, 7),
text: "line 2\n".into(),
}],
}])
.unwrap(),
}),
sut.resolve(span),
);
@ -323,12 +355,13 @@ fn zero_length_span() {
assert_eq!(
Ok(ResolvedSpan {
span,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 2.unwrap_into(),
column: Some(Column::Before(4.unwrap_into())),
span: ctx.span(7, 6),
text: "line 2".into(),
}],
}])
.unwrap(),
}),
sut.resolve(span),
);
@ -355,7 +388,7 @@ fn zero_length_span_at_eol() {
assert_eq!(
Ok(ResolvedSpan {
span,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 2.unwrap_into(),
column: Some(Column::Before(7.unwrap_into())),
// Trailing newline _is not_ stripped since it was
@ -364,7 +397,8 @@ fn zero_length_span_at_eol() {
// requested span.
span: ctx.span(7, 7),
text: "line 2\n".into(),
}],
}])
.unwrap(),
}),
sut.resolve(span),
);
@ -391,12 +425,13 @@ fn zero_length_span_at_bol() {
assert_eq!(
Ok(ResolvedSpan {
span,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 2.unwrap_into(),
column: Some(Column::Before(1.unwrap_into())),
span: ctx.span(7, 6),
text: "line 2".into(),
}],
}])
.unwrap(),
}),
sut.resolve(span),
);
@ -420,7 +455,7 @@ fn resolve_multiple_spans() {
assert_eq!(
Ok(ResolvedSpan {
span: span_a,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 2.unwrap_into(),
column: Some(Column::Endpoints(
1.unwrap_into(),
@ -428,7 +463,8 @@ fn resolve_multiple_spans() {
)),
span: span_a,
text: "line 2".into(),
}],
}])
.unwrap(),
}),
sut.resolve(span_a),
);
@ -436,7 +472,7 @@ fn resolve_multiple_spans() {
assert_eq!(
Ok(ResolvedSpan {
span: span_b,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 3.unwrap_into(),
column: Some(Column::Endpoints(
1.unwrap_into(),
@ -444,7 +480,8 @@ fn resolve_multiple_spans() {
)),
span: span_b,
text: "line 3".into(),
}],
}])
.unwrap(),
}),
sut.resolve(span_b),
);
@ -466,7 +503,7 @@ fn resolve_same_span_multiple_times() {
assert_eq!(
Ok(ResolvedSpan {
span: span,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 2.unwrap_into(),
column: Some(Column::Endpoints(
1.unwrap_into(),
@ -474,7 +511,8 @@ fn resolve_same_span_multiple_times() {
)),
span: span,
text: "line 2".into(),
}],
}])
.unwrap(),
}),
sut.resolve(span),
);
@ -498,7 +536,7 @@ fn resolve_earlier_span_after_later() {
assert_eq!(
Ok(ResolvedSpan {
span: span_later,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 2.unwrap_into(),
column: Some(Column::Endpoints(
1.unwrap_into(),
@ -506,7 +544,8 @@ fn resolve_earlier_span_after_later() {
)),
span: span_later,
text: "line 2".into(),
}],
}])
.unwrap(),
}),
sut.resolve(span_later),
);
@ -516,7 +555,7 @@ fn resolve_earlier_span_after_later() {
assert_eq!(
Ok(ResolvedSpan {
span: span_earlier,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 1.unwrap_into(),
column: Some(Column::Endpoints(
1.unwrap_into(),
@ -524,7 +563,8 @@ fn resolve_earlier_span_after_later() {
)),
span: span_earlier,
text: "line 1".into(),
}],
}])
.unwrap(),
}),
sut.resolve(span_earlier),
);
@ -556,7 +596,7 @@ fn invalid_unicode_no_column() {
assert_eq!(
Ok(ResolvedSpan {
span,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 1.unwrap_into(),
column: None,
span: ctx.span(0, 6),
@ -566,7 +606,8 @@ fn invalid_unicode_no_column() {
buf.pop();
buf.into()
},
}],
}])
.unwrap(),
}),
sut.resolve(span),
);
@ -595,7 +636,7 @@ fn unicode_width() {
assert_eq!(
Ok(ResolvedSpan {
span: span_0,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 1.unwrap_into(),
column: Some(Column::Endpoints(
1.unwrap_into(),
@ -603,7 +644,8 @@ fn unicode_width() {
)),
span: span_0,
text: "0:\0".into(),
}],
}])
.unwrap(),
}),
sut.resolve(span_0),
);
@ -611,7 +653,7 @@ fn unicode_width() {
assert_eq!(
Ok(ResolvedSpan {
span: span_1,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 2.unwrap_into(),
column: Some(Column::Endpoints(
1.unwrap_into(),
@ -619,7 +661,8 @@ fn unicode_width() {
)),
span: span_1,
text: "1:“".into(),
}],
}])
.unwrap(),
}),
sut.resolve(span_1),
);
@ -627,7 +670,7 @@ fn unicode_width() {
assert_eq!(
Ok(ResolvedSpan {
span: span_2,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 3.unwrap_into(),
column: Some(Column::Endpoints(
1.unwrap_into(),
@ -635,7 +678,8 @@ fn unicode_width() {
)),
span: span_2,
text: "2:😊".into(),
}],
}])
.unwrap(),
}),
sut.resolve(span_2),
);
@ -678,7 +722,7 @@ fn at_invalid_char_boundary() {
assert_eq!(
Ok(ResolvedSpan {
span: span_end_bad,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 1.unwrap_into(),
column: Some(Column::Endpoints(
1.unwrap_into(),
@ -686,7 +730,8 @@ fn at_invalid_char_boundary() {
)),
span: line_span,
text: buf.clone().into(),
}],
}])
.unwrap(),
}),
sut.resolve(span_end_bad),
);
@ -694,7 +739,7 @@ fn at_invalid_char_boundary() {
assert_eq!(
Ok(ResolvedSpan {
span: span_start_bad,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 1.unwrap_into(),
// Intuitively this really should be [2,4],
// but the implementation shouldn't change to
@ -702,7 +747,8 @@ fn at_invalid_char_boundary() {
column: Some(Column::At(4.unwrap_into(),)),
span: line_span,
text: buf.clone().into(),
}],
}])
.unwrap(),
}),
sut.resolve(span_start_bad),
);
@ -710,14 +756,15 @@ fn at_invalid_char_boundary() {
assert_eq!(
Ok(ResolvedSpan {
span: span_all_bad,
lines: vec![SourceLine {
lines: NonEmptyVec::new(vec![SourceLine {
num: 1.unwrap_into(),
// Also unideal,
// but see comment for previous assertion.
column: Some(Column::At(4.unwrap_into(),)),
span: line_span,
text: buf.clone().into(),
}],
}])
.unwrap(),
}),
sut.resolve(span_all_bad),
);