Skip to content

Commit 288ca5d

Browse files
Compress token trees for best memory usage
1 parent f4aca71 commit 288ca5d

8 files changed

Lines changed: 1254 additions & 432 deletions

File tree

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/syntax-bridge/src/lib.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -866,7 +866,8 @@ impl TtTreeSink<'_> {
866866
/// Parses a float literal as if it were one to two name ref nodes with a dot in between.
867867
/// This occurs when a float literal is used as a field access.
868868
fn float_split(&mut self, has_pseudo_dot: bool) {
869-
let (text, span) = match self.cursor.token_tree() {
869+
let token_tree = self.cursor.token_tree();
870+
let (text, span) = match &token_tree {
870871
Some(tt::TokenTree::Leaf(tt::Leaf::Literal(
871872
lit @ tt::Literal { span, kind: tt::LitKind::Float, .. },
872873
))) => (lit.text(), *span),
@@ -928,9 +929,15 @@ impl TtTreeSink<'_> {
928929
self.buf.push_str("r#");
929930
self.text_pos += TextSize::of("r#");
930931
}
931-
let r = (ident.sym.as_str(), ident.span);
932+
let text = ident.sym.as_str();
933+
self.buf += text;
934+
self.text_pos += TextSize::of(text);
935+
combined_span = match combined_span {
936+
None => Some(ident.span),
937+
Some(prev_span) => Some(Self::merge_spans(prev_span, ident.span)),
938+
};
932939
self.cursor.bump();
933-
r
940+
continue 'tokens;
934941
}
935942
tt::Leaf::Punct(punct) => {
936943
assert!(punct.char.is_ascii());

crates/syntax-bridge/src/tests.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ fn check_punct_spacing(fixture: &str) {
3636
if let tt::TokenTree::Leaf(Leaf::Punct(Punct {
3737
spacing, span: Span { range, .. }, ..
3838
})) = token_tree
39-
&& let Some(expected) = annotations.remove(range)
39+
&& let Some(expected) = annotations.remove(&range)
4040
{
41-
assert_eq!(expected, *spacing);
41+
assert_eq!(expected, spacing);
4242
}
4343
cursor.bump();
4444
}

crates/tt/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ doctest = false
1515
[dependencies]
1616
arrayvec.workspace = true
1717
text-size.workspace = true
18+
rustc-hash.workspace = true
19+
indexmap.workspace = true
1820

1921
span = { path = "../span", version = "0.0", default-features = false }
2022
stdx.workspace = true

crates/tt/src/buffer.rs

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
//! Stateful iteration over token trees.
22
//!
33
//! We use this as the source of tokens for the parser.
4-
use crate::{Leaf, Subtree, TokenTree, TokenTreesView};
4+
use crate::{Leaf, Subtree, TokenTree, TokenTreesView, dispatch_ref};
55

66
pub struct Cursor<'a> {
7-
buffer: &'a [TokenTree],
7+
buffer: TokenTreesView<'a>,
88
index: usize,
99
subtrees_stack: Vec<usize>,
1010
}
1111

1212
impl<'a> Cursor<'a> {
1313
pub fn new(buffer: TokenTreesView<'a>) -> Self {
14-
Self { buffer: buffer.0, index: 0, subtrees_stack: Vec::new() }
14+
Self { buffer, index: 0, subtrees_stack: Vec::new() }
1515
}
1616

1717
/// Check whether it is eof
@@ -23,16 +23,22 @@ impl<'a> Cursor<'a> {
2323
self.subtrees_stack.is_empty()
2424
}
2525

26-
fn last_subtree(&self) -> Option<(usize, &'a Subtree)> {
26+
fn at(&self, idx: usize) -> Option<TokenTree> {
27+
dispatch_ref! {
28+
match self.buffer.repr => tt => Some(tt.get(idx)?.to_api(self.buffer.span_parts))
29+
}
30+
}
31+
32+
fn last_subtree(&self) -> Option<(usize, Subtree)> {
2733
self.subtrees_stack.last().map(|&subtree_idx| {
28-
let TokenTree::Subtree(subtree) = &self.buffer[subtree_idx] else {
34+
let Some(TokenTree::Subtree(subtree)) = self.at(subtree_idx) else {
2935
panic!("subtree pointing to non-subtree");
3036
};
3137
(subtree_idx, subtree)
3238
})
3339
}
3440

35-
pub fn end(&mut self) -> &'a Subtree {
41+
pub fn end(&mut self) -> Subtree {
3642
let (last_subtree_idx, last_subtree) =
3743
self.last_subtree().expect("called `Cursor::end()` without an open subtree");
3844
// +1 because `Subtree.len` excludes the subtree itself.
@@ -46,14 +52,14 @@ impl<'a> Cursor<'a> {
4652
}
4753

4854
/// Returns the `TokenTree` at the cursor if it is not at the end of a subtree.
49-
pub fn token_tree(&self) -> Option<&'a TokenTree> {
55+
pub fn token_tree(&self) -> Option<TokenTree> {
5056
if let Some((last_subtree_idx, last_subtree)) = self.last_subtree() {
5157
// +1 because `Subtree.len` excludes the subtree itself.
5258
if last_subtree_idx + last_subtree.usize_len() + 1 == self.index {
5359
return None;
5460
}
5561
}
56-
self.buffer.get(self.index)
62+
self.at(self.index)
5763
}
5864

5965
/// Bumps the cursor, and enters a subtree if it is on one.
@@ -66,7 +72,7 @@ impl<'a> Cursor<'a> {
6672
"called `Cursor::bump()` when at the end of a subtree"
6773
);
6874
}
69-
if let TokenTree::Subtree(_) = self.buffer[self.index] {
75+
if let Some(TokenTree::Subtree(_)) = self.at(self.index) {
7076
self.subtrees_stack.push(self.index);
7177
}
7278
self.index += 1;
@@ -81,28 +87,31 @@ impl<'a> Cursor<'a> {
8187
}
8288
}
8389
// +1 because `Subtree.len` excludes the subtree itself.
84-
if let TokenTree::Subtree(_) = self.buffer[self.index] {
90+
if let Some(TokenTree::Subtree(_)) = self.at(self.index) {
8591
self.subtrees_stack.push(self.index);
8692
}
8793
self.index += 1;
8894
}
8995

90-
pub fn peek_two_leaves(&self) -> Option<[&'a Leaf; 2]> {
96+
pub fn peek_two_leaves(&self) -> Option<[Leaf; 2]> {
9197
if let Some((last_subtree_idx, last_subtree)) = self.last_subtree() {
9298
// +1 because `Subtree.len` excludes the subtree itself.
9399
let last_end = last_subtree_idx + last_subtree.usize_len() + 1;
94100
if last_end == self.index || last_end == self.index + 1 {
95101
return None;
96102
}
97103
}
98-
self.buffer.get(self.index..self.index + 2).and_then(|it| match it {
99-
[TokenTree::Leaf(a), TokenTree::Leaf(b)] => Some([a, b]),
104+
self.at(self.index).zip(self.at(self.index + 1)).and_then(|it| match it {
105+
(TokenTree::Leaf(a), TokenTree::Leaf(b)) => Some([a, b]),
100106
_ => None,
101107
})
102108
}
103109

104110
pub fn crossed(&self) -> TokenTreesView<'a> {
105111
assert!(self.is_root());
106-
TokenTreesView::new(&self.buffer[..self.index])
112+
TokenTreesView {
113+
repr: self.buffer.repr.get(..self.index).unwrap(),
114+
span_parts: self.buffer.span_parts,
115+
}
107116
}
108117
}

crates/tt/src/iter.rs

Lines changed: 62 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@ use arrayvec::ArrayVec;
77
use intern::sym;
88
use span::Span;
99

10-
use crate::{Ident, Leaf, MAX_GLUED_PUNCT_LEN, Punct, Spacing, Subtree, TokenTree, TokenTreesView};
10+
use crate::{
11+
Ident, Leaf, MAX_GLUED_PUNCT_LEN, Punct, Spacing, Subtree, TokenTree, TokenTreesReprRef,
12+
TokenTreesView, dispatch_ref,
13+
};
1114

1215
#[derive(Clone)]
1316
pub struct TtIter<'a> {
14-
inner: std::slice::Iter<'a, TokenTree>,
17+
inner: TokenTreesView<'a>,
1518
}
1619

1720
impl fmt::Debug for TtIter<'_> {
@@ -21,17 +24,17 @@ impl fmt::Debug for TtIter<'_> {
2124
}
2225

2326
#[derive(Clone, Copy)]
24-
pub struct TtIterSavepoint<'a>(&'a [TokenTree]);
27+
pub struct TtIterSavepoint<'a>(TokenTreesView<'a>);
2528

2629
impl<'a> TtIterSavepoint<'a> {
2730
pub fn remaining(self) -> TokenTreesView<'a> {
28-
TokenTreesView::new(self.0)
31+
self.0
2932
}
3033
}
3134

3235
impl<'a> TtIter<'a> {
33-
pub(crate) fn new(tt: &'a [TokenTree]) -> TtIter<'a> {
34-
TtIter { inner: tt.iter() }
36+
pub(crate) fn new(tt: TokenTreesView<'a>) -> TtIter<'a> {
37+
TtIter { inner: tt }
3538
}
3639

3740
pub fn expect_char(&mut self, char: char) -> Result<(), ()> {
@@ -141,8 +144,8 @@ impl<'a> TtIter<'a> {
141144
let _ = self.next().unwrap();
142145
let _ = self.next().unwrap();
143146
res.push(first);
144-
res.push(*second);
145-
res.push(*third.unwrap());
147+
res.push(second);
148+
res.push(third.unwrap());
146149
}
147150
('-' | '!' | '*' | '/' | '&' | '%' | '^' | '+' | '<' | '=' | '>' | '|', '=', _)
148151
| ('-' | '=' | '>', '>', _)
@@ -154,25 +157,29 @@ impl<'a> TtIter<'a> {
154157
| ('|', '|', _) => {
155158
let _ = self.next().unwrap();
156159
res.push(first);
157-
res.push(*second);
160+
res.push(second);
158161
}
159162
_ => res.push(first),
160163
}
161164
Ok(res)
162165
}
163166

164167
/// This method won't check for subtrees, so the nth token tree may not be the nth sibling of the current tree.
165-
fn peek_n(&self, n: usize) -> Option<&'a TokenTree> {
166-
self.inner.as_slice().get(n)
168+
fn peek_n(&self, n: usize) -> Option<TokenTree> {
169+
dispatch_ref! {
170+
match self.inner.repr => tt => Some(tt.get(n)?.to_api(self.inner.span_parts))
171+
}
167172
}
168173

169174
pub fn peek(&self) -> Option<TtElement<'a>> {
170-
match self.inner.as_slice().first()? {
171-
TokenTree::Leaf(leaf) => Some(TtElement::Leaf(leaf.clone())),
175+
match self.peek_n(0)? {
176+
TokenTree::Leaf(leaf) => Some(TtElement::Leaf(leaf)),
172177
TokenTree::Subtree(subtree) => {
173-
let nested_iter =
174-
TtIter { inner: self.inner.as_slice()[1..][..subtree.usize_len()].iter() };
175-
Some(TtElement::Subtree(*subtree, nested_iter))
178+
let nested_repr = self.inner.repr.get(1..subtree.usize_len() + 1).unwrap();
179+
let nested_iter = TtIter {
180+
inner: TokenTreesView { repr: nested_repr, span_parts: self.inner.span_parts },
181+
};
182+
Some(TtElement::Subtree(subtree, nested_iter))
176183
}
177184
}
178185
}
@@ -183,26 +190,51 @@ impl<'a> TtIter<'a> {
183190
}
184191

185192
pub fn next_span(&self) -> Option<Span> {
186-
Some(self.inner.as_slice().first()?.first_span())
193+
Some(self.peek()?.first_span())
187194
}
188195

189196
pub fn remaining(&self) -> TokenTreesView<'a> {
190-
TokenTreesView::new(self.inner.as_slice())
197+
self.inner
191198
}
192199

193200
/// **Warning**: This advances by `skip` **flat** token trees; each subtree counts as its children + 1!
194201
pub fn flat_advance(&mut self, skip: usize) {
195-
self.inner = self.inner.as_slice()[skip..].iter();
202+
self.inner.repr = self.inner.repr.get(skip..).unwrap();
196203
}
197204

198205
pub fn savepoint(&self) -> TtIterSavepoint<'a> {
199-
TtIterSavepoint(self.inner.as_slice())
206+
TtIterSavepoint(self.inner)
200207
}
201208

202209
pub fn from_savepoint(&self, savepoint: TtIterSavepoint<'a>) -> TokenTreesView<'a> {
203-
let len = (self.inner.as_slice().as_ptr() as usize - savepoint.0.as_ptr() as usize)
204-
/ size_of::<TokenTree>();
205-
TokenTreesView::new(&savepoint.0[..len])
210+
let len = match (self.inner.repr, savepoint.0.repr) {
211+
(
212+
TokenTreesReprRef::SpanStorage32(this),
213+
TokenTreesReprRef::SpanStorage32(savepoint),
214+
) => {
215+
(this.as_ptr() as usize - savepoint.as_ptr() as usize)
216+
/ size_of::<crate::storage::TokenTree<crate::storage::SpanStorage32>>()
217+
}
218+
(
219+
TokenTreesReprRef::SpanStorage64(this),
220+
TokenTreesReprRef::SpanStorage64(savepoint),
221+
) => {
222+
(this.as_ptr() as usize - savepoint.as_ptr() as usize)
223+
/ size_of::<crate::storage::TokenTree<crate::storage::SpanStorage64>>()
224+
}
225+
(
226+
TokenTreesReprRef::SpanStorage96(this),
227+
TokenTreesReprRef::SpanStorage96(savepoint),
228+
) => {
229+
(this.as_ptr() as usize - savepoint.as_ptr() as usize)
230+
/ size_of::<crate::storage::TokenTree<crate::storage::SpanStorage96>>()
231+
}
232+
_ => panic!("savepoint did not originate from this TtIter"),
233+
};
234+
TokenTreesView {
235+
repr: savepoint.0.repr.get(..len).unwrap(),
236+
span_parts: savepoint.0.span_parts,
237+
}
206238
}
207239

208240
pub fn next_as_view(&mut self) -> Option<TokenTreesView<'a>> {
@@ -242,14 +274,12 @@ impl TtElement<'_> {
242274
impl<'a> Iterator for TtIter<'a> {
243275
type Item = TtElement<'a>;
244276
fn next(&mut self) -> Option<Self::Item> {
245-
match self.inner.next()? {
246-
TokenTree::Leaf(leaf) => Some(TtElement::Leaf(leaf.clone())),
247-
TokenTree::Subtree(subtree) => {
248-
let nested_iter =
249-
TtIter { inner: self.inner.as_slice()[..subtree.usize_len()].iter() };
250-
self.inner = self.inner.as_slice()[subtree.usize_len()..].iter();
251-
Some(TtElement::Subtree(*subtree, nested_iter))
252-
}
253-
}
277+
let result = self.peek()?;
278+
let skip = match &result {
279+
TtElement::Leaf(_) => 1,
280+
TtElement::Subtree(subtree, _) => subtree.usize_len() + 1,
281+
};
282+
self.inner.repr = self.inner.repr.get(skip..).unwrap();
283+
Some(result)
254284
}
255285
}

0 commit comments

Comments
 (0)