Skip to content

Commit cad2b74

Browse files
committed
Rust: integrate Rust Analyzer's Semantic module into extractor
1 parent 6ede20c commit cad2b74

7 files changed

Lines changed: 176 additions & 62 deletions

File tree

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

rust/ast-generator/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ use ra_ap_syntax::ast::{{
428428
}};
429429
use ra_ap_syntax::{{ast, AstNode}};
430430
431-
impl Translator {{
431+
impl Translator<'_> {{
432432
fn emit_else_branch(&mut self, node: ast::ElseBranch) -> Label<generated::Expr> {{
433433
match node {{
434434
ast::ElseBranch::IfExpr(inner) => self.emit_if_expr(inner).into(),

rust/extractor/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@ ra_ap_base_db = "0.0.232"
1313
ra_ap_hir = "0.0.232"
1414
ra_ap_hir_def = "0.0.232"
1515
ra_ap_ide_db = "0.0.232"
16+
ra_ap_hir_expand = "0.0.232"
1617
ra_ap_load-cargo = "0.0.232"
1718
ra_ap_paths = "0.0.232"
1819
ra_ap_project_model = "0.0.232"
1920
ra_ap_syntax = "0.0.232"
2021
ra_ap_vfs = "0.0.232"
2122
ra_ap_parser = "0.0.232"
23+
ra_ap_span = "0.0.232"
2224
serde = "1.0.209"
2325
serde_with = "3.9.0"
2426
stderrlog = "0.6.0"

rust/extractor/src/main.rs

Lines changed: 14 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,28 @@
11
use anyhow::Context;
22
use ra_ap_ide_db::line_index::LineIndex;
3-
use ra_ap_parser::Edition;
4-
use std::borrow::Cow;
53
mod archive;
64
mod config;
75
pub mod generated;
6+
mod rust_analyzer;
87
mod translate;
98
pub mod trap;
10-
use ra_ap_syntax::ast::SourceFile;
11-
use ra_ap_syntax::{AstNode, SyntaxError, TextRange, TextSize};
12-
13-
fn from_utf8_lossy(v: &[u8]) -> (Cow<'_, str>, Option<SyntaxError>) {
14-
let mut iter = v.utf8_chunks();
15-
let (first_valid, first_invalid) = if let Some(chunk) = iter.next() {
16-
let valid = chunk.valid();
17-
let invalid = chunk.invalid();
18-
if invalid.is_empty() {
19-
debug_assert_eq!(valid.len(), v.len());
20-
return (Cow::Borrowed(valid), None);
21-
}
22-
(valid, invalid)
23-
} else {
24-
return (Cow::Borrowed(""), None);
25-
};
26-
27-
const REPLACEMENT: &str = "\u{FFFD}";
28-
let error_start = first_valid.len() as u32;
29-
let error_end = error_start + first_invalid.len() as u32;
30-
let error_range = TextRange::new(TextSize::new(error_start), TextSize::new(error_end));
31-
let error = SyntaxError::new("invalid utf-8 sequence".to_owned(), error_range);
32-
let mut res = String::with_capacity(v.len());
33-
res.push_str(first_valid);
34-
35-
res.push_str(REPLACEMENT);
36-
37-
for chunk in iter {
38-
res.push_str(chunk.valid());
39-
if !chunk.invalid().is_empty() {
40-
res.push_str(REPLACEMENT);
41-
}
42-
}
43-
44-
(Cow::Owned(res), Some(error))
45-
}
469

4710
fn extract(
48-
archiver: &archive::Archiver,
11+
rust_analyzer: &rust_analyzer::RustAnalyzer,
4912
traps: &trap::TrapFileProvider,
5013
file: std::path::PathBuf,
5114
) -> anyhow::Result<()> {
52-
let file = std::path::absolute(&file).unwrap_or(file);
53-
let file = std::fs::canonicalize(&file).unwrap_or(file);
54-
archiver.archive(&file);
55-
let input = std::fs::read(&file)?;
56-
let (input, err) = from_utf8_lossy(&input);
57-
let line_index = LineIndex::new(&input);
15+
let (ast, input, parse_errors, semi) = rust_analyzer.parse(&file);
16+
let line_index = LineIndex::new(input.as_ref());
5817
let display_path = file.to_string_lossy();
5918
let mut trap = traps.create("source", &file);
6019
let label = trap.emit_file(&file);
61-
let mut translator = translate::Translator::new(trap, label, line_index);
62-
if let Some(err) = err {
63-
translator.emit_parse_error(display_path.as_ref(), err);
64-
}
65-
let parse = ra_ap_syntax::ast::SourceFile::parse(&input, Edition::CURRENT);
66-
for err in parse.errors() {
20+
let mut translator = translate::Translator::new(trap, label, line_index, semi);
21+
22+
for err in parse_errors {
6723
translator.emit_parse_error(display_path.as_ref(), err);
6824
}
69-
if let Some(ast) = SourceFile::cast(parse.syntax_node()) {
70-
translator.emit_source_file(ast);
71-
} else {
72-
log::warn!("Skipped {}", display_path);
73-
}
25+
translator.emit_source_file(ast);
7426
translator.trap.commit()?;
7527
Ok(())
7628
}
@@ -81,12 +33,17 @@ fn main() -> anyhow::Result<()> {
8133
.verbosity(2 + cfg.verbose as usize)
8234
.init()?;
8335
log::info!("{cfg:?}");
36+
let rust_analyzer = rust_analyzer::RustAnalyzer::new(&cfg)?;
37+
8438
let traps = trap::TrapFileProvider::new(&cfg).context("failed to set up trap files")?;
8539
let archiver = archive::Archiver {
8640
root: cfg.source_archive_dir,
8741
};
8842
for file in cfg.inputs {
89-
extract(&archiver, &traps, file)?;
43+
let file = std::path::absolute(&file).unwrap_or(file);
44+
let file = std::fs::canonicalize(&file).unwrap_or(file);
45+
archiver.archive(&file);
46+
extract(&rust_analyzer, &traps, file)?;
9047
}
9148

9249
Ok(())
rust/extractor/src/rust_analyzer.rs

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
use crate::config::Config;
2+
use anyhow::Context;
3+
use itertools::Itertools;
4+
use log::info;
5+
use ra_ap_base_db::SourceDatabase;
6+
use ra_ap_hir::Semantics;
7+
use ra_ap_ide_db::RootDatabase;
8+
use ra_ap_load_cargo::{load_workspace_at, LoadCargoConfig, ProcMacroServerChoice};
9+
use ra_ap_paths::Utf8PathBuf;
10+
use ra_ap_project_model::CargoConfig;
11+
use ra_ap_project_model::RustLibSource;
12+
use ra_ap_span::Edition;
13+
use ra_ap_span::EditionedFileId;
14+
use ra_ap_span::TextRange;
15+
use ra_ap_span::TextSize;
16+
use ra_ap_syntax::SourceFile;
17+
use ra_ap_syntax::SyntaxError;
18+
use ra_ap_vfs::AbsPathBuf;
19+
use ra_ap_vfs::Vfs;
20+
use ra_ap_vfs::VfsPath;
21+
use std::borrow::Cow;
22+
use std::collections::HashMap;
23+
use std::path::{Path, PathBuf};
24+
use triomphe::Arc;
25+
pub struct RustAnalyzer {
26+
workspace: HashMap<PathBuf, (Vfs, RootDatabase)>,
27+
}
28+
29+
impl RustAnalyzer {
30+
pub fn new(cfg: &Config) -> anyhow::Result<RustAnalyzer> {
31+
let mut workspace = HashMap::new();
32+
let config = CargoConfig {
33+
sysroot: Some(RustLibSource::Discover),
34+
target_dir: ra_ap_paths::Utf8PathBuf::from_path_buf(cfg.scratch_dir.to_path_buf())
35+
.map(|x| x.join("target"))
36+
.ok(),
37+
..Default::default()
38+
};
39+
let progress = |t| (log::info!("progress: {}", t));
40+
let load_config = LoadCargoConfig {
41+
load_out_dirs_from_check: true,
42+
with_proc_macro_server: ProcMacroServerChoice::Sysroot,
43+
prefill_caches: false,
44+
};
45+
let projects = find_project_manifests(&cfg.inputs).context("loading inputs")?;
46+
for project in projects {
47+
let manifest = project.manifest_path();
48+
let (db, vfs, _macro_server) =
49+
load_workspace_at(manifest.as_ref(), &config, &load_config, &progress)?;
50+
let path: &Path = manifest.parent().as_ref();
51+
workspace.insert(path.to_path_buf(), (vfs, db));
52+
}
53+
Ok(RustAnalyzer { workspace })
54+
}
55+
pub fn parse(
56+
&self,
57+
path: &PathBuf,
58+
) -> (
59+
SourceFile,
60+
Arc<str>,
61+
Vec<SyntaxError>,
62+
Option<Semantics<'_, RootDatabase>>,
63+
) {
64+
let mut p = path.as_path();
65+
while let Some(parent) = p.parent() {
66+
p = parent;
67+
if let Some((vfs, db)) = self.workspace.get(parent) {
68+
if let Some(file_id) = Utf8PathBuf::from_path_buf(path.to_path_buf())
69+
.ok()
70+
.and_then(|x| AbsPathBuf::try_from(x).ok())
71+
.map(VfsPath::from)
72+
.and_then(|x| vfs.file_id(&x))
73+
{
74+
let semi = Semantics::new(db);
75+
let file_id = EditionedFileId::current_edition(file_id);
76+
77+
return (
78+
semi.parse(file_id),
79+
db.file_text(file_id.into()),
80+
db.parse_errors(file_id)
81+
.map(|x| x.to_vec())
82+
.unwrap_or_default(),
83+
Some(semi),
84+
);
85+
}
86+
}
87+
}
88+
let input = std::fs::read(&path).unwrap();
89+
let (input, err) = from_utf8_lossy(&input);
90+
let parse = ra_ap_syntax::ast::SourceFile::parse(&input, Edition::CURRENT);
91+
let mut errors = parse.errors();
92+
errors.extend(err.into_iter());
93+
(parse.tree(), input.as_ref().into(), errors, None)
94+
}
95+
}
96+
97+
fn find_project_manifests(
98+
files: &[PathBuf],
99+
) -> anyhow::Result<Vec<ra_ap_project_model::ProjectManifest>> {
100+
let current = std::env::current_dir()?;
101+
let abs_files: Vec<_> = files
102+
.iter()
103+
.map(|path| AbsPathBuf::assert_utf8(current.join(path)))
104+
.collect();
105+
let ret = ra_ap_project_model::ProjectManifest::discover_all(&abs_files);
106+
info!(
107+
"found manifests: {}",
108+
ret.iter().map(|m| format!("{m}")).join(", ")
109+
);
110+
Ok(ret)
111+
}
112+
fn from_utf8_lossy(v: &[u8]) -> (Cow<'_, str>, Option<SyntaxError>) {
113+
let mut iter = v.utf8_chunks();
114+
let (first_valid, first_invalid) = if let Some(chunk) = iter.next() {
115+
let valid = chunk.valid();
116+
let invalid = chunk.invalid();
117+
if invalid.is_empty() {
118+
debug_assert_eq!(valid.len(), v.len());
119+
return (Cow::Borrowed(valid), None);
120+
}
121+
(valid, invalid)
122+
} else {
123+
return (Cow::Borrowed(""), None);
124+
};
125+
126+
const REPLACEMENT: &str = "\u{FFFD}";
127+
let error_start = first_valid.len() as u32;
128+
let error_end = error_start + first_invalid.len() as u32;
129+
let error_range = TextRange::new(TextSize::new(error_start), TextSize::new(error_end));
130+
let error = SyntaxError::new("invalid utf-8 sequence".to_owned(), error_range);
131+
let mut res = String::with_capacity(v.len());
132+
res.push_str(first_valid);
133+
134+
res.push_str(REPLACEMENT);
135+
136+
for chunk in iter {
137+
res.push_str(chunk.valid());
138+
if !chunk.invalid().is_empty() {
139+
res.push_str(REPLACEMENT);
140+
}
141+
}
142+
143+
(Cow::Owned(res), Some(error))
144+
}

rust/extractor/src/translate/base.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ use crate::generated::{self, AstNode};
22
use crate::trap::{DiagnosticSeverity, TrapFile, TrapId};
33
use crate::trap::{Label, TrapClass};
44
use codeql_extractor::trap::{self};
5+
use ra_ap_hir::Semantics;
56
use ra_ap_ide_db::line_index::{LineCol, LineIndex};
7+
use ra_ap_ide_db::RootDatabase;
68
use ra_ap_parser::SyntaxKind;
79
use ra_ap_syntax::ast::RangeItem;
810
use ra_ap_syntax::{ast, NodeOrToken, SyntaxElementChildren, SyntaxError, SyntaxToken, TextRange};
@@ -56,18 +58,25 @@ impl TextValue for ast::RangePat {
5658
self.op_token().map(|x| x.text().to_string())
5759
}
5860
}
59-
pub struct Translator {
61+
pub struct Translator<'a> {
6062
pub trap: TrapFile,
6163
label: trap::Label,
6264
line_index: LineIndex,
65+
semi: Option<Semantics<'a, RootDatabase>>,
6366
}
6467

65-
impl Translator {
66-
pub fn new(trap: TrapFile, label: trap::Label, line_index: LineIndex) -> Translator {
68+
impl Translator<'_> {
69+
pub fn new(
70+
trap: TrapFile,
71+
label: trap::Label,
72+
line_index: LineIndex,
73+
semi: Option<Semantics<'_, RootDatabase>>,
74+
) -> Translator {
6775
Translator {
6876
trap,
6977
label,
7078
line_index,
79+
semi,
7180
}
7281
}
7382
pub fn location(&self, range: TextRange) -> (LineCol, LineCol) {

rust/extractor/src/translate/generated.rs

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)