@@ -14,7 +14,7 @@ use rustc_ast::ptr::P;
14
14
use rustc_ast:: token:: { self , BinOpToken , CommentKind , Delimiter , Nonterminal , Token , TokenKind } ;
15
15
use rustc_ast:: tokenstream:: { Spacing , TokenStream , TokenTree } ;
16
16
use rustc_ast:: util:: classify;
17
- use rustc_ast:: util:: comments:: { gather_comments , Comment , CommentStyle } ;
17
+ use rustc_ast:: util:: comments:: { Comment , CommentStyle } ;
18
18
use rustc_ast:: util:: parser;
19
19
use rustc_ast:: { self as ast, AttrArgs , AttrArgsEq , BlockCheckMode , PatKind } ;
20
20
use rustc_ast:: { attr, BindingAnnotation , ByRef , DelimArgs , RangeEnd , RangeSyntax , Term } ;
@@ -24,7 +24,7 @@ use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece};
24
24
use rustc_span:: edition:: Edition ;
25
25
use rustc_span:: source_map:: { SourceMap , Spanned } ;
26
26
use rustc_span:: symbol:: { kw, sym, Ident , IdentPrinter , Symbol } ;
27
- use rustc_span:: { BytePos , FileName , Span , DUMMY_SP } ;
27
+ use rustc_span:: { BytePos , CharPos , FileName , Pos , Span , DUMMY_SP } ;
28
28
use std:: borrow:: Cow ;
29
29
use thin_vec:: ThinVec ;
30
30
@@ -59,6 +59,127 @@ pub struct Comments<'a> {
59
59
current : usize ,
60
60
}
61
61
62
+ /// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
63
+ /// Otherwise returns `Some(k)` where `k` is first char offset after that leading
64
+ /// whitespace. Note that `k` may be outside bounds of `s`.
65
+ fn all_whitespace ( s : & str , col : CharPos ) -> Option < usize > {
66
+ let mut idx = 0 ;
67
+ for ( i, ch) in s. char_indices ( ) . take ( col. to_usize ( ) ) {
68
+ if !ch. is_whitespace ( ) {
69
+ return None ;
70
+ }
71
+ idx = i + ch. len_utf8 ( ) ;
72
+ }
73
+ Some ( idx)
74
+ }
75
+
76
+ fn trim_whitespace_prefix ( s : & str , col : CharPos ) -> & str {
77
+ let len = s. len ( ) ;
78
+ match all_whitespace ( s, col) {
79
+ Some ( col) => {
80
+ if col < len {
81
+ & s[ col..]
82
+ } else {
83
+ ""
84
+ }
85
+ }
86
+ None => s,
87
+ }
88
+ }
89
+
90
+ fn split_block_comment_into_lines ( text : & str , col : CharPos ) -> Vec < String > {
91
+ let mut res: Vec < String > = vec ! [ ] ;
92
+ let mut lines = text. lines ( ) ;
93
+ // just push the first line
94
+ res. extend ( lines. next ( ) . map ( |it| it. to_string ( ) ) ) ;
95
+ // for other lines, strip common whitespace prefix
96
+ for line in lines {
97
+ res. push ( trim_whitespace_prefix ( line, col) . to_string ( ) )
98
+ }
99
+ res
100
+ }
101
+
102
+ fn gather_comments ( sm : & SourceMap , path : FileName , src : String ) -> Vec < Comment > {
103
+ let sm = SourceMap :: new ( sm. path_mapping ( ) . clone ( ) ) ;
104
+ let source_file = sm. new_source_file ( path, src) ;
105
+ let text = ( * source_file. src . as_ref ( ) . unwrap ( ) ) . clone ( ) ;
106
+
107
+ let text: & str = text. as_str ( ) ;
108
+ let start_bpos = source_file. start_pos ;
109
+ let mut pos = 0 ;
110
+ let mut comments: Vec < Comment > = Vec :: new ( ) ;
111
+ let mut code_to_the_left = false ;
112
+
113
+ if let Some ( shebang_len) = rustc_lexer:: strip_shebang ( text) {
114
+ comments. push ( Comment {
115
+ style : CommentStyle :: Isolated ,
116
+ lines : vec ! [ text[ ..shebang_len] . to_string( ) ] ,
117
+ pos : start_bpos,
118
+ } ) ;
119
+ pos += shebang_len;
120
+ }
121
+
122
+ for token in rustc_lexer:: tokenize ( & text[ pos..] ) {
123
+ let token_text = & text[ pos..pos + token. len as usize ] ;
124
+ match token. kind {
125
+ rustc_lexer:: TokenKind :: Whitespace => {
126
+ if let Some ( mut idx) = token_text. find ( '\n' ) {
127
+ code_to_the_left = false ;
128
+ while let Some ( next_newline) = & token_text[ idx + 1 ..] . find ( '\n' ) {
129
+ idx += 1 + next_newline;
130
+ comments. push ( Comment {
131
+ style : CommentStyle :: BlankLine ,
132
+ lines : vec ! [ ] ,
133
+ pos : start_bpos + BytePos ( ( pos + idx) as u32 ) ,
134
+ } ) ;
135
+ }
136
+ }
137
+ }
138
+ rustc_lexer:: TokenKind :: BlockComment { doc_style, .. } => {
139
+ if doc_style. is_none ( ) {
140
+ let code_to_the_right = !matches ! (
141
+ text[ pos + token. len as usize ..] . chars( ) . next( ) ,
142
+ Some ( '\r' | '\n' )
143
+ ) ;
144
+ let style = match ( code_to_the_left, code_to_the_right) {
145
+ ( _, true ) => CommentStyle :: Mixed ,
146
+ ( false , false ) => CommentStyle :: Isolated ,
147
+ ( true , false ) => CommentStyle :: Trailing ,
148
+ } ;
149
+
150
+ // Count the number of chars since the start of the line by rescanning.
151
+ let pos_in_file = start_bpos + BytePos ( pos as u32 ) ;
152
+ let line_begin_in_file = source_file. line_begin_pos ( pos_in_file) ;
153
+ let line_begin_pos = ( line_begin_in_file - start_bpos) . to_usize ( ) ;
154
+ let col = CharPos ( text[ line_begin_pos..pos] . chars ( ) . count ( ) ) ;
155
+
156
+ let lines = split_block_comment_into_lines ( token_text, col) ;
157
+ comments. push ( Comment { style, lines, pos : pos_in_file } )
158
+ }
159
+ }
160
+ rustc_lexer:: TokenKind :: LineComment { doc_style } => {
161
+ if doc_style. is_none ( ) {
162
+ comments. push ( Comment {
163
+ style : if code_to_the_left {
164
+ CommentStyle :: Trailing
165
+ } else {
166
+ CommentStyle :: Isolated
167
+ } ,
168
+ lines : vec ! [ token_text. to_string( ) ] ,
169
+ pos : start_bpos + BytePos ( pos as u32 ) ,
170
+ } )
171
+ }
172
+ }
173
+ _ => {
174
+ code_to_the_left = true ;
175
+ }
176
+ }
177
+ pos += token. len as usize ;
178
+ }
179
+
180
+ comments
181
+ }
182
+
62
183
impl < ' a > Comments < ' a > {
63
184
pub fn new ( sm : & ' a SourceMap , filename : FileName , input : String ) -> Comments < ' a > {
64
185
let comments = gather_comments ( sm, filename, input) ;
0 commit comments