@@ -4,6 +4,7 @@ use regex::{Matches, Regex};
4
4
use std:: collections:: HashSet ;
5
5
use std:: sync:: LazyLock ;
6
6
use std:: { collections:: HashMap , sync:: Arc } ;
7
+ use unicase:: UniCase ;
7
8
8
9
use crate :: base:: field_attrs;
9
10
use crate :: { fields_value, ops:: sdk:: * } ;
@@ -31,10 +32,10 @@ struct LanguageConfig {
31
32
}
32
33
33
34
fn add_language < ' a > (
34
- output : & ' a mut HashMap < & ' static str , Arc < LanguageConfig > > ,
35
+ output : & ' a mut HashMap < UniCase < & ' static str > , Arc < LanguageConfig > > ,
35
36
name : & ' static str ,
36
37
aliases : impl IntoIterator < Item = & ' static str > ,
37
- lang_fn : tree_sitter_language :: LanguageFn ,
38
+ lang_fn : impl Into < tree_sitter :: Language > ,
38
39
terminal_node_kinds : impl IntoIterator < Item = & ' a str > ,
39
40
) {
40
41
let tree_sitter_lang: tree_sitter:: Language = lang_fn. into ( ) ;
@@ -58,49 +59,143 @@ fn add_language<'a>(
58
59
terminal_node_kind_ids,
59
60
} ) ;
60
61
for name in std:: iter:: once ( name) . chain ( aliases. into_iter ( ) ) {
61
- if output. insert ( name, config. clone ( ) ) . is_some ( ) {
62
+ if output. insert ( name. into ( ) , config. clone ( ) ) . is_some ( ) {
62
63
panic ! ( "Language `{name}` already exists" ) ;
63
64
}
64
65
}
65
66
}
66
67
67
- static TREE_SITTER_LANGUAGE_BY_LANG : LazyLock < HashMap < & ' static str , Arc < LanguageConfig > > > =
68
+ static TREE_SITTER_LANGUAGE_BY_LANG : LazyLock < HashMap < UniCase < & ' static str > , Arc < LanguageConfig > > > =
68
69
LazyLock :: new ( || {
69
70
let mut map = HashMap :: new ( ) ;
71
+ add_language ( & mut map, "C" , [ ".c" ] , tree_sitter_c:: LANGUAGE , [ ] ) ;
70
72
add_language (
71
73
& mut map,
72
- "Python" ,
73
- [ "py" , "python" ] ,
74
- tree_sitter_python:: LANGUAGE ,
74
+ "C++" ,
75
+ [ ".cpp" , ".cc" , ".cxx" , ".h" , ".hpp" , "cpp" ] ,
76
+ tree_sitter_c:: LANGUAGE ,
77
+ [ ] ,
78
+ ) ;
79
+ add_language (
80
+ & mut map,
81
+ "C#" ,
82
+ [ ".cs" , "cs" ] ,
83
+ tree_sitter_c_sharp:: LANGUAGE ,
84
+ [ ] ,
85
+ ) ;
86
+ add_language ( & mut map, "CSS" , [ ".css" ] , tree_sitter_css:: LANGUAGE , [ ] ) ;
87
+ add_language (
88
+ & mut map,
89
+ "Fortran" ,
90
+ [ ".f" , ".f90" , ".f95" , ".f03" , "f" , "f90" , "f95" , "f03" ] ,
91
+ tree_sitter_fortran:: LANGUAGE ,
92
+ [ ] ,
93
+ ) ;
94
+ add_language (
95
+ & mut map,
96
+ "Go" ,
97
+ [ ".go" , "golang" ] ,
98
+ tree_sitter_go:: LANGUAGE ,
99
+ [ ] ,
100
+ ) ;
101
+ add_language (
102
+ & mut map,
103
+ "HTML" ,
104
+ [ ".html" , ".htm" ] ,
105
+ tree_sitter_html:: LANGUAGE ,
75
106
[ ] ,
76
107
) ;
108
+ add_language ( & mut map, "Java" , [ ".java" ] , tree_sitter_java:: LANGUAGE , [ ] ) ;
77
109
add_language (
78
110
& mut map,
79
111
"JavaScript" ,
80
- [ "JS" , " js", "Javascript" , "javascript "] ,
112
+ [ ". js" , "js " ] ,
81
113
tree_sitter_javascript:: LANGUAGE ,
82
114
[ ] ,
83
115
) ;
116
+ add_language ( & mut map, "JSON" , [ ".json" ] , tree_sitter_json:: LANGUAGE , [ ] ) ;
84
117
add_language (
85
118
& mut map,
86
- "TypeScript" ,
87
- [ "TS" , "ts" , "Typescript" , "typescript" ] ,
88
- tree_sitter_typescript:: LANGUAGE_TYPESCRIPT ,
119
+ "Markdown" ,
120
+ [ ".md" , "md" ] ,
121
+ tree_sitter_md:: LANGUAGE ,
122
+ [ "inline" ] ,
123
+ ) ;
124
+ add_language (
125
+ & mut map,
126
+ "Pascal" ,
127
+ [ ".pas" , "pas" , ".dpr" , "dpr" , "Delphi" ] ,
128
+ tree_sitter_pascal:: LANGUAGE ,
129
+ [ ] ,
130
+ ) ;
131
+ add_language ( & mut map, "PHP" , [ ".php" ] , tree_sitter_php:: LANGUAGE_PHP , [ ] ) ;
132
+ add_language (
133
+ & mut map,
134
+ "Python" ,
135
+ [ ".py" ] ,
136
+ tree_sitter_python:: LANGUAGE ,
137
+ [ ] ,
138
+ ) ;
139
+ add_language ( & mut map, "R" , [ ".r" ] , tree_sitter_r:: LANGUAGE , [ ] ) ;
140
+ add_language ( & mut map, "Ruby" , [ ".rb" ] , tree_sitter_ruby:: LANGUAGE , [ ] ) ;
141
+ add_language (
142
+ & mut map,
143
+ "Rust" ,
144
+ [ ".rs" , "rs" ] ,
145
+ tree_sitter_rust:: LANGUAGE ,
146
+ [ ] ,
147
+ ) ;
148
+ add_language (
149
+ & mut map,
150
+ "Scala" ,
151
+ [ ".scala" ] ,
152
+ tree_sitter_scala:: LANGUAGE ,
153
+ [ ] ,
154
+ ) ;
155
+ add_language (
156
+ & mut map,
157
+ "SCSS" ,
158
+ [ ".scss" ] ,
159
+ tree_sitter_scss:: language ( ) ,
160
+ [ ] ,
161
+ ) ;
162
+ add_language ( & mut map, "SQL" , [ ".sql" ] , tree_sitter_sequel:: LANGUAGE , [ ] ) ;
163
+ add_language (
164
+ & mut map,
165
+ "Swift" ,
166
+ [ ".swift" ] ,
167
+ tree_sitter_swift:: LANGUAGE ,
168
+ [ ] ,
169
+ ) ;
170
+ add_language (
171
+ & mut map,
172
+ "TOML" ,
173
+ [ ".toml" ] ,
174
+ tree_sitter_toml_ng:: LANGUAGE ,
89
175
[ ] ,
90
176
) ;
91
177
add_language (
92
178
& mut map,
93
179
"TSX" ,
94
- [ "tsx" ] ,
180
+ [ ". tsx" ] ,
95
181
tree_sitter_typescript:: LANGUAGE_TSX ,
96
182
[ ] ,
97
183
) ;
98
184
add_language (
99
185
& mut map,
100
- "Markdown" ,
101
- [ "md" , "markdown" ] ,
102
- tree_sitter_md:: LANGUAGE . into ( ) ,
103
- [ "inline" ] ,
186
+ "TypeScript" ,
187
+ [ ".ts" , "ts" ] ,
188
+ tree_sitter_typescript:: LANGUAGE_TYPESCRIPT ,
189
+ [ ] ,
190
+ ) ;
191
+ add_language ( & mut map, "XML" , [ ".xml" ] , tree_sitter_xml:: LANGUAGE_XML , [ ] ) ;
192
+ add_language ( & mut map, "DTD" , [ ".dtd" ] , tree_sitter_xml:: LANGUAGE_DTD , [ ] ) ;
193
+ add_language (
194
+ & mut map,
195
+ "YAML" ,
196
+ [ ".yaml" , ".yml" ] ,
197
+ tree_sitter_yaml:: LANGUAGE ,
198
+ [ ] ,
104
199
) ;
105
200
map
106
201
} ) ;
@@ -416,7 +511,7 @@ impl SimpleFunctionExecutor for Executor {
416
511
. optional ( )
417
512
. map ( |v| anyhow:: Ok ( v. as_str ( ) ?. as_ref ( ) ) )
418
513
. transpose ( ) ?
419
- . and_then ( |lang| TREE_SITTER_LANGUAGE_BY_LANG . get ( lang) )
514
+ . and_then ( |lang| TREE_SITTER_LANGUAGE_BY_LANG . get ( & UniCase :: new ( lang) ) )
420
515
} ;
421
516
422
517
let recursive_chunker = RecursiveChunker {
0 commit comments