|
| 1 | +# frozen_string_literal: true |
| 2 | + |
| 3 | +require 'rouge' |
| 4 | + |
| 5 | +# We define a rouge lexer for Sail source |
| 6 | +class SailLexer < Rouge::RegexLexer |
| 7 | + title 'Sail' |
| 8 | + desc 'Sail ISA Description Language (https://github.com/rems-project/sail)' |
| 9 | + tag 'sail' |
| 10 | + filenames '*.sail' |
| 11 | + |
| 12 | + id = /[a-zA-Z_?][a-zA-Z0-9_?#]*/ |
| 13 | + |
| 14 | + # Specially handle inserted cross-references |
| 15 | + sailref = /sailref:[^\[]*\[[^\]]*\]/ |
| 16 | + |
| 17 | + tyvar = /'[a-zA-Z_?][a-zA-Z0-9_?#]*/ |
| 18 | + |
| 19 | + # We are careful with the definition of operators to ensure openers |
| 20 | + # like // and /* cannot prefix valid operators |
| 21 | + op_char = '[!%&*+-./:<=>@^|]' |
| 22 | + op_char_no_slash = '[!%&*+-.:<=>@^|]' |
| 23 | + op_char_no_slash_star = '[!%&+-.:<=>@^|]' |
| 24 | + |
| 25 | + # Sail operators can be suffixed with an underscore followed by a |
| 26 | + # regular identifier i.e. <=_u for unsigned less that or equal to |
| 27 | + op_suffix = "#{op_char}*(_#{id.source})" |
| 28 | + |
| 29 | + # Operators of length 1, 2, and n > 2 |
| 30 | + operator1 = Regexp.new(op_char) |
| 31 | + operator2 = Regexp.new("(#{op_char}#{op_char_no_slash_star})|(#{op_char_no_slash}#{op_char})") |
| 32 | + operatorn = Regexp.new("(#{op_char}#{op_char_no_slash_star}#{op_suffix})|(#{op_char_no_slash}#{op_char}#{op_suffix})") |
| 33 | + |
| 34 | + def self.return_type |
| 35 | + @return_type ||= Set.new %w[ |
| 36 | + RETIRE_SUCCESS RETIRE_FAIL |
| 37 | + ] |
| 38 | + end |
| 39 | + |
| 40 | + def self.keywords |
| 41 | + @keywords ||= Set.new %w[ |
| 42 | + and as by match clause operator default end enum else forall foreach function mapping overload throw |
| 43 | + try catch if in let var ref pure monadic register return scattered struct then type union newtype with |
| 44 | + val outcome instantiation impl repeat until while do bitfield forwards backwards to from |
| 45 | + ] |
| 46 | + end |
| 47 | + |
| 48 | + # Keywords that appear in types, and builtin special types |
| 49 | + def self.keywords_type |
| 50 | + @keywords_type ||= Set.new %w[ |
| 51 | + dec inc Int Order Bool Type bits bool int option unit implicit |
| 52 | + ] |
| 53 | + end |
| 54 | + |
| 55 | + # These keywords appear like special functions rather than regular |
| 56 | + # keywords, i.e. `assert(cond, "message")` |
| 57 | + def self.builtins |
| 58 | + @builtins ||= Set.new %w[ |
| 59 | + bitzero bitone exit false sizeof constraint true undefined |
| 60 | + ] |
| 61 | + end |
| 62 | + |
| 63 | + # Reserved and internal keywords, as well as deprecated keywords |
| 64 | + def self.reserved |
| 65 | + @reserved ||= Set.new %w[ |
| 66 | + effect cast constant import module mutual configuration termination_measure internal_plet internal_return |
| 67 | + ] |
| 68 | + end |
| 69 | + |
| 70 | + state :whitespace do |
| 71 | + rule(/\s+/, Text::Whitespace) |
| 72 | + end |
| 73 | + |
| 74 | + state :root do |
| 75 | + mixin :whitespace |
| 76 | + |
| 77 | + rule(/\b(RETIRE_SUCCESS|RETIRE_FAIL)\b/, Name::Constant) |
| 78 | + |
| 79 | + rule(/0x[0-9A-Fa-f_]+/, Num::Hex) |
| 80 | + rule(/0b[0-1_]+/, Num::Bin) |
| 81 | + rule(/[0-9]+\.[0-9]+/, Num::Float) |
| 82 | + rule(/[0-9_]+/, Num::Integer) |
| 83 | + |
| 84 | + rule(/"/, Str, :string) |
| 85 | + |
| 86 | + rule tyvar, Name::Variable |
| 87 | + |
| 88 | + rule(/(val\b)(\s+)(#{id})/) do |
| 89 | + groups Keyword, Text::Whitespace, Name::Function |
| 90 | + end |
| 91 | + |
| 92 | + rule(/(function\b)(\s+)(#{id})/) do |
| 93 | + groups Keyword, Text::Whitespace, Name::Function |
| 94 | + end |
| 95 | + |
| 96 | + rule %r{//}, Comment, :line_comment |
| 97 | + rule %r{/\*}, Comment, :comment |
| 98 | + rule(/\$\[/, Comment::Preproc, :attribute) |
| 99 | + rule(/\$#{id}/, Comment::Preproc, :pragma) |
| 100 | + |
| 101 | + # Function arrows |
| 102 | + rule(/->/, Punctuation) |
| 103 | + |
| 104 | + # Two character brackets |
| 105 | + rule(/\[\|/, Punctuation) |
| 106 | + rule(/\|\]/, Punctuation) |
| 107 | + # rule(/{\|/, Punctuation) |
| 108 | + rule(/\|}/, Punctuation) |
| 109 | + |
| 110 | + rule(/[,@=(){}\[\];:]/, Punctuation) |
| 111 | + rule operatorn, Operator |
| 112 | + rule operator2, Operator |
| 113 | + rule operator1, Operator |
| 114 | + |
| 115 | + rule sailref, Name |
| 116 | + |
| 117 | + rule id do |m| |
| 118 | + name = m[0] |
| 119 | + |
| 120 | + if self.class.keywords.include? name |
| 121 | + token Keyword |
| 122 | + elsif self.class.keywords_type.include? name |
| 123 | + token Keyword::Type |
| 124 | + elsif self.class.builtins.include? name |
| 125 | + token Name::Builtin |
| 126 | + elsif self.class.reserved.include? name |
| 127 | + token Keyword::Reserved |
| 128 | + elsif self.class.return_type.include? name |
| 129 | + token Name::Constant |
| 130 | + else |
| 131 | + token Name |
| 132 | + end |
| 133 | + end |
| 134 | + end |
| 135 | + |
| 136 | + state :string do |
| 137 | + rule(/"/, Str, :pop!) |
| 138 | + # Sail escape sequences are a subset of OCaml's https://v2.ocaml.org/manual/lex.html#escape-sequence |
| 139 | + rule(/\\([\\ntbr"']|x[a-fA-F0-9]{2}|[0-7]{3})/, Str::Escape) |
| 140 | + rule(/[^\\"\n]+/, Str) |
| 141 | + end |
| 142 | + |
| 143 | + state :attribute do |
| 144 | + rule(/\[/, Comment::Preproc, :attribute) |
| 145 | + rule(/\]/, Comment::Preproc, :pop!) |
| 146 | + rule(/[^\[\]]+/, Comment::Preproc) |
| 147 | + end |
| 148 | + |
| 149 | + state :pragma do |
| 150 | + rule(/\n/, Text::Whitespace, :pop!) |
| 151 | + rule(/[^\n]+/, Comment::Preproc) |
| 152 | + end |
| 153 | + |
| 154 | + state :line_comment do |
| 155 | + rule(/\n/, Text::Whitespace, :pop!) |
| 156 | + rule(/[^\n]+/, Comment) |
| 157 | + end |
| 158 | + |
| 159 | + state :comment do |
| 160 | + rule %r{/\*}, Comment, :comment |
| 161 | + rule %r{\*/}, Comment, :pop! |
| 162 | + rule(/\n/, Text::Whitespace) |
| 163 | + rule(/./, Comment) |
| 164 | + end |
| 165 | +end |
0 commit comments