1
1
-- | These functions allow PureScript strings to be treated as if they were
2
2
-- | sequences of Unicode code points instead of their true underlying
3
3
-- | implementation (sequences of UTF-16 code units). For nearly all uses of
4
- -- | strings, these functions should be preferred over the ones in Data.String.
4
+ -- | strings, these functions should be preferred over the ones in `Data.String`.
5
5
module Data.String.CodePoints
6
6
( module StringReExports
7
7
, CodePoint ()
@@ -59,10 +59,34 @@ instance showCodePoint :: Show CodePoint where
59
59
-- I would prefer that this smart constructor not need to exist and instead
60
60
-- CodePoint just implements Enum, but the Enum module already depends on this
61
61
-- one. To avoid the circular dependency, we just expose these two functions.
62
+ -- |
63
+ -- | ```purescript
64
+ -- | >>> it = codePointFromInt 0x1D400 -- U+1D400 MATHEMATICAL BOLD CAPITAL A
65
+ -- | Just (CodePoint 0x1D400)
66
+ -- |
67
+ -- | >>> map singleton it
68
+ -- | Just "𝐀"
69
+ -- |
70
+ -- | >>> codePointFromInt 0x110000 -- does not correspond to a Unicode code point
71
+ -- | Nothing
72
+ -- | ```
73
+ -- |
62
74
codePointFromInt :: Int -> Maybe CodePoint
63
75
codePointFromInt n | 0 <= n && n <= 0x10FFFF = Just (CodePoint n)
64
76
codePointFromInt n = Nothing
65
77
78
+ -- |
79
+ -- | ```purescript
80
+ -- | >>> codePointToInt (codePointFromChar 'B')
81
+ -- | 66
82
+ -- |
83
+ -- | >>> boldA = codePointFromInt 0x1D400
84
+ -- | >>> boldA
85
+ -- | Just (CodePoint 0x1D400)
86
+ -- | >>> map codePointToInt boldA
87
+ -- | Just 119808 -- is the same as 0x1D400
88
+ -- | ```
89
+ -- |
66
90
codePointToInt :: CodePoint -> Int
67
91
codePointToInt (CodePoint n) = n
68
92
@@ -109,6 +133,15 @@ unsafeCodePointAt0Fallback s =
109
133
-- | Returns the first code point of the string after dropping the given number
110
134
-- | of code points from the beginning, if there is such a code point. Operates
111
135
-- | in constant space and in time linear to the given index.
136
+ -- |
137
+ -- | ```purescript
138
+ -- | >>> codePointAt 1 "𝐀𝐀𝐀𝐀"
139
+ -- | Just (CodePoint 0x1D400) -- represents "𝐀"
140
+ -- | -- compare to Data.String:
141
+ -- | >>> charAt 1 "𝐀𝐀𝐀𝐀"
142
+ -- | Just '�'
143
+ -- | ```
144
+ -- |
112
145
codePointAt :: Int -> String -> Maybe CodePoint
113
146
codePointAt n _ | n < 0 = Nothing
114
147
codePointAt 0 " " = Nothing
@@ -133,6 +166,12 @@ codePointAtFallback n s = case uncons s of
133
166
-- | Returns the number of code points in the leading sequence of code points
134
167
-- | which all match the given predicate. Operates in constant space and in
135
168
-- | time linear to the length of the string.
169
+ -- |
170
+ -- | ```purescript
171
+ -- | >>> count (\c -> codePointToInt c == 0x1D400) "𝐀𝐀 b c 𝐀"
172
+ -- | 2
173
+ -- | ```
174
+ -- |
136
175
count :: (CodePoint -> Boolean ) -> String -> Int
137
176
count = _count countFallback unsafeCodePointAt0
138
177
@@ -155,19 +194,43 @@ countTail p s accum = case uncons s of
155
194
-- | Drops the given number of code points from the beginning of the string. If
156
195
-- | the string does not have that many code points, returns the empty string.
157
196
-- | Operates in constant space and in time linear to the given number.
197
+ -- |
198
+ -- | ```purescript
199
+ -- | >>> drop 5 "𝐀𝐀 b c"
200
+ -- | "c"
201
+ -- | -- compare to Data.String:
202
+ -- | >>> drop 5 "𝐀𝐀 b c"
203
+ -- | "b c" -- because "𝐀" occupies 2 code units
204
+ -- | ```
205
+ -- |
158
206
drop :: Int -> String -> String
159
207
drop n s = String .drop (String .length (take n s)) s
160
208
161
209
162
210
-- | Drops the leading sequence of code points which all match the given
163
211
-- | predicate from the string. Operates in constant space and in time linear
164
212
-- | to the length of the string.
213
+ -- |
214
+ -- | ```purescript
215
+ -- | >>> dropWhile (\c -> codePointToInt c == 0x1D400) "𝐀𝐀 b c 𝐀"
216
+ -- | " b c 𝐀"
217
+ -- | ```
218
+ -- |
165
219
dropWhile :: (CodePoint -> Boolean ) -> String -> String
166
220
dropWhile p s = drop (count p s) s
167
221
168
222
169
223
-- | Creates a string from an array of code points. Operates in space and time
170
224
-- | linear to the length of the array.
225
+ -- |
226
+ -- | ```purescript
227
+ -- | >>> codePointArray = toCodePointArray "c 𝐀"
228
+ -- | >>> codePointArray
229
+ -- | [CodePoint 0x63, CodePoint 0x20, CodePoint 0x1D400]
230
+ -- | >>> fromCodePointArray codePointArray
231
+ -- | "c 𝐀"
232
+ -- | ```
233
+ -- |
171
234
fromCodePointArray :: Array CodePoint -> String
172
235
fromCodePointArray = _fromCodePointArray singletonFallback
173
236
@@ -178,13 +241,29 @@ foreign import _fromCodePointArray
178
241
179
242
-- | Returns the number of code points preceding the first match of the given
180
243
-- | pattern in the string. Returns Nothing when no matches are found.
244
+ -- |
245
+ -- | ```purescript
246
+ -- | >>> indexOf (Pattern "𝐀") "b 𝐀𝐀 c 𝐀"
247
+ -- | Just 2
248
+ -- | >>> indexOf (Pattern "o") "b 𝐀𝐀 c 𝐀"
249
+ -- | Nothing
250
+ -- | ```
251
+ -- |
181
252
indexOf :: String.Pattern -> String -> Maybe Int
182
253
indexOf p s = (\i -> length (String .take i s)) <$> String .indexOf p s
183
254
184
255
185
256
-- | Returns the number of code points preceding the first match of the given
186
257
-- | pattern in the string. Pattern matches preceding the given index will be
187
258
-- | ignored. Returns Nothing when no matches are found.
259
+ -- |
260
+ -- | ```purescript
261
+ -- | >>> indexOf' (Pattern "𝐀") 4 "b 𝐀𝐀 c 𝐀"
262
+ -- | Just 7
263
+ -- | >>> indexOf' (Pattern "o") 4 "b 𝐀𝐀 c 𝐀"
264
+ -- | Nothing
265
+ -- | ```
266
+ -- |
188
267
indexOf' :: String.Pattern -> Int -> String -> Maybe Int
189
268
indexOf' p i s =
190
269
let s' = drop i s in
@@ -193,13 +272,29 @@ indexOf' p i s =
193
272
194
273
-- | Returns the number of code points preceding the last match of the given
195
274
-- | pattern in the string. Returns Nothing when no matches are found.
275
+ -- |
276
+ -- | ```purescript
277
+ -- | >>> lastIndexOf (Pattern "𝐀") "b 𝐀𝐀 c 𝐀"
278
+ -- | Just 7
279
+ -- | >>> lastIndexOf (Pattern "o") "b 𝐀𝐀 c 𝐀"
280
+ -- | Nothing
281
+ -- | ```
282
+ -- |
196
283
lastIndexOf :: String.Pattern -> String -> Maybe Int
197
284
lastIndexOf p s = (\i -> length (String .take i s)) <$> String .lastIndexOf p s
198
285
199
286
200
287
-- | Returns the number of code points preceding the last match of the given
201
288
-- | pattern in the string. Pattern matches following the given index will be
202
289
-- | ignored. Returns Nothing when no matches are found.
290
+ -- |
291
+ -- | ```purescript
292
+ -- | >>> lastIndexOf' (Pattern "𝐀") 5 "b 𝐀𝐀 c 𝐀"
293
+ -- | Just 3
294
+ -- | >>> lastIndexOf' (Pattern "o") 5 "b 𝐀𝐀 c 𝐀"
295
+ -- | Nothing
296
+ -- | ```
297
+ -- |
203
298
lastIndexOf' :: String.Pattern -> Int -> String -> Maybe Int
204
299
lastIndexOf' p i s =
205
300
let i' = String .length (take i s) in
@@ -208,12 +303,27 @@ lastIndexOf' p i s =
208
303
209
304
-- | Returns the number of code points in the string. Operates in space and
210
305
-- | time linear to the length of the string.
306
+ -- |
307
+ -- | ```purescript
308
+ -- | >>> length "b 𝐀𝐀 c 𝐀"
309
+ -- | 8
310
+ -- | -- compare to Data.String:
311
+ -- | >>> length "b 𝐀𝐀 c 𝐀"
312
+ -- | 11
313
+ -- | ```
314
+ -- |
211
315
length :: String -> Int
212
316
length = Array .length <<< toCodePointArray
213
317
214
318
215
319
-- | Creates a string containing just the given code point. Operates in
216
320
-- | constant space and time.
321
+ -- |
322
+ -- | ```purescript
323
+ -- | >>> map singleton (codePointFromInt 0x1D400)
324
+ -- | Just "𝐀"
325
+ -- | ```
326
+ -- |
217
327
singleton :: CodePoint -> String
218
328
singleton = _singleton singletonFallback
219
329
@@ -233,6 +343,12 @@ singletonFallback (CodePoint cp) =
233
343
-- | Returns a record with strings created from the code points on either side
234
344
-- | of the given index. If the index is not within the string, Nothing is
235
345
-- | returned.
346
+ -- |
347
+ -- | ```purescript
348
+ -- | >>> splitAt 3 "b 𝐀𝐀 c 𝐀"
349
+ -- | Just { before: "b 𝐀", after: "𝐀 c 𝐀" }
350
+ -- | ```
351
+ -- |
236
352
splitAt :: Int -> String -> Maybe { before :: String , after :: String }
237
353
splitAt i s =
238
354
let cps = toCodePointArray s in
@@ -248,6 +364,15 @@ splitAt i s =
248
364
-- | beginning of the given string. If the string does not have that many code
249
365
-- | points, returns the whole string. Operates in constant space and in time
250
366
-- | linear to the given number.
367
+ -- |
368
+ -- | ```purescript
369
+ -- | >>> take 3 "b 𝐀𝐀 c 𝐀"
370
+ -- | "b 𝐀"
371
+ -- | -- compare to Data.String:
372
+ -- | >>> take 3 "b 𝐀𝐀 c 𝐀"
373
+ -- | "b �"
374
+ -- | ```
375
+ -- |
251
376
take :: Int -> String -> String
252
377
take = _take takeFallback
253
378
@@ -263,12 +388,27 @@ takeFallback n s = case uncons s of
263
388
-- | Returns a string containing the leading sequence of code points which all
264
389
-- | match the given predicate from the string. Operates in constant space and
265
390
-- | in time linear to the length of the string.
391
+ -- |
392
+ -- | ```purescript
393
+ -- | >>> takeWhile (\c -> codePointToInt c == 0x1D400) "𝐀𝐀 b c 𝐀"
394
+ -- | "𝐀𝐀"
395
+ -- | ```
396
+ -- |
266
397
takeWhile :: (CodePoint -> Boolean ) -> String -> String
267
398
takeWhile p s = take (count p s) s
268
399
269
400
270
401
-- | Creates an array of code points from a string. Operates in space and time
271
402
-- | linear to the length of the string.
403
+ -- |
404
+ -- | ```purescript
405
+ -- | >>> codePointArray = toCodePointArray "b 𝐀𝐀"
406
+ -- | >>> codePointArray
407
+ -- | [CodePoint 0x62, CodePoint 0x20, CodePoint 0x1D400, CodePoint 0x1D400]
408
+ -- | >>> map singleton codePointArray
409
+ -- | ["b", " ", "𝐀", "𝐀"]
410
+ -- | ```
411
+ -- |
272
412
toCodePointArray :: String -> Array CodePoint
273
413
toCodePointArray = _toCodePointArray toCodePointArrayFallback unsafeCodePointAt0
274
414
@@ -288,6 +428,14 @@ unconsButWithTuple s = (\{ head, tail } -> Tuple head tail) <$> uncons s
288
428
-- | Returns a record with the first code point and the remaining code points
289
429
-- | of the string. Returns Nothing if the string is empty. Operates in
290
430
-- | constant space and time.
431
+ -- |
432
+ -- | ```purescript
433
+ -- | >>> uncons "𝐀𝐀 c 𝐀"
434
+ -- | Just { head: CodePoint 0x1D400, tail: "𝐀 c 𝐀" }
435
+ -- | >>> uncons ""
436
+ -- | Nothing
437
+ -- | ```
438
+ -- |
291
439
uncons :: String -> Maybe { head :: CodePoint , tail :: String }
292
440
uncons s = case String .length s of
293
441
0 -> Nothing
0 commit comments