
Commit 20fad27

Lint
1 parent e86d973 commit 20fad27

File tree

1 file changed: +0 −36 lines changed


llama_cpp/llama_cpp.py

Lines changed: 0 additions & 36 deletions
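
All 36 deletions follow the same pattern: each ctypes-bound stub had a body consisting of a docstring followed by a bare `...`, and the ellipsis is redundant because the docstring alone is a valid function body, which is presumably what the linter flagged. A minimal illustration of the pattern (a generic stub, not a function from this file):

# Before: the docstring is followed by a redundant `...` placeholder
def example_stub(ctx, /) -> int:
    """Describe what the underlying C function does."""
    ...

# After: the docstring alone serves as the function body
def example_stub(ctx, /) -> int:
    """Describe what the underlying C function does."""
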
@@ -1736,15 +1736,13 @@ def llama_kv_cache_view_init(
     ctx: llama_context_p, n_seq_max: Union[ctypes.c_int32, int], /,
 ) -> llama_kv_cache_view:
     """Create an empty KV cache view. (use only for debugging purposes)"""
-    ...


 # // Free a KV cache view. (use only for debugging purposes)
 # LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view);
 @ctypes_function("llama_kv_cache_view_free", [llama_kv_cache_view_p], None)
 def llama_kv_cache_view_free(view: ctypes.pointer[llama_kv_cache_view], /):  # type: ignore
     """Free a KV cache view. (use only for debugging purposes)"""
-    ...


 # // Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
@@ -1754,7 +1752,6 @@ def llama_kv_cache_view_free(view: ctypes.pointer[llama_kv_cache_view], /): # t
 )
 def llama_kv_cache_view_update(ctx: llama_context_p, view: CtypesPointerOrRef[llama_kv_cache_view], /):  # type: ignore
     """Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)"""
-    ...


 # // Returns the number of tokens in the KV cache (slow, use only for debug)
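
The two hunks above cover the KV cache view debugging helpers. A minimal sketch of how they might be driven, using only the signatures shown in this diff, assuming ctx is an already-initialized llama_context_p and that the view's field names (n_cells, used_cells) match the llama.cpp llama_kv_cache_view struct:

import ctypes

import llama_cpp


def dump_kv_view(ctx: llama_cpp.llama_context_p, n_seq_max: int = 1) -> None:
    """Create, refresh, and free a KV cache view (debugging only)."""
    view = llama_cpp.llama_kv_cache_view_init(ctx, n_seq_max)
    try:
        # Populate the view with the current KV cache state.
        llama_cpp.llama_kv_cache_view_update(ctx, ctypes.byref(view))
        # Field names assumed from the llama.cpp struct definition.
        print("n_cells:", view.n_cells, "used cells:", view.used_cells)
    finally:
        llama_cpp.llama_kv_cache_view_free(ctypes.byref(view))
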
@@ -1767,7 +1764,6 @@ def llama_get_kv_cache_token_count(ctx: llama_context_p, /) -> int:
     """Returns the number of tokens in the KV cache (slow, use only for debug)
     If a KV cell has multiple sequences assigned to it, it will be counted multiple times
     """
-    ...


 # // Returns the number of used KV cells (i.e. have at least one sequence assigned to them)
@@ -1777,7 +1773,6 @@ def llama_get_kv_cache_token_count(ctx: llama_context_p, /) -> int:
 )
 def llama_get_kv_cache_used_cells(ctx: llama_context_p, /) -> int:
     """Returns the number of used KV cells (i.e. have at least one sequence assigned to them)"""
-    ...


 # // Clear the KV cache - both cell info is erased and KV data is zeroed
@@ -1786,7 +1781,6 @@ def llama_get_kv_cache_used_cells(ctx: llama_context_p, /) -> int:
 @ctypes_function("llama_kv_cache_clear", [llama_context_p_ctypes], None)
 def llama_kv_cache_clear(ctx: llama_context_p, /):
     """Clear the KV cache"""
-    ...


 # // Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
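
The three bindings above are enough for a small occupancy check; a sketch, again assuming ctx is a valid llama_context_p:

import llama_cpp


def report_and_clear_kv_cache(ctx: llama_cpp.llama_context_p) -> None:
    """Print KV cache occupancy (slow, debug only), then wipe it."""
    n_tokens = llama_cpp.llama_get_kv_cache_token_count(ctx)  # counts multi-sequence cells once per sequence
    n_cells = llama_cpp.llama_get_kv_cache_used_cells(ctx)    # cells with at least one sequence assigned
    print(f"KV cache: {n_tokens} tokens across {n_cells} used cells")
    llama_cpp.llama_kv_cache_clear(ctx)
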
@@ -1823,7 +1817,6 @@ def llama_kv_cache_seq_rm(
     seq_id < 0 : match any sequence
     p0 < 0 : [0, p1]
     p1 < 0 : [p0, inf)"""
-    ...


 # // Copy all tokens that belong to the specified sequence to another sequence
@@ -1859,7 +1852,6 @@ def llama_kv_cache_seq_cp(
     Note that this does not allocate extra KV cache memory - it simply assigns the tokens to the new sequence
     p0 < 0 : [0, p1]
     p1 < 0 : [p0, inf)"""
-    ...


 # // Removes all tokens that do not belong to the specified sequence
@@ -1871,7 +1863,6 @@ def llama_kv_cache_seq_cp(
 )
 def llama_kv_cache_seq_keep(ctx: llama_context_p, seq_id: Union[llama_seq_id, int], /):
     """Removes all tokens that do not belong to the specified sequence"""
-    ...


 # // Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
@@ -1947,7 +1938,6 @@ def llama_kv_cache_seq_div(
     If the KV cache is RoPEd, the KV data is updated accordingly
     p0 < 0 : [0, p1]
     p1 < 0 : [p0, inf)"""
-    ...


 # // Defragment the KV cache
@@ -1961,15 +1951,13 @@ def llama_kv_cache_defrag(ctx: llama_context_p, /):
     This will be applied:
         - lazily on next llama_decode()
         - explicitly with llama_kv_cache_update()"""
-    ...


 # // Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
 # LLAMA_API void llama_kv_cache_update(struct llama_context * ctx);
 @ctypes_function("llama_kv_cache_update", [llama_context_p_ctypes], None)
 def llama_kv_cache_update(ctx: llama_context_p, /):
     """Apply the KV cache updates (such as K-shifts, defragmentation, etc.)"""
-    ...


 # //
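
Per the docstring, defragmentation is applied either lazily on the next llama_decode() or explicitly through llama_kv_cache_update(); a sketch of the explicit path, assuming ctx is a valid llama_context_p:

import llama_cpp


def defrag_kv_cache_now(ctx: llama_cpp.llama_context_p) -> None:
    """Request defragmentation and apply pending KV cache updates immediately."""
    llama_cpp.llama_kv_cache_defrag(ctx)   # schedules the defragmentation
    llama_cpp.llama_kv_cache_update(ctx)   # applies K-shifts, defragmentation, etc.
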
@@ -1984,7 +1972,6 @@ def llama_kv_cache_update(ctx: llama_context_p, /):
 @ctypes_function("llama_state_get_size", [llama_context_p_ctypes], ctypes.c_size_t)
 def llama_state_get_size(ctx: llama_context_p, /) -> int:
     """Returns the *actual* size in bytes of the state (rng, logits, embedding and kv_cache) - will often be smaller after compacting tokens"""
-    ...


 # LLAMA_API DEPRECATED(size_t llama_get_state_size(struct llama_context * ctx),
@@ -1993,7 +1980,6 @@ def llama_state_get_size(ctx: llama_context_p, /) -> int:
 def llama_get_state_size(ctx: llama_context_p, /) -> int:
     """Returns the maximum size in bytes of the state (rng, logits, embedding
     and kv_cache) - will often be smaller after compacting tokens"""
-    ...


 # // Copies the state to the specified destination address.
@@ -2021,7 +2007,6 @@ def llama_state_get_data(
     """Copies the state to the specified destination address.
     Destination needs to have allocated enough memory.
     Returns the number of bytes copied"""
-    ...


 # LLAMA_API DEPRECATED(size_t llama_copy_state_data(
@@ -2042,7 +2027,6 @@ def llama_copy_state_data(
     """Copies the state to the specified destination address.
     Destination needs to have allocated enough memory.
     Returns the number of bytes copied"""
-    ...


 # // Set the state reading from the specified address
@@ -2064,7 +2048,6 @@ def llama_state_set_data(
 ) -> int:
     """Set the state reading from the specified address
     Returns the number of bytes read"""
-    ...


 # LLAMA_API DEPRECATED(size_t llama_set_state_data(
@@ -2080,7 +2063,6 @@ def llama_set_state_data(
     ctx: llama_context_p, src: CtypesArray[ctypes.c_uint8], /,
 ) -> int:
     """Set the state reading from the specified address"""
-    ...


 # Save/load session file
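
The hunks above span the whole-context state API. A save/restore sketch, assuming ctx is a valid llama_context_p; the save step uses the deprecated llama_copy_state_data with an assumed (ctx, dst) parameter list, since the full signature of the newer llama_state_get_data is not visible in this diff:

import ctypes

import llama_cpp


def snapshot_state(ctx: llama_cpp.llama_context_p) -> bytes:
    """Copy the full context state (rng, logits, embedding, kv_cache) into a bytes object."""
    size = llama_cpp.llama_state_get_size(ctx)        # actual size in bytes
    buf = (ctypes.c_uint8 * size)()
    n = llama_cpp.llama_copy_state_data(ctx, buf)     # assumed (ctx, dst) signature per the prototype above
    return bytes(buf[:n])


def restore_state(ctx: llama_cpp.llama_context_p, data: bytes) -> int:
    """Load a previously captured state blob back into the context; returns bytes read."""
    src = (ctypes.c_uint8 * len(data)).from_buffer_copy(data)
    return llama_cpp.llama_set_state_data(ctx, src)
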
@@ -2203,7 +2185,6 @@ def llama_save_session_file(
 )
 def llama_state_seq_get_size(ctx: llama_context_p, seq_id: llama_seq_id, /) -> int:
     """Get the exact size needed to copy the KV cache of a single sequence"""
-    ...


 # // Copy the KV cache of a single sequence into the specified buffer
@@ -2260,7 +2241,6 @@ def llama_state_seq_set_data(
     /,
 ) -> int:
     """Copy the sequence data (originally copied with `llama_state_seq_get_data`) into the specified sequence"""
-    ...


 # LLAMA_API size_t llama_state_seq_save_file(
@@ -2357,7 +2337,6 @@ def llama_batch_get_one(

     NOTE: this is a helper function to facilitate transition to the new batch API - avoid using it
     """
-    ...


 # // Allocates a batch of tokens on the heap that can hold a maximum of n_tokens
@@ -2387,15 +2366,13 @@ def llama_batch_init(
     Otherwise, llama_batch.token will be allocated to store n_tokens llama_token
     The rest of the llama_batch members are allocated with size n_tokens
     All members are left uninitialized"""
-    ...


 # // Frees a batch of tokens allocated with llama_batch_init()
 # LLAMA_API void llama_batch_free(struct llama_batch batch);
 @ctypes_function("llama_batch_free", [llama_batch], None)
 def llama_batch_free(batch: llama_batch, /):
     """Frees a batch of tokens allocated with llama_batch_init()"""
-    ...


 # // Processes a batch of tokens with the ecoder part of the encoder-decoder model.
@@ -2411,7 +2388,6 @@ def llama_encode(ctx: llama_context_p, batch: llama_batch, /) -> int:
     Stores the encoder output internally for later use by the decoder cross-attention layers.
     0 - success
     < 0 - error"""
-    ...


 # // Positive return values does not mean a fatal error, but rather a warning.
@@ -2427,7 +2403,6 @@ def llama_decode(ctx: llama_context_p, batch: llama_batch, /) -> int:
     0 - success
     1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
     < 0 - error"""
-    ...


 # // Set the number of threads used for decoding
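
The batch and decode hunks fit together; a single-token decode sketch, assuming ctx is a valid llama_context_p, that token is a valid id for the loaded vocabulary, that llama_batch_init takes (n_tokens, embd, n_seq_max), and that the llama_batch field names match the llama.cpp struct:

import llama_cpp


def decode_one_token(ctx: llama_cpp.llama_context_p, token: int, pos: int) -> int:
    """Build a one-token batch, decode it, and free the batch; returns llama_decode's code."""
    batch = llama_cpp.llama_batch_init(1, 0, 1)   # assumed (n_tokens, embd, n_seq_max) order
    try:
        batch.n_tokens = 1
        batch.token[0] = token
        batch.pos[0] = pos
        batch.n_seq_id[0] = 1
        batch.seq_id[0][0] = 0
        batch.logits[0] = 1                        # request logits for this position
        rc = llama_cpp.llama_decode(ctx, batch)
        if rc == 1:
            # No KV slot found: shrink the batch or enlarge the context (see docstring above).
            pass
        return rc
    finally:
        llama_cpp.llama_batch_free(batch)
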
@@ -2453,23 +2428,20 @@ def llama_set_n_threads(
     n_threads is the number of threads used for generation (single token)
     n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens)
     """
-    ...


 # // Get the number of threads used for generation of a single token.
 # LLAMA_API uint32_t llama_n_threads(struct llama_context * ctx);
 @ctypes_function("llama_n_threads", [llama_context_p_ctypes], ctypes.c_uint32)
 def llama_n_threads(ctx: llama_context_p, /) -> int:
     """Get the number of threads used for generation of a single token"""
-    ...


 # // Get the number of threads used for prompt and batch processing (multiple token).
 # LLAMA_API uint32_t llama_n_threads_batch(struct llama_context * ctx);
 @ctypes_function("llama_n_threads_batch", [llama_context_p_ctypes], ctypes.c_uint32)
 def llama_n_threads_batch(ctx: llama_context_p, /) -> int:
     """Get the number of threads used for prompt and batch processing (multiple token)"""
-    ...


 # // Set whether the model is in embeddings mode or not
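
A small round trip over the thread settings; the two getters appear verbatim above, and llama_set_n_threads is assumed to take (ctx, n_threads, n_threads_batch) in the order its docstring describes:

import llama_cpp


def retune_threads(ctx: llama_cpp.llama_context_p, n_threads: int, n_threads_batch: int) -> None:
    """Adjust generation and batch-processing thread counts, then echo the new values."""
    llama_cpp.llama_set_n_threads(ctx, n_threads, n_threads_batch)  # assumed argument order
    print("generation threads:", llama_cpp.llama_n_threads(ctx))
    print("batch threads:     ", llama_cpp.llama_n_threads_batch(ctx))
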
@@ -2479,7 +2451,6 @@ def llama_n_threads_batch(ctx: llama_context_p, /) -> int:
 def llama_set_embeddings(ctx: llama_context_p, embeddings: bool, /):
     """Set whether the model is in embeddings model or not
     If true, embeddings will be returned but logits will not"""
-    ...


 # // Set whether to use causal attention or not
@@ -2489,7 +2460,6 @@ def llama_set_embeddings(ctx: llama_context_p, embeddings: bool, /):
 def llama_set_causal_attn(ctx: llama_context_p, causal_attn: bool, /):
     """Set whether to use causal attention or not
     If set to true, the model will only attend to the past tokens"""
-    ...


 # // Set abort callback
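
Both setters above take a plain bool, so switching a context between embedding extraction and ordinary causal generation is a two-call affair; whether causal attention should actually be disabled depends on the embedding model, so treat the pairing as illustrative:

import llama_cpp


def enable_embedding_mode(ctx: llama_cpp.llama_context_p, enabled: bool = True) -> None:
    """Toggle embeddings-only output and (illustratively) pair it with non-causal attention."""
    llama_cpp.llama_set_embeddings(ctx, enabled)        # True: return embeddings, not logits
    llama_cpp.llama_set_causal_attn(ctx, not enabled)   # causal attention for normal generation
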
@@ -2506,7 +2476,6 @@ def llama_set_abort_callback(
     /,
 ):
     """Set abort callback"""
-    ...


 # // Wait until all computations are finished
@@ -2518,7 +2487,6 @@ def llama_synchronize(ctx: llama_context_p, /):
     """Wait until all computations are finished
     This is automatically done when using one of the functions below to obtain the computation results
     and is not necessary to call it explicitly in most cases"""
-    ...


 # // Token logits obtained from the last call to llama_decode()
@@ -2539,7 +2507,6 @@ def llama_get_logits(ctx: llama_context_p, /) -> CtypesArray[ctypes.c_float]:

     Returns:
         Pointer to the logits buffer of shape (n_tokens, n_vocab)"""
-    ...


 # // Logits for the ith token. For positive indices, Equivalent to:
@@ -2557,7 +2524,6 @@ def llama_get_logits_ith(
 ) -> CtypesArray[ctypes.c_float]:
     """Logits for the ith token. Equivalent to:
     llama_get_logits(ctx) + i*n_vocab"""
-    ...


 # // Get all output token embeddings.
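
Reading logits back goes through a raw ctypes pointer; a sketch that copies out the row for token index i, assuming ctx is a valid llama_context_p and that n_vocab has been obtained separately (for example via llama_n_vocab on the loaded model):

import llama_cpp


def logits_row(ctx: llama_cpp.llama_context_p, i: int, n_vocab: int) -> list:
    """Return the n_vocab logits for the ith token of the last llama_decode() call."""
    row = llama_cpp.llama_get_logits_ith(ctx, i)   # pointer into the (n_tokens, n_vocab) buffer
    return [row[j] for j in range(n_vocab)]
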
@@ -2573,7 +2539,6 @@ def llama_get_logits_ith(
 def llama_get_embeddings(ctx: llama_context_p, /) -> CtypesArray[ctypes.c_float]:
     """Get the embeddings for the input
     shape: [n_embd] (1-dimensional)"""
-    ...


 # // Get the embeddings for the ith token. For positive indices, Equivalent to:
@@ -2592,7 +2557,6 @@ def llama_get_embeddings_ith(
 ) -> CtypesArray[ctypes.c_float]:
     """Get the embeddings for the ith sequence
     llama_get_embeddings(ctx) + i*n_embd"""
-    ...


 # // Get the embeddings for a sequence id
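
The matching read for embeddings; n_embd likewise has to come from the model (for example via llama_n_embd), so it is passed in here:

import llama_cpp


def input_embedding(ctx: llama_cpp.llama_context_p, n_embd: int) -> list:
    """Return the [n_embd] embedding vector produced for the input."""
    vec = llama_cpp.llama_get_embeddings(ctx)   # 1-dimensional buffer of length n_embd
    if not vec:
        return []                               # NULL when the context produced no embeddings
    return [vec[j] for j in range(n_embd)]
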
