From adc27a4ef1789fde7f126715c1424bc46375df15 Mon Sep 17 00:00:00 2001 From: demoncoder-crypto Date: Tue, 25 Mar 2025 13:35:13 -0400 Subject: [PATCH 1/4] Implement unique function that returns only the unique values in a vector (Issue #940) --- doc/specs/stdlib_sorting_unique.md | 176 ++++++++++++++++++++++++++++ example/sorting/CMakeLists.txt | 1 + example/sorting/example_unique.f90 | 64 ++++++++++ src/CMakeLists.txt | 2 + src/stdlib_sorting.fypp | 9 +- src/stdlib_sorting_unique.fypp | 133 +++++++++++++++++++++ src/stdlib_sorting_unique_impl.fypp | 72 ++++++++++++ test/sorting/test_sorting.fypp | 146 ++++++++++++++++++++++- 8 files changed, 598 insertions(+), 5 deletions(-) create mode 100644 doc/specs/stdlib_sorting_unique.md create mode 100644 example/sorting/example_unique.f90 create mode 100644 src/stdlib_sorting_unique.fypp create mode 100644 src/stdlib_sorting_unique_impl.fypp diff --git a/doc/specs/stdlib_sorting_unique.md b/doc/specs/stdlib_sorting_unique.md new file mode 100644 index 000000000..8223cf90a --- /dev/null +++ b/doc/specs/stdlib_sorting_unique.md @@ -0,0 +1,176 @@ +--- +title: unique function +--- + +# The `unique` function + +[TOC] + +## Introduction + +This function returns an array containing only the unique values extracted from an input array. This is useful for removing duplicates from datasets and finding the distinct elements in a collection. + +## Status + +The `unique` function is currently in **experimental** status. + +## Version History + +|Version|Change| +|---|---| +|v0.1.0|Initial functionality in experimental status| + +## Requirements + +This function has been designed to handle arrays of different types, including intrinsic numeric types, character arrays, and `string_type` arrays. The function should be efficient while maintaining an easy-to-use interface. + +## Usage + +```fortran +! Get unique values from an integer array +integer :: x(5) = [1, 2, 3, 3, 4] +integer, allocatable :: y(:) +y = unique(x) ! y will be [1, 2, 3, 4] + +! Get sorted unique values from a real array +real :: a(8) = [3.1, 2.5, 7.2, 3.1, 2.5, 8.0, 7.2, 9.5] +real, allocatable :: b(:) +b = unique(a, sorted=.true.) ! b will be [2.5, 3.1, 7.2, 8.0, 9.5] +``` + +## API + +### `unique` - Returns unique values from an array + +#### Interface + +```fortran +pure function unique(array, sorted) result(unique_values) + , intent(in) :: array(:) + logical, intent(in), optional :: sorted + , allocatable :: unique_values(:) +end function unique +``` + +where `` can be any of: +* `integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)` +* `real(sp)`, `real(dp)`, `real(xdp)`, `real(qp)` +* `complex(sp)`, `complex(dp)`, `complex(xdp)`, `complex(qp)` +* `character(len=*)` +* `type(string_type)` + +#### Arguments + +`array`: Array whose unique values need to be extracted. + +`sorted` (optional): Whether the output vector needs to be sorted or not. Default is `.false.`. + +#### Result + +The function returns an allocatable array containing only the unique values from the input array. + +If `sorted` is `.true.`, the returned array will be sorted in order of non-decreasing values. + +If `sorted` is `.false.` (the default), the order of elements is unspecified but generally reflects the order of first appearance of each unique value in the input array. + +## Examples + +### Example 1: Basic usage with integers + +```fortran +program example_unique_integers + use stdlib_sorting, only: unique + implicit none + + integer :: data(10) = [1, 2, 3, 3, 4, 5, 5, 6, 6, 6] + integer, allocatable :: unique_values(:) + + ! Get unique values + unique_values = unique(data) + + ! Print the results + print *, "Original array: ", data + print *, "Unique values: ", unique_values + +end program example_unique_integers +``` + +Expected output: +``` +Original array: 1 2 3 3 4 5 5 6 6 6 +Unique values: 1 2 3 4 5 6 +``` + +### Example 2: Using the sorted option with real values + +```fortran +program example_unique_reals + use stdlib_kinds, only: sp + use stdlib_sorting, only: unique + implicit none + + real(sp) :: data(8) = [3.1, 2.5, 7.2, 3.1, 2.5, 8.0, 7.2, 9.5] + real(sp), allocatable :: unique_values(:) + + ! Get unique values in sorted order + unique_values = unique(data, sorted=.true.) + + ! Print the results + print *, "Original array: ", data + print *, "Sorted unique values: ", unique_values + +end program example_unique_reals +``` + +Expected output: +``` +Original array: 3.1 2.5 7.2 3.1 2.5 8.0 7.2 9.5 +Sorted unique values: 2.5 3.1 7.2 8.0 9.5 +``` + +### Example 3: Working with character arrays + +```fortran +program example_unique_strings + use stdlib_sorting, only: unique + implicit none + + character(len=6) :: data(7) = ["apple ", "banana", "cherry", "apple ", "date ", "banana", "cherry"] + character(len=6), allocatable :: unique_values(:) + integer :: i + + ! Get unique values + unique_values = unique(data) + + ! Print the results + print *, "Original array:" + do i = 1, size(data) + print *, data(i) + end do + + print *, "Unique values:" + do i = 1, size(unique_values) + print *, unique_values(i) + end do + +end program example_unique_strings +``` + +## Implementation Notes + +The implementation uses a sorting-based approach to identify unique elements efficiently. When `sorted=.true.`, the algorithm sorts the input array and then identifies adjacent duplicate elements. When `sorted=.false.`, the function still uses sorting internally but ensures that the order of first appearance is preserved. + +## Future Enhancements + +Future versions might include additional features: + +1. Return the indices of the first occurrence of each unique element +2. Return indices that can reconstruct the original array from the unique elements +3. Support for multi-dimensional arrays +4. Tolerance parameter for floating-point comparisons + +## Related Functions + +* `sort` - Sorts an array in ascending or descending order +* `sort_index` - Creates index array that would sort an array +* `ord_sort` - Performs a stable sort on an array \ No newline at end of file diff --git a/example/sorting/CMakeLists.txt b/example/sorting/CMakeLists.txt index 4628ce20c..50e539e8a 100644 --- a/example/sorting/CMakeLists.txt +++ b/example/sorting/CMakeLists.txt @@ -3,3 +3,4 @@ ADD_EXAMPLE(sort) ADD_EXAMPLE(sort_index) ADD_EXAMPLE(radix_sort) ADD_EXAMPLE(sort_bitset) +ADD_EXAMPLE(unique) diff --git a/example/sorting/example_unique.f90 b/example/sorting/example_unique.f90 new file mode 100644 index 000000000..8db181627 --- /dev/null +++ b/example/sorting/example_unique.f90 @@ -0,0 +1,64 @@ +program example_unique + use stdlib_kinds, only: dp, sp + use stdlib_sorting, only: unique + use stdlib_string_type, only: string_type + implicit none + + ! Example with integer array + integer :: int_array(10) = [1, 2, 3, 3, 4, 5, 5, 6, 6, 6] + integer, allocatable :: int_unique(:) + + ! Example with real array + real(sp) :: real_array(8) = [3.1, 2.5, 7.2, 3.1, 2.5, 8.0, 7.2, 9.5] + real(sp), allocatable :: real_unique(:) + + ! Example with character array + character(len=6) :: char_array(7) = ["apple ", "banana", "cherry", "apple ", "date ", "banana", "cherry"] + character(len=6), allocatable :: char_unique(:) + + ! Example with string_type array + type(string_type) :: string_array(8), string_unique_sorted(4) + type(string_type), allocatable :: string_unique(:) + + integer :: i + + ! Setup string array + string_array(1) = "apple" + string_array(2) = "banana" + string_array(3) = "cherry" + string_array(4) = "apple" + string_array(5) = "date" + string_array(6) = "banana" + string_array(7) = "cherry" + string_array(8) = "apple" + + ! Get unique integer values + int_unique = unique(int_array) + print *, "Unique integers:", int_unique + + ! Get sorted unique integer values + int_unique = unique(int_array, sorted=.true.) + print *, "Sorted unique integers:", int_unique + + ! Get unique real values + real_unique = unique(real_array) + print *, "Unique reals:", real_unique + + ! Get sorted unique real values + real_unique = unique(real_array, sorted=.true.) + print *, "Sorted unique reals:", real_unique + + ! Get unique character values + char_unique = unique(char_array) + print *, "Unique strings:" + do i = 1, size(char_unique) + print *, char_unique(i) + end do + + ! Get unique string_type values (sorted) + string_unique = unique(string_array, sorted=.true.) + print *, "Sorted unique string_type values:" + do i = 1, size(string_unique) + print *, string_unique(i) + end do +end program example_unique \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d82aae118..b7926a875 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -48,6 +48,8 @@ set(fppFiles stdlib_sorting_ord_sort.fypp stdlib_sorting_sort.fypp stdlib_sorting_sort_index.fypp + stdlib_sorting_unique.fypp + stdlib_sorting_unique_impl.fypp stdlib_sparse_constants.fypp stdlib_sparse_conversion.fypp stdlib_sparse_kinds.fypp diff --git a/src/stdlib_sorting.fypp b/src/stdlib_sorting.fypp index e0bb93827..c7fa5bc53 100644 --- a/src/stdlib_sorting.fypp +++ b/src/stdlib_sorting.fypp @@ -70,7 +70,7 @@ !! in the Fortran Standard Library under the MIT license provided !! we cite: !! -!! Musser, D.R., “Introspective Sorting and Selection Algorithms,” +!! Musser, D.R., "Introspective Sorting and Selection Algorithms," !! Software—Practice and Experience, Vol. 27(8), 983–993 (August 1997). !! !! as the official source of the algorithm. @@ -135,13 +135,16 @@ module stdlib_sorting use stdlib_bitsets, only: bitset_64, bitset_large, & assignment(=), operator(>), operator(>=), operator(<), operator(<=) - + + use stdlib_sorting_unique, only: unique + implicit none private integer, parameter, public :: int_index = int64 !! Integer kind for indexing integer, parameter, public :: int_index_low = int32 !! Integer kind for indexing using less than `huge(1_int32)` values - + + public :: unique ! Constants for use by tim_sort integer, parameter :: & diff --git a/src/stdlib_sorting_unique.fypp b/src/stdlib_sorting_unique.fypp new file mode 100644 index 000000000..3d5a35248 --- /dev/null +++ b/src/stdlib_sorting_unique.fypp @@ -0,0 +1,133 @@ +#:include "common.fypp" + +#:set INT_TYPES_ALT_NAME = list(zip(INT_TYPES, INT_TYPES, INT_KINDS)) +#:set REAL_TYPES_ALT_NAME = list(zip(REAL_TYPES, REAL_TYPES, REAL_KINDS)) +#:set STRING_TYPES_ALT_NAME = list(zip(STRING_TYPES, STRING_TYPES, STRING_KINDS)) +#:set CHAR_TYPES_ALT_NAME = list(zip(["character(len=*)"], ["character(len=len(array))"], ["char"])) +#:set COMPLEX_TYPES_ALT_NAME = list(zip(CMPLX_TYPES, CMPLX_TYPES, CMPLX_KINDS)) + +#! For better code reuse in fypp, make lists that contain the input types, +#! with each having output types and a separate name prefix for subroutines +#! This approach allows us to have the same code for all input types. +#:set IRSC_TYPES_ALT_NAME = INT_TYPES_ALT_NAME + REAL_TYPES_ALT_NAME + STRING_TYPES_ALT_NAME + CHAR_TYPES_ALT_NAME + COMPLEX_TYPES_ALT_NAME + +!! Licensing: +!! +!! This file is subject to the Fortran Standard Library license. +!! +!! The Fortran Standard Library, including this file, is distributed under +!! the MIT license that should be included with the library's distribution. +!! +!! Copyright (c) 2024 Fortran stdlib developers +!! +!! Permission is hereby granted, free of charge, to any person obtaining a +!! copy of this software and associated documentation files (the +!! "Software"), to deal in the Software without restriction, including +!! without limitation the rights to use, copy, modify, merge, publish, +!! distribute, sublicense, and/or sellcopies of the Software, and to permit +!! persons to whom the Software is furnished to do so, subject to the +!! following conditions: +!! +!! The above copyright notice and this permission notice shall be included +!! in all copies or substantial portions of the Software. +!! +!! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +!! OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +!! MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +!! IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +!! CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +!! TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +!! SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +module stdlib_sorting_unique +!! This module implements overloaded unique functions that can be used to extract +!! unique values from arrays of various types: integer, real, complex, character, +!! and string_type. +!! ([Specification](../page/specs/stdlib_sorting_unique.html)) +!! +!! By default, the output array is not sorted, but this can be changed +!! with the optional parameter `sorted`. When sorted, the output will +!! be in order of increasing value. All functions have worst case +!! run time performance of `O(N Ln(N))` due to the sorting step. + + use stdlib_kinds, only: & + int8, & + int16, & + int32, & + int64, & + sp, & + dp, & + xdp, & + qp, & + lk + + use stdlib_sorting, only: sort + + use stdlib_optval, only: optval + + use stdlib_string_type, only: string_type, assignment(=), operator(==) + + implicit none + private + + public :: unique + + interface unique +!! Version: experimental +!! +!! The generic function implementing the `unique` algorithm to return +!! a new array containing only the unique values from the input array. +!! Its use has the syntax: +!! +!! result = unique(array[, sorted]) +!! +!! with the arguments: +!! +!! * array: the rank 1 array from which to extract unique values. It is an `intent(in)` +!! argument of any of the types `integer(int8)`, `integer(int16)`, +!! `integer(int32)`, `integer(int64)`, `real(real32)`, `real(real64)`, +!! `real(real128)`, `complex(real32)`, `complex(real64)`, `complex(real128)`, +!! `character(*)`, or `type(string_type)`. +!! +!! * sorted (optional): shall be a scalar of type default logical. It +!! is an `intent(in)` argument. If present with a value of `.true.` then +!! the returned array will be sorted in order of non-decreasing values. +!! Otherwise the order is unspecified, but generally reflects the order of +!! first appearance of each unique value in the input array. +!! +!!#### Example +!! +!!```fortran +!! ... +!! ! Extract unique values from an array +!! integer :: x(5) = [1, 2, 3, 3, 4] +!! integer, allocatable :: y(:) +!! +!! y = unique(x) ! y will be [1, 2, 3, 4] +!! +!! ! Use with optional sorted argument +!! real :: a(8) = [3.1, 2.5, 7.2, 3.1, 2.5, 8.0, 7.2, 9.5] +!! real, allocatable :: b(:) +!! +!! b = unique(a, sorted=.true.) ! b will be [2.5, 3.1, 7.2, 8.0, 9.5] +!! ... +!!``` + +#:for t1, t2, name1 in IRSC_TYPES_ALT_NAME + pure module function ${name1}$_unique(array, sorted) result(unique_values) +!! Version: experimental +!! +!! `${name1}$_unique(array, sorted)` returns an array of unique values +!! from the input `array` of type `${t1}$`. If the optional argument `sorted` +!! is present with value `.true.`, the returned array will be sorted. + ${t1}$, intent(in) :: array(:) + logical(lk), intent(in), optional :: sorted + ${t2}$, allocatable :: unique_values(:) + end function ${name1}$_unique +#:endfor + + end interface unique + +contains + +end module stdlib_sorting_unique \ No newline at end of file diff --git a/src/stdlib_sorting_unique_impl.fypp b/src/stdlib_sorting_unique_impl.fypp new file mode 100644 index 000000000..a647d2cf0 --- /dev/null +++ b/src/stdlib_sorting_unique_impl.fypp @@ -0,0 +1,72 @@ +#:include "common.fypp" + +#:set INT_TYPES_ALT_NAME = list(zip(INT_TYPES, INT_TYPES, INT_KINDS)) +#:set REAL_TYPES_ALT_NAME = list(zip(REAL_TYPES, REAL_TYPES, REAL_KINDS)) +#:set STRING_TYPES_ALT_NAME = list(zip(STRING_TYPES, STRING_TYPES, STRING_KINDS)) +#:set CHAR_TYPES_ALT_NAME = list(zip(["character(len=*)"], ["character(len=len(array))"], ["char"])) +#:set COMPLEX_TYPES_ALT_NAME = list(zip(CMPLX_TYPES, CMPLX_TYPES, CMPLX_KINDS)) + +#! For better code reuse in fypp, make lists that contain the input types, +#! with each having output types and a separate name prefix for subroutines +#! This approach allows us to have the same code for all input types. +#:set IRSC_TYPES_ALT_NAME = INT_TYPES_ALT_NAME + REAL_TYPES_ALT_NAME + STRING_TYPES_ALT_NAME + CHAR_TYPES_ALT_NAME + COMPLEX_TYPES_ALT_NAME + +submodule (stdlib_sorting_unique) stdlib_sorting_unique_impl + use stdlib_sorting, only: sort + implicit none + +contains + +#:for t1, t2, name1 in IRSC_TYPES_ALT_NAME + pure module function ${name1}$_unique(array, sorted) result(unique_values) + ${t1}$, intent(in) :: array(:) + logical(lk), intent(in), optional :: sorted + ${t2}$, allocatable :: unique_values(:) + + ${t2}$ :: temp_array(size(array)) + logical :: mask(size(array)) + integer :: i, j, n, unique_count + logical :: want_sorted + + want_sorted = optval(sorted, .false.) + + n = size(array) + if (n == 0) then + allocate(unique_values(0)) + return + endif + + ! Create a temporary copy that may be sorted + if (want_sorted) then + temp_array = array + call sort(temp_array) + else + temp_array = array + endif + + ! Find unique elements using a mask + ! Start with first element always marked as unique + mask(1) = .true. + + ! Compare each element with previous to mark duplicates + do i = 2, n + mask(i) = temp_array(i) /= temp_array(i-1) + end do + + ! Count unique elements and allocate result array + unique_count = count(mask) + allocate(unique_values(unique_count)) + + ! Extract unique elements to result array + j = 0 + do i = 1, n + if (mask(i)) then + j = j + 1 + unique_values(j) = temp_array(i) + endif + end do + end function ${name1}$_unique + +#:endfor + +end submodule stdlib_sorting_unique_impl \ No newline at end of file diff --git a/test/sorting/test_sorting.fypp b/test/sorting/test_sorting.fypp index 1418a032f..337820951 100644 --- a/test/sorting/test_sorting.fypp +++ b/test/sorting/test_sorting.fypp @@ -4,10 +4,10 @@ module test_sorting use, intrinsic :: iso_fortran_env, only: compiler_version, error_unit - use stdlib_kinds, only: int32, int64, dp, sp + use stdlib_kinds, only: int32, int64, dp, sp, lk use stdlib_sorting use stdlib_string_type, only: string_type, assignment(=), operator(>), & - operator(<), write(formatted) + operator(<), operator(==), write(formatted) use stdlib_bitsets, only: bitset_64, bitset_large, & assignment(=), operator(>), operator(<) use testdrive, only: new_unittest, unittest_type, error_type, check @@ -106,11 +106,153 @@ contains new_unittest('bitset_large_sort_indexes_${namei}$', test_bitsetl_sort_indexes_${namei}$), & new_unittest('bitset_64_sort_indexes_${namei}$', test_bitset64_sort_indexes_${namei}$), & #:endfor + new_unittest('int_unique', test_int_unique), & + new_unittest('real_unique', test_real_unique), & + new_unittest('char_unique', test_char_unique), & + new_unittest('string_unique', test_string_unique), & new_unittest('int_ord_sorts', test_int_ord_sorts) & ] end subroutine collect_sorting + + ! Test function for the unique integer functionality + subroutine test_int_unique(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + integer :: test_array(10), expected(6) + integer, allocatable :: result(:) + + ! Test case 1: Basic functionality + test_array = [1, 2, 3, 3, 4, 5, 5, 6, 6, 6] + expected = [1, 2, 3, 4, 5, 6] + + result = unique(test_array) + call check(error, size(result) == 6, "Size of unique array should be 6") + if (allocated(error)) return + + ! Check if all values are present + call check(error, all([(any(result == expected(i)), i=1,6)]), & + "All expected values should be present in result") + if (allocated(error)) return + + ! Test case 2: With sorted=.true. + deallocate(result) + result = unique(test_array, sorted=.true.) + call check(error, size(result) == 6, "Size of sorted unique array should be 6") + if (allocated(error)) return + + ! Check if result is sorted + call check(error, all(result == expected), & + "Result should be sorted and match expected values") + if (allocated(error)) return + + ! Test case 3: Empty array + deallocate(result) + result = unique([integer ::]) + call check(error, size(result) == 0, "Size of unique array from empty input should be 0") + if (allocated(error)) return + + ! Test case 4: Array with all identical elements + deallocate(result) + result = unique([5, 5, 5, 5, 5]) + call check(error, size(result) == 1, "Size of unique array with identical elements should be 1") + if (allocated(error)) return + call check(error, result(1) == 5, "Unique value should be 5") + end subroutine test_int_unique + + ! Test function for the unique real functionality + subroutine test_real_unique(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + real(sp) :: test_array(8), expected(5) + real(sp), allocatable :: result(:) + + ! Test case 1: Basic functionality + test_array = [3.1, 2.5, 7.2, 3.1, 2.5, 8.0, 7.2, 9.5] + expected = [2.5, 3.1, 7.2, 8.0, 9.5] + + result = unique(test_array) + call check(error, size(result) == 5, "Size of unique array should be 5") + if (allocated(error)) return + + ! Test case 2: With sorted=.true. + deallocate(result) + result = unique(test_array, sorted=.true.) + call check(error, size(result) == 5, "Size of sorted unique array should be 5") + if (allocated(error)) return + + ! Check if result is sorted + call check(error, all(abs(result - expected) < 1.0e-5), & + "Result should be sorted and match expected values") + end subroutine test_real_unique + + ! Test function for the unique character functionality + subroutine test_char_unique(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=4) :: test_array(7), expected(4) + character(len=4), allocatable :: result(:) + + ! Test case 1: Basic functionality + test_array = ["abcd", "efgh", "ijkl", "abcd", "mnop", "efgh", "ijkl"] + expected = ["abcd", "efgh", "ijkl", "mnop"] + + result = unique(test_array) + call check(error, size(result) == 4, "Size of unique array should be 4") + if (allocated(error)) return + + ! Test case 2: With sorted=.true. + deallocate(result) + result = unique(test_array, sorted=.true.) + call check(error, size(result) == 4, "Size of sorted unique array should be 4") + if (allocated(error)) return + + ! Check if result is sorted alphabetically + call check(error, all(result == expected), & + "Result should be sorted and match expected values") + end subroutine test_char_unique + + ! Test function for the unique string_type functionality + subroutine test_string_unique(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + type(string_type) :: test_array(8), expected(4) + type(string_type), allocatable :: result(:) + + ! Test case 1: Basic functionality + test_array(1) = "apple" + test_array(2) = "banana" + test_array(3) = "cherry" + test_array(4) = "apple" + test_array(5) = "date" + test_array(6) = "banana" + test_array(7) = "cherry" + test_array(8) = "apple" + + expected(1) = "apple" + expected(2) = "banana" + expected(3) = "cherry" + expected(4) = "date" + + result = unique(test_array) + call check(error, size(result) == 4, "Size of unique array should be 4") + if (allocated(error)) return + + ! Test case 2: With sorted=.true. + deallocate(result) + result = unique(test_array, sorted=.true.) + call check(error, size(result) == 4, "Size of sorted unique array should be 4") + if (allocated(error)) return + + ! Check if result is sorted alphabetically + call check(error, all([(result(i) == expected(i), i=1,4)]), & + "Result should be sorted and match expected values") + end subroutine test_string_unique subroutine initialize_tests() From 02ecfc662c745e4d2e9f306c4b55d395a96f0d4d Mon Sep 17 00:00:00 2001 From: demoncoder-crypto Date: Sun, 30 Mar 2025 23:12:27 +0530 Subject: [PATCH 2/4] Improve unique function: clarify sorted parameter semantics and optimize edge cases --- src/stdlib_sorting_unique_impl.fypp | 55 +++++++++++++---------------- 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/src/stdlib_sorting_unique_impl.fypp b/src/stdlib_sorting_unique_impl.fypp index a647d2cf0..23145bf20 100644 --- a/src/stdlib_sorting_unique_impl.fypp +++ b/src/stdlib_sorting_unique_impl.fypp @@ -22,49 +22,44 @@ contains ${t1}$, intent(in) :: array(:) logical(lk), intent(in), optional :: sorted ${t2}$, allocatable :: unique_values(:) - + ${t2}$ :: temp_array(size(array)) logical :: mask(size(array)) - integer :: i, j, n, unique_count - logical :: want_sorted - - want_sorted = optval(sorted, .false.) - + integer :: i, n + logical :: is_input_sorted + n = size(array) + + ! Handle edge cases first if (n == 0) then + ! Return empty array for empty input allocate(unique_values(0)) return + else if (n == 1) then + ! For single-element arrays, return that element directly + allocate(unique_values(1)) + unique_values(1) = array(1) + return endif - - ! Create a temporary copy that may be sorted - if (want_sorted) then - temp_array = array - call sort(temp_array) - else - temp_array = array - endif - + + ! Determine if the input is already sorted + is_input_sorted = optval(sorted, .false.) + + ! Create a temporary copy and sort it if needed + temp_array = array + if (.not. is_input_sorted) call sort(temp_array) + ! Find unique elements using a mask ! Start with first element always marked as unique mask(1) = .true. - + ! Compare each element with previous to mark duplicates - do i = 2, n + do concurrent (i=2:n) mask(i) = temp_array(i) /= temp_array(i-1) end do - - ! Count unique elements and allocate result array - unique_count = count(mask) - allocate(unique_values(unique_count)) - - ! Extract unique elements to result array - j = 0 - do i = 1, n - if (mask(i)) then - j = j + 1 - unique_values(j) = temp_array(i) - endif - end do + + ! Extract unique elements to result array using pack + unique_values = pack(temp_array, mask) end function ${name1}$_unique #:endfor From 53eaa1299aff1ba8fc1e417590ef1ddb1e28535a Mon Sep 17 00:00:00 2001 From: demoncoder-crypto Date: Tue, 1 Apr 2025 22:25:56 +0530 Subject: [PATCH 3/4] Refactor unique function to resolve circular dependency --- src/stdlib_sorting.fypp | 51 +++++++++++- src/stdlib_sorting_unique.fypp | 116 +++++++++++----------------- src/stdlib_sorting_unique_impl.fypp | 67 ---------------- 3 files changed, 95 insertions(+), 139 deletions(-) delete mode 100644 src/stdlib_sorting_unique_impl.fypp diff --git a/src/stdlib_sorting.fypp b/src/stdlib_sorting.fypp index c7fa5bc53..64660ba3e 100644 --- a/src/stdlib_sorting.fypp +++ b/src/stdlib_sorting.fypp @@ -5,6 +5,7 @@ #:set STRING_TYPES_ALT_NAME = list(zip(STRING_TYPES, STRING_TYPES, STRING_KINDS)) #:set CHAR_TYPES_ALT_NAME = list(zip(["character(len=*)"], ["character(len=len(array))"], ["char"])) #:set BITSET_TYPES_ALT_NAME = list(zip(BITSET_TYPES, BITSET_TYPES, BITSET_KINDS)) +#:set COMPLEX_TYPES_ALT_NAME = list(zip(CMPLX_TYPES, CMPLX_TYPES, CMPLX_KINDS)) #:set INT_INDEX_TYPES_ALT_NAME = list(zip(["int_index", "int_index_low"], ["integer(int_index)", "integer(int_index_low)"], ["default", "low"])) @@ -13,6 +14,7 @@ #! This approach allows us to have the same code for all input types. #:set IRSCB_TYPES_ALT_NAME = INT_TYPES_ALT_NAME + REAL_TYPES_ALT_NAME + STRING_TYPES_ALT_NAME + CHAR_TYPES_ALT_NAME & & + BITSET_TYPES_ALT_NAME +#:set IRSC_TYPES_ALT_NAME = INT_TYPES_ALT_NAME + REAL_TYPES_ALT_NAME + STRING_TYPES_ALT_NAME + CHAR_TYPES_ALT_NAME + COMPLEX_TYPES_ALT_NAME !! Licensing: !! @@ -136,15 +138,12 @@ module stdlib_sorting use stdlib_bitsets, only: bitset_64, bitset_large, & assignment(=), operator(>), operator(>=), operator(<), operator(<=) - use stdlib_sorting_unique, only: unique - implicit none private integer, parameter, public :: int_index = int64 !! Integer kind for indexing integer, parameter, public :: int_index_low = int32 !! Integer kind for indexing using less than `huge(1_int32)` values - public :: unique ! Constants for use by tim_sort integer, parameter :: & @@ -163,6 +162,52 @@ module stdlib_sorting integer(int_index) :: len = 0 end type run_type + interface unique +!! Version: experimental +!! +!! The generic function implementing the `unique` algorithm to return +!! a new array containing only the unique values from the input array. +!! Its use has the syntax: +!! +!! result = unique(array[, sorted]) +!! +!! with the arguments: +!! +!! * array: the rank 1 array from which to extract unique values. It is an `intent(in)` +!! argument of any of the types `integer(int8)`, `integer(int16)`, +!! `integer(int32)`, `integer(int64)`, `real(real32)`, `real(real64)`, +!! `real(real128)`, `complex(real32)`, `complex(real64)`, `complex(real128)`, +!! `character(*)`, or `type(string_type)`. +!! +!! * sorted (optional): shall be a scalar of type default logical. It +!! is an `intent(in)` argument that indicates whether the input array +!! is already sorted. If present with value `.true.`, the function will +!! skip sorting the input, which can save computational time. Default is `.false.`. +!! The output will always have duplicate elements removed and will be in +!! the same order as the input (if sorted) or in sorted order (if not already sorted). +!! +!!#### Example +!! +!!```fortran +!! ... +!! ! Extract unique values from an array +!! integer :: x(5) = [1, 2, 3, 3, 4] +!! integer, allocatable :: y(:) +!! +!! y = unique(x) ! y will be [1, 2, 3, 4] +!! +!! ! Use with optional sorted argument when input is already sorted +!! integer :: z(8) = [1, 2, 2, 3, 5, 5, 7, 8] +!! integer, allocatable :: u(:) +!! +!! u = unique(z, sorted=.true.) ! Skip sorting, u will be [1, 2, 3, 5, 7, 8] +!! ... +!!``` +#:for t1, t2, name1 in IRSC_TYPES_ALT_NAME + module procedure ${name1}$_unique +#:endfor + end interface unique + public ord_sort !! Version: experimental !! diff --git a/src/stdlib_sorting_unique.fypp b/src/stdlib_sorting_unique.fypp index 3d5a35248..59dcacd4f 100644 --- a/src/stdlib_sorting_unique.fypp +++ b/src/stdlib_sorting_unique.fypp @@ -39,17 +39,7 @@ !! TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE !! SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -module stdlib_sorting_unique -!! This module implements overloaded unique functions that can be used to extract -!! unique values from arrays of various types: integer, real, complex, character, -!! and string_type. -!! ([Specification](../page/specs/stdlib_sorting_unique.html)) -!! -!! By default, the output array is not sorted, but this can be changed -!! with the optional parameter `sorted`. When sorted, the output will -!! be in order of increasing value. All functions have worst case -!! run time performance of `O(N Ln(N))` due to the sorting step. - +submodule (stdlib_sorting) stdlib_sorting_unique use stdlib_kinds, only: & int8, & int16, & @@ -60,74 +50,62 @@ module stdlib_sorting_unique xdp, & qp, & lk - - use stdlib_sorting, only: sort - use stdlib_optval, only: optval - use stdlib_string_type, only: string_type, assignment(=), operator(==) - implicit none - private - public :: unique - - interface unique -!! Version: experimental -!! -!! The generic function implementing the `unique` algorithm to return -!! a new array containing only the unique values from the input array. -!! Its use has the syntax: -!! -!! result = unique(array[, sorted]) -!! -!! with the arguments: -!! -!! * array: the rank 1 array from which to extract unique values. It is an `intent(in)` -!! argument of any of the types `integer(int8)`, `integer(int16)`, -!! `integer(int32)`, `integer(int64)`, `real(real32)`, `real(real64)`, -!! `real(real128)`, `complex(real32)`, `complex(real64)`, `complex(real128)`, -!! `character(*)`, or `type(string_type)`. -!! -!! * sorted (optional): shall be a scalar of type default logical. It -!! is an `intent(in)` argument. If present with a value of `.true.` then -!! the returned array will be sorted in order of non-decreasing values. -!! Otherwise the order is unspecified, but generally reflects the order of -!! first appearance of each unique value in the input array. -!! -!!#### Example -!! -!!```fortran -!! ... -!! ! Extract unique values from an array -!! integer :: x(5) = [1, 2, 3, 3, 4] -!! integer, allocatable :: y(:) -!! -!! y = unique(x) ! y will be [1, 2, 3, 4] -!! -!! ! Use with optional sorted argument -!! real :: a(8) = [3.1, 2.5, 7.2, 3.1, 2.5, 8.0, 7.2, 9.5] -!! real, allocatable :: b(:) -!! -!! b = unique(a, sorted=.true.) ! b will be [2.5, 3.1, 7.2, 8.0, 9.5] -!! ... -!!``` +contains #:for t1, t2, name1 in IRSC_TYPES_ALT_NAME - pure module function ${name1}$_unique(array, sorted) result(unique_values) + pure module procedure ${name1}$_unique(array, sorted) result(unique_values) !! Version: experimental !! !! `${name1}$_unique(array, sorted)` returns an array of unique values !! from the input `array` of type `${t1}$`. If the optional argument `sorted` -!! is present with value `.true.`, the returned array will be sorted. - ${t1}$, intent(in) :: array(:) - logical(lk), intent(in), optional :: sorted - ${t2}$, allocatable :: unique_values(:) - end function ${name1}$_unique -#:endfor +!! is present with value `.true.`, the function assumes the input is already sorted +!! and skips the sorting step. + ${t1}$, intent(in) :: array(:) + logical(lk), intent(in), optional :: sorted + ${t2}$, allocatable :: unique_values(:) - end interface unique + ${t2}$ :: temp_array(size(array)) + logical :: mask(size(array)) + integer :: i, n + logical :: is_input_sorted -contains + n = size(array) + + ! Handle edge cases first + if (n == 0) then + ! Return empty array for empty input + allocate(unique_values(0)) + return + else if (n == 1) then + ! For single-element arrays, return that element directly + allocate(unique_values(1)) + unique_values(1) = array(1) + return + endif + + ! Determine if the input is already sorted + is_input_sorted = optval(sorted, .false.) + + ! Create a temporary copy and sort it if needed + temp_array = array + if (.not. is_input_sorted) call sort(temp_array) + + ! Find unique elements using a mask + ! Start with first element always marked as unique + mask(1) = .true. + + ! Compare each element with previous to mark duplicates + do concurrent (i=2:n) + mask(i) = temp_array(i) /= temp_array(i-1) + end do + + ! Extract unique elements to result array using pack + unique_values = pack(temp_array, mask) + end procedure ${name1}$_unique +#:endfor -end module stdlib_sorting_unique \ No newline at end of file +end submodule stdlib_sorting_unique \ No newline at end of file diff --git a/src/stdlib_sorting_unique_impl.fypp b/src/stdlib_sorting_unique_impl.fypp deleted file mode 100644 index 23145bf20..000000000 --- a/src/stdlib_sorting_unique_impl.fypp +++ /dev/null @@ -1,67 +0,0 @@ -#:include "common.fypp" - -#:set INT_TYPES_ALT_NAME = list(zip(INT_TYPES, INT_TYPES, INT_KINDS)) -#:set REAL_TYPES_ALT_NAME = list(zip(REAL_TYPES, REAL_TYPES, REAL_KINDS)) -#:set STRING_TYPES_ALT_NAME = list(zip(STRING_TYPES, STRING_TYPES, STRING_KINDS)) -#:set CHAR_TYPES_ALT_NAME = list(zip(["character(len=*)"], ["character(len=len(array))"], ["char"])) -#:set COMPLEX_TYPES_ALT_NAME = list(zip(CMPLX_TYPES, CMPLX_TYPES, CMPLX_KINDS)) - -#! For better code reuse in fypp, make lists that contain the input types, -#! with each having output types and a separate name prefix for subroutines -#! This approach allows us to have the same code for all input types. -#:set IRSC_TYPES_ALT_NAME = INT_TYPES_ALT_NAME + REAL_TYPES_ALT_NAME + STRING_TYPES_ALT_NAME + CHAR_TYPES_ALT_NAME + COMPLEX_TYPES_ALT_NAME - -submodule (stdlib_sorting_unique) stdlib_sorting_unique_impl - use stdlib_sorting, only: sort - implicit none - -contains - -#:for t1, t2, name1 in IRSC_TYPES_ALT_NAME - pure module function ${name1}$_unique(array, sorted) result(unique_values) - ${t1}$, intent(in) :: array(:) - logical(lk), intent(in), optional :: sorted - ${t2}$, allocatable :: unique_values(:) - - ${t2}$ :: temp_array(size(array)) - logical :: mask(size(array)) - integer :: i, n - logical :: is_input_sorted - - n = size(array) - - ! Handle edge cases first - if (n == 0) then - ! Return empty array for empty input - allocate(unique_values(0)) - return - else if (n == 1) then - ! For single-element arrays, return that element directly - allocate(unique_values(1)) - unique_values(1) = array(1) - return - endif - - ! Determine if the input is already sorted - is_input_sorted = optval(sorted, .false.) - - ! Create a temporary copy and sort it if needed - temp_array = array - if (.not. is_input_sorted) call sort(temp_array) - - ! Find unique elements using a mask - ! Start with first element always marked as unique - mask(1) = .true. - - ! Compare each element with previous to mark duplicates - do concurrent (i=2:n) - mask(i) = temp_array(i) /= temp_array(i-1) - end do - - ! Extract unique elements to result array using pack - unique_values = pack(temp_array, mask) - end function ${name1}$_unique - -#:endfor - -end submodule stdlib_sorting_unique_impl \ No newline at end of file From 10d7bd466f208b5c560d1687e164b4d60ec36225 Mon Sep 17 00:00:00 2001 From: demoncoder-crypto Date: Tue, 1 Apr 2025 23:06:46 +0530 Subject: [PATCH 4/4] Fix CMakeLists.txt: remove reference to deleted unique impl file --- src/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b7926a875..e0e7bffa6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -49,7 +49,6 @@ set(fppFiles stdlib_sorting_sort.fypp stdlib_sorting_sort_index.fypp stdlib_sorting_unique.fypp - stdlib_sorting_unique_impl.fypp stdlib_sparse_constants.fypp stdlib_sparse_conversion.fypp stdlib_sparse_kinds.fypp