Skip to content

Commit

Permalink
Merge pull request #297 from poseidon-framework/richer_list_apis
Browse files Browse the repository at this point in the history
new option to output all janno columns in list
  • Loading branch information
stschiff authored May 31, 2024
2 parents 466870a + fc9cbdc commit c6a765e
Show file tree
Hide file tree
Showing 11 changed files with 112 additions and 29 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
- V 1.5.1.0:
- A new option `list --individuals --fullJanno` adds all standard columns from the Janno to the per-individual output.
- A new API option `/individuals?additionalJannoColumns=ALL` triggers the same behaviour for the Web API.
- V 1.5.0.1: Changed the release pipeline: `trident-macOS` was replaced by `trident-macOS-X64` and `trident-macOS-ARM64`.
- V 1.5.0.0
- Removed Josiah Carberry from `newPackageTemplate`, so that he doesn't get added any more to new packages created by `init` and `forge` - the contributor field is missing in the output of these commands now.
Expand Down
2 changes: 1 addition & 1 deletion poseidon-hs.cabal
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: poseidon-hs
version: 1.5.0.1
version: 1.5.1.0
synopsis: A package with tools for working with Poseidon Genotype Data
description: The tools in this package read and analyse Poseidon-formatted genotype databases, a modular system for storing genotype data from thousands of individuals.
license: MIT
Expand Down
38 changes: 26 additions & 12 deletions src/Poseidon/CLI/List.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ import Poseidon.Package (PackageReadOptions (..),
getExtendedIndividualInfo,
packagesToPackageInfos,
readPoseidonPackageCollection)
import Poseidon.ServerClient (ApiReturnData (..),
import Poseidon.ServerClient (AddJannoColSpec (..),
ApiReturnData (..),
ArchiveEndpoint (..),
ExtendedIndividualInfo (..),
GroupInfo (..), PackageInfo (..),
Expand All @@ -30,15 +31,15 @@ data ListOptions = ListOptions
{ _listRepoLocation :: RepoLocationSpec -- ^ where to look for packages: local base directories or a remote archive endpoint
, _listListEntity :: ListEntity -- ^ what to list
, _listRawOutput :: Bool -- ^ whether to output raw TSV instead of a nicely formatted table
, _listOnlyLatest :: Bool
, _listOnlyLatest :: Bool -- ^ whether to show only latest versions of packages
}

-- | Where packages are read from: a list of local base directories ('RepoLocal')
-- or a remote archive endpoint ('RepoRemote').
data RepoLocationSpec = RepoLocal [FilePath] | RepoRemote ArchiveEndpoint

-- | A datatype to represent which kind of entity to list
data ListEntity = ListPackages
| ListGroups
| ListIndividuals [String]
| ListIndividuals AddJannoColSpec

-- | The main function running the list command
runList :: ListOptions -> PoseidonIO ()
Expand Down Expand Up @@ -89,25 +90,38 @@ runList (ListOptions repoLocation listEntity rawOutput onlyLatest) = do
True <- return (not onlyLatest || isLatest)
return [groupName, getPacName gi, showMaybeVersion (getPacVersion gi), show isLatest, show nrInds]
return (tableH, tableB)
ListIndividuals moreJannoColumns -> do
ListIndividuals addJannoColSpec -> do
extIndInfos <- case repoLocation of
RepoRemote (ArchiveEndpoint remoteURL archive) -> do
logInfo "Downloading individual data from server"
apiReturn <- processApiResponse (remoteURL ++ "/individuals" ++ qDefault archive ++ "&additionalJannoColumns=" ++ intercalate "," moreJannoColumns) False
let addJannoColFlag = case addJannoColSpec of
AddJannoColAll -> "&additionalJannoColumns=ALL"
AddJannoColList [] -> ""
AddJannoColList moreJannoColumns -> "&additionalJannoColumns=" ++ intercalate "," moreJannoColumns
apiReturn <- processApiResponse (remoteURL ++ "/individuals" ++ qDefault archive ++ addJannoColFlag) False
case apiReturn of
ApiReturnExtIndividualInfo indInfo -> return indInfo
_ -> error "should not happen"
RepoLocal baseDirs -> do
pacCollection <- readPoseidonPackageCollection pacReadOpts baseDirs
getExtendedIndividualInfo pacCollection moreJannoColumns
getExtendedIndividualInfo pacCollection addJannoColSpec

-- warning in case the additional Columns do not exist in the entire janno dataset
forM_ (zip [0..] moreJannoColumns) $ \(i, columnKey) -> do
-- check entries in all individuals for that key
let nonEmptyEntries = catMaybes [snd (entries !! i) | ExtendedIndividualInfo _ _ _ _ entries <- extIndInfos]
when (null nonEmptyEntries) . logWarning $ "Column Name " ++ columnKey ++ " not present in any individual"
let addJannoCols = case extIndInfos of -- get all add-column names from first extIndInfo
[] -> []
(e:_) -> map fst . extIndInfoAddCols $ e

let tableH = ["Individual", "Group", "Package", "PackageVersion", "Is Latest"] ++ moreJannoColumns
-- warning in case the additional Columns do not exist in the entire janno dataset,
-- we only output this warning if the columns were requested explicitly. Not if
-- all columns were requested. We consider such an "all" request to mean "all columns that are present".
case addJannoColSpec of
AddJannoColList (_:_) -> do
forM_ (zip [0..] addJannoCols) $ \(i, columnKey) -> do
-- check entries in all individuals for that key
let nonEmptyEntries = catMaybes [snd (entries !! i) | ExtendedIndividualInfo _ _ _ _ entries <- extIndInfos]
when (null nonEmptyEntries) . logWarning $ "Column Name " ++ columnKey ++ " not present in any individual"
_ -> return ()

let tableH = ["Individual", "Group", "Package", "PackageVersion", "Is Latest"] ++ addJannoCols
tableB = do
i@(ExtendedIndividualInfo name groups _ isLatest addColumnEntries) <- extIndInfos
True <- return (not onlyLatest || isLatest)
Expand Down
6 changes: 4 additions & 2 deletions src/Poseidon/CLI/OptparseApplicativeParsers.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ import Poseidon.GenotypeData (GenoDataSource (..),
GenotypeDataSpec (..),
GenotypeFormatSpec (..),
SNPSetSpec (..))
import Poseidon.ServerClient (ArchiveEndpoint (..))
import Poseidon.ServerClient (AddJannoColSpec (..),
ArchiveEndpoint (..))
import Poseidon.Utils (LogMode (..), TestMode (..))
import Poseidon.Version (VersionComponent (..),
parseVersion)
Expand Down Expand Up @@ -600,7 +601,8 @@ parseListEntity = parseListPackages <|> parseListGroups <|> (parseListIndividual
parseListIndividualsDummy = OP.flag' () (
OP.long "individuals" <>
OP.help "List all individuals/samples.")
parseListIndividualsExtraCols = ListIndividuals <$> OP.many parseExtraCol
parseListIndividualsExtraCols = ListIndividuals <$> (parseAllJannoCols <|> (AddJannoColList <$> OP.many parseExtraCol))
parseAllJannoCols = OP.flag' AddJannoColAll (OP.long "fullJanno" <> OP.help "output all Janno Columns")
parseExtraCol = OP.strOption (
OP.short 'j' <>
OP.long "jannoColumn" <>
Expand Down
8 changes: 5 additions & 3 deletions src/Poseidon/CLI/Serve.hs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ import Poseidon.Package (PackageReadOptions (..),
packagesToPackageInfos,
readPoseidonPackageCollection)
import Poseidon.PoseidonVersion (minimalRequiredClientVersion)
import Poseidon.ServerClient (ApiReturnData (..),
import Poseidon.ServerClient (AddJannoColSpec (..),
ApiReturnData (..),
ServerApiReturnType (..))
import Poseidon.Utils (LogA, PoseidonIO, envLogAction,
logDebug, logInfo, logWithEnv)
Expand Down Expand Up @@ -121,10 +122,11 @@ runServer (ServeOptions archBaseDirs maybeZipPath port ignoreChecksums certFiles
pacs <- getItemFromArchiveStore archiveStore
maybeAdditionalColumnsString <- (Just <$> param "additionalJannoColumns") `rescue` (\_ -> return Nothing)
indInfo <- case maybeAdditionalColumnsString of
Just "ALL" -> getExtendedIndividualInfo pacs AddJannoColAll -- the literal value "ALL" requests all Janno columns
Just additionalColumnsString ->
let additionalColumnNames = splitOn "," additionalColumnsString
in getExtendedIndividualInfo pacs additionalColumnNames
Nothing -> getExtendedIndividualInfo pacs []
in getExtendedIndividualInfo pacs (AddJannoColList additionalColumnNames)
Nothing -> getExtendedIndividualInfo pacs (AddJannoColList []) -- parameter absent: no additional columns
let retData = ApiReturnExtIndividualInfo indInfo
return $ ServerApiReturnType [] (Just retData)

Expand Down
18 changes: 11 additions & 7 deletions src/Poseidon/Package.hs
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,17 @@ import Poseidon.Janno (JannoLibraryBuilt (..),
JannoList (..), JannoRow (..),
JannoRows (..), JannoSex (..),
JannoUDG (..), createMinimalJanno,
getMaybeJannoList, readJannoFile)
getMaybeJannoList,
jannoHeaderString, readJannoFile)
import Poseidon.PoseidonVersion (asVersion, latestPoseidonVersion,
showPoseidonVersion,
validPoseidonVersions)
import Poseidon.SequencingSource (SSFLibraryBuilt (..), SSFUDG (..),
SeqSourceRow (..),
SeqSourceRows (..),
readSeqSourceFile)
import Poseidon.ServerClient (ExtendedIndividualInfo (..),
import Poseidon.ServerClient (AddJannoColSpec (..),
ExtendedIndividualInfo (..),
GroupInfo (..), PackageInfo (..))
import Poseidon.Utils (LogA, PoseidonException (..),
PoseidonIO, checkFile,
Expand Down Expand Up @@ -823,15 +825,17 @@ getJointIndividualInfo packages = do
return (map fst . concat $ indInfoLatestPairs, map snd . concat $ indInfoLatestPairs)


getExtendedIndividualInfo :: (MonadThrow m) => [PoseidonPackage] -> [String] -> m [ExtendedIndividualInfo]
getExtendedIndividualInfo allPackages additionalJannoColumns = sequence $ do -- list monad
getExtendedIndividualInfo :: (MonadThrow m) => [PoseidonPackage] -> AddJannoColSpec -> m [ExtendedIndividualInfo]
getExtendedIndividualInfo allPackages addJannoColSpec = sequence $ do -- list monad
pac <- allPackages -- outer loop (automatically concatenating over inner loops)
jannoRow <- getJannoRowsFromPac pac -- inner loop
let name = jPoseidonID jannoRow
groups = getJannoList . jGroupName $ jannoRow
additionalColumnEntries = case additionalJannoColumns of
[] -> []
colNames -> [(k, BSC.unpack <$> toNamedRecord jannoRow HM.!? BSC.pack k) | k <- colNames]
colNames = case addJannoColSpec of
AddJannoColAll -> jannoHeaderString \\ ["Poseidon_ID", "Group_Name"] -- AddJannoColAll means all Janno header columns
-- except for these two, which are already explicit
AddJannoColList c -> c
additionalColumnEntries = [(k, BSC.unpack <$> toNamedRecord jannoRow HM.!? BSC.pack k) | k <- colNames]
isLatest <- isLatestInCollection allPackages pac -- this lives in monad m
-- double-return for m and then list.
return . return $ ExtendedIndividualInfo name groups (makePacNameAndVersion pac) isLatest additionalColumnEntries
Expand Down
7 changes: 6 additions & 1 deletion src/Poseidon/ServerClient.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ module Poseidon.ServerClient (
ArchiveEndpoint(..),
PackageInfo (..), GroupInfo (..), ExtendedIndividualInfo(..),
extIndInfo2IndInfoCollection,
qDefault, qArchive, qPacVersion, (+&+)
qDefault, qArchive, qPacVersion, (+&+),
AddJannoColSpec(..)
) where

import Paths_poseidon_hs (version)
Expand Down Expand Up @@ -225,3 +226,7 @@ extIndInfo2IndInfoCollection extIndInfos =
let indInfos = [IndividualInfo n g p | ExtendedIndividualInfo n g p _ _ <- extIndInfos]
areLatest = map extIndInfoIsLatest extIndInfos
in (indInfos, areLatest)

-- | Type needed to specify additional Janno columns to be queried from packages:
-- either an explicit list of column names ('AddJannoColList', possibly empty)
-- or all Janno columns ('AddJannoColAll').
data AddJannoColSpec = AddJannoColList [String] | AddJannoColAll

2 changes: 2 additions & 0 deletions test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ b18847f5498ae55882689b75916fdf64 list list/list2
63ef5f277f6f29163192382234211224 list list/list3
1c1f24de305405ece44393d378c0e15a list list/list4
bc636b9c03ea9359acd254a9911e5af3 list list/list5
ad5590b0ad65e64d6b2c8d874571c9f8 list list/list6
b197fb8dd883c7469a4791e4a677f1c0 summarise summarise/summarise1
d9e4b3f15d4e129a365d2064198d95b6 summarise summarise/summarise2
a1186fdad9ed555dff4dd61dc9838645 survey survey/survey1
Expand Down Expand Up @@ -113,6 +114,7 @@ b43da4d5734371c0648553120f812466 fetch fetch/multi_packages_2/Lamnidis_2018-1.0.
0ddad9ea097bca0253e0c3c6157efa68 listRemote listRemote/listRemote2
b2286cf9af7c6c8757b8109a1f58e2d9 listRemote listRemote/listRemote3
0433b2a80ee5a2eb5bf8c6404130e562 listRemote listRemote/listRemote4
8a13e5b31acabca6839100f411c38453 listRemote listRemote/listRemote5
282cedf121f37e81c1e45ec0dfb97560 jannocoalesce jannocoalesce/target1.janno
df34d0542c0a94cf9556619bff2e301d jannocoalesce jannocoalesce/target2.janno
a202f0c1636d55258454ad0a0dfea977 jannocoalesce jannocoalesce/target3.janno
Loading

0 comments on commit c6a765e

Please sign in to comment.