Skip to content

Commit

Permalink
Merge pull request #137 from poseidon-framework/minimalPackageOption
Browse files Browse the repository at this point in the history
Minimal package option
  • Loading branch information
nevrome authored Nov 8, 2021
2 parents 49ab8b9 + 3ca73ff commit 9368fd1
Show file tree
Hide file tree
Showing 14 changed files with 51 additions and 69 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
- V 0.22.0: Added a --minimal option for init and forge to create minimal packages without .janno and .bib (e.g. in automatic pipelines)
- V 0.21.3: Added a column name suggestion mechanism to the .janno file reading procedure
- V 0.21.2: Made trident survey more useful
- V 0.21.1: Simplified package creation in init and forge by enabling creation of deeper paths and by making the output package name argument optional
Expand Down
4 changes: 2 additions & 2 deletions poseidon-hs.cabal
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
name: poseidon-hs
version: 0.21.3
version: 0.22.0
synopsis: A package with tools for working with Poseidon Genotype Data
description: The tools in this package read and analyse Poseidon-formatted genotype databases, a modular system for storing genotype data from thousands of individuals.
license: MIT
license-file: LICENSE
author: Stephan Schiffels
author: Stephan Schiffels, Clemens Schmid
maintainer: stephan.schiffels@mac.com
category: Bioinformatics
build-type: Simple
Expand Down
6 changes: 6 additions & 0 deletions src-executables/Main-trident.hs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ initOptParser = InitOptions <$> parseInGenotypeFormat
<*> parseInIndFile
<*> parseOutPackagePath
<*> parseMaybeOutPackageName
<*> parseMakeMinimalPackage

listOptParser :: OP.Parser ListOptions
listOptParser = ListOptions <$> parseRepoLocation
Expand All @@ -180,6 +181,7 @@ forgeOptParser = ForgeOptions <$> parseBasePaths
<*> parseOutPackagePath
<*> parseMaybeOutPackageName
<*> parseOutFormat
<*> parseMakeMinimalPackage
<*> parseShowWarnings
<*> parseNoExtract

Expand Down Expand Up @@ -432,6 +434,10 @@ parseMaybeOutPackageName = OP.option (Just <$> OP.str) (
OP.value Nothing
)

-- | Parser for the boolean @--minimal@ command line flag.
--
-- Built with 'OP.switch', so the result is 'True' only when the flag is
-- given on the command line. The parser is shared by the option parsers of
-- the @init@ and @forge@ subcommands, where it selects the creation of a
-- minimal output package (one without .janno and .bib files).
parseMakeMinimalPackage :: OP.Parser Bool
parseMakeMinimalPackage = OP.switch $ mconcat
    [ OP.long "minimal"
    , OP.help "should only a minimal output package be created?"
    ]

parseOutFormat :: OP.Parser GenotypeFormatSpec
parseOutFormat = parseEigenstratFormat <|> pure GenotypeFormatPlink
where
Expand Down
23 changes: 14 additions & 9 deletions src/Poseidon/CLI/Forge.hs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import Poseidon.Package (PackageReadOptions (..),
getIndividuals,
getJointGenotypeData,
newPackageTemplate,
newMinimalPackageTemplate,
readPoseidonPackageCollection,
writePoseidonPackage)
import Poseidon.Utils (PoseidonException (..))
Expand Down Expand Up @@ -48,6 +49,7 @@ data ForgeOptions = ForgeOptions
, _forgeOutPacPath :: FilePath
, _forgeOutPacName :: Maybe String
, _forgeOutFormat :: GenotypeFormatSpec
, _forgeOutMinimal :: Bool
, _forgeShowWarnings :: Bool
, _forgeNoExtract :: Bool
}
Expand All @@ -63,7 +65,7 @@ pacReadOpts = defaultPackageReadOptions {

-- | The main function running the forge command
runForge :: ForgeOptions -> IO ()
runForge (ForgeOptions baseDirs entitiesDirect entitiesFile intersect_ outPath maybeOutName outFormat showWarnings noExtract) = do
runForge (ForgeOptions baseDirs entitiesDirect entitiesFile intersect_ outPath maybeOutName outFormat minimal showWarnings noExtract) = do
-- compile entities
entitiesFromFile <- mapM readEntitiesFromFile entitiesFile
let entities = nub $ entitiesDirect ++ concat entitiesFromFile
Expand Down Expand Up @@ -108,12 +110,14 @@ runForge (ForgeOptions baseDirs entitiesDirect entitiesFile intersect_ outPath m
let genotypeData = GenotypeDataSpec outFormat outGeno Nothing outSnp Nothing outInd Nothing (Just newSNPSet)
-- create new package
hPutStrLn stderr "Creating new package entity"
pac <- newPackageTemplate outPath outName genotypeData (Just (Right relevantJannoRows)) relevantBibEntries
pac <- if minimal
then return $ newMinimalPackageTemplate outPath outName genotypeData
else newPackageTemplate outPath outName genotypeData (Just (Right relevantJannoRows)) relevantBibEntries
-- POSEIDON.yml
hPutStrLn stderr "Creating POSEIDON.yml"
writePoseidonPackage pac
-- bib
unless (null relevantBibEntries) $ do
unless (minimal || null relevantBibEntries) $ do
hPutStrLn stderr "Creating .bib file"
writeBibTeXFile (outPath </> outName <.> "bib") relevantBibEntries
-- genotype data
Expand Down Expand Up @@ -144,12 +148,13 @@ runForge (ForgeOptions baseDirs entitiesDirect entitiesFile intersect_ outPath m
P.foldM sumNonMissingSNPs startAcc return forgePipe
-- janno (with updated SNP numbers)
liftIO $ hPutStrLn stderr "Done"
hPutStrLn stderr "Creating .janno file"
autosomalSnpList <- VU.freeze newNrAutosomalSNPs
let jannoRowsWithNewSNPNumbers = zipWith (\x y -> x {jNrAutosomalSNPs = Just y})
relevantJannoRows
(VU.toList autosomalSnpList)
writeJannoFile (outPath </> outName <.> "janno") jannoRowsWithNewSNPNumbers
unless minimal $ do
hPutStrLn stderr "Creating .janno file"
autosomalSnpList <- VU.freeze newNrAutosomalSNPs
let jannoRowsWithNewSNPNumbers = zipWith (\x y -> x {jNrAutosomalSNPs = Just y})
relevantJannoRows
(VU.toList autosomalSnpList)
writeJannoFile (outPath </> outName <.> "janno") jannoRowsWithNewSNPNumbers


sumNonMissingSNPs :: VUM.IOVector Int -> (EigenstratSnpEntry, GenoLine) -> SafeT IO (VUM.IOVector Int)
Expand Down
23 changes: 14 additions & 9 deletions src/Poseidon/CLI/Init.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ import Poseidon.GenotypeData (GenotypeDataSpec (..),
import Poseidon.Janno (writeJannoFile)
import Poseidon.Package (PoseidonPackage (..),
newPackageTemplate,
newMinimalPackageTemplate,
writePoseidonPackage)

import Control.Monad (unless)
import System.Directory (createDirectoryIfMissing, copyFile)
import System.FilePath ((<.>), (</>), takeFileName, takeBaseName)
import System.IO (hPutStrLn, stderr)
Expand All @@ -23,10 +25,11 @@ data InitOptions = InitOptions
, _initIndFile :: FilePath
, _initPacPath :: FilePath
, _initPacName :: Maybe String
, _initMinimal :: Bool
}

runInit :: InitOptions -> IO ()
runInit (InitOptions format_ snpSet_ genoFile_ snpFile_ indFile_ outPath maybeOutName) = do
runInit (InitOptions format_ snpSet_ genoFile_ snpFile_ indFile_ outPath maybeOutName minimal) = do
-- create new directory
hPutStrLn stderr $ "Creating new package directory: " ++ outPath
createDirectoryIfMissing True outPath
Expand All @@ -46,14 +49,16 @@ runInit (InitOptions format_ snpSet_ genoFile_ snpFile_ indFile_ outPath maybeOu
Just x -> x
Nothing -> takeBaseName outPath
inds <- loadIndividuals outPath genotypeData
pac <- newPackageTemplate outPath outName genotypeData (Just (Left inds)) [dummyBibEntry]
pac <- if minimal
then return $ newMinimalPackageTemplate outPath outName genotypeData
else newPackageTemplate outPath outName genotypeData (Just (Left inds)) [dummyBibEntry]
-- POSEIDON.yml
hPutStrLn stderr "Creating POSEIDON.yml"
writePoseidonPackage pac
-- janno
hPutStrLn stderr "Creating minimal .janno file"
writeJannoFile (outPath </> outName <.> "janno") $ posPacJanno pac
-- bib
hPutStrLn stderr "Creating dummy .bib file"
writeBibTeXFile (outPath </> outName <.> "bib") $ posPacBib pac

unless minimal $ do
-- janno
hPutStrLn stderr "Creating minimal .janno file"
writeJannoFile (outPath </> outName <.> "janno") $ posPacJanno pac
-- bib
hPutStrLn stderr "Creating dummy .bib file"
writeBibTeXFile (outPath </> outName <.> "bib") $ posPacBib pac
14 changes: 8 additions & 6 deletions test/Poseidon/GoldenTestsRunCommands.hs
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,9 @@ testPipelineInit testDir checkFilePath testPacsDir = do
, _initGenoFile = testPacsDir </> "Schiffels_2016" </> "geno.txt"
, _initSnpFile = testPacsDir </> "Schiffels_2016" </> "snp.txt"
, _initIndFile = testPacsDir </> "Schiffels_2016" </> "ind.txt"
, _initPacPath = testDir </> "Schiffels"
, _initPacName = Just "Schiffels"
, _initPacPath = testDir </> "Schiffels"
, _initPacName = Just "Schiffels"
, _initMinimal = False
}
let action = runInit initOpts1 >> patchLastModified testDir ("Schiffels" </> "POSEIDON.yml")
runAndChecksumFiles checkFilePath testDir action "init" [
Expand All @@ -115,13 +116,13 @@ testPipelineInit testDir checkFilePath testPacsDir = do
, _initGenoFile = testPacsDir </> "Wang_Plink_test_2020" </> "Wang_2020.bed"
, _initSnpFile = testPacsDir </> "Wang_Plink_test_2020" </> "Wang_2020.bim"
, _initIndFile = testPacsDir </> "Wang_Plink_test_2020" </> "Wang_2020.fam"
, _initPacPath = testDir </> "Wang"
, _initPacName = Nothing
, _initPacPath = testDir </> "Wang"
, _initPacName = Nothing
, _initMinimal = True
}
let action2 = runInit initOpts2 >> patchLastModified testDir ("Wang" </> "POSEIDON.yml")
runAndChecksumFiles checkFilePath testDir action2 "init" [
"Wang" </> "POSEIDON.yml"
, "Wang" </> "Wang.janno"
, "Wang" </> "Wang_2020.bed"
]

Expand Down Expand Up @@ -285,6 +286,7 @@ testPipelineForge testDir checkFilePath = do
, _forgeOutPacPath = testDir </> "ForgePac1"
, _forgeOutPacName = Just "ForgePac1"
, _forgeOutFormat = GenotypeFormatEigenstrat
, _forgeOutMinimal = False
, _forgeShowWarnings = False
, _forgeNoExtract = False
}
Expand All @@ -302,14 +304,14 @@ testPipelineForge testDir checkFilePath = do
, _forgeOutPacPath = testDir </> "ForgePac2"
, _forgeOutPacName = Nothing
, _forgeOutFormat = GenotypeFormatPlink
, _forgeOutMinimal = True
, _forgeShowWarnings = False
, _forgeNoExtract = False
}
let action2 = runForge forgeOpts2 >> patchLastModified testDir ("ForgePac2" </> "POSEIDON.yml")
runAndChecksumFiles checkFilePath testDir action2 "forge" [
"ForgePac2" </> "POSEIDON.yml"
, "ForgePac2" </> "ForgePac2.bed"
, "ForgePac2" </> "ForgePac2.janno"
]

-- Note: We here use our test server (no SSL and different port). The reason is that
Expand Down
10 changes: 4 additions & 6 deletions test/testDat/poseidonHSGoldenTestCheckSumFile.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ Automatically generated with: poseidon-devtools updateGoldenTests
04686d2b18f41b9137807a0d5521670d init Schiffels/Schiffels.janno
95b093eefacc1d6499afcfe89b15d56c init Schiffels/geno.txt
66fee695ba94e12688567db73c0f2830 init Schiffels/Schiffels.bib
e289be3b47db28adcdb1b9c079c3fe06 init Wang/POSEIDON.yml
51731d04de568ede6152a81e9c1420da init Wang/Wang.janno
3f6e348b16c2a4c0399af33ee3ee7c0c init Wang/POSEIDON.yml
ae66d851301f4a761b819f97ec28fa55 init Wang/Wang_2020.bed
4f054a4d5b9302ba0023a7c7ded8c6af validate validate1
4f054a4d5b9302ba0023a7c7ded8c6af validate validate2
Expand All @@ -17,8 +16,8 @@ c27895a4e15eb46f0d4473cd37488cae list list2
a9f5b04f5b40eeff3aabf1ad7bd22771 list list4
551620f20ad8b9f4e2717c54765fff74 summarise summarise1
fb074b54961035f8903f3055c0bbb24e summarise summarise2
b73a791bb27db976907d11eaa22f4833 survey survey1
c2546c037ad2b3aa423fb8a351e8b9a7 survey survey2
9037ce687dfeddab5d8b36d70098cc1f survey survey1
c0bbcc92185aa0795e0ef4714d5b26fa survey survey2
3feaa876c5d05fd000470c5ba840cc2d genoconvert Wang/Wang.geno
b46831b007c2d53a12b472484b7b00b4 genoconvert Wang/Wang.snp
2faf8a7b87037451d4fd5ae9cc9af460 genoconvert Wang/Wang.ind
Expand All @@ -34,9 +33,8 @@ def6f91177d68487802ba27e91e46f81 update Schiffels/POSEIDON.yml
80121e7c99b47e63529194f9a961b51f forge ForgePac1/POSEIDON.yml
814d71c65037eb20bb8a636e804c9dd9 forge ForgePac1/ForgePac1.geno
9485ef192ccee3846da2dafb36128f67 forge ForgePac1/ForgePac1.janno
87dfb1aa71425b7a1ed97d37bcd1dbae forge ForgePac2/POSEIDON.yml
494b4b6e467fede4453c1df45d7d565b forge ForgePac2/POSEIDON.yml
e8a7d4c9ac62234f36ea2d5b4d0a17df forge ForgePac2/ForgePac2.bed
9485ef192ccee3846da2dafb36128f67 forge ForgePac2/ForgePac2.janno
53edcde25d7b66b94ea9ee306a315864 fetch 2019_Nikitin_LBK/POSEIDON.yml
4dedd209cdc4d4a986c184d70dedecd1 fetch 2019_Nikitin_LBK/Nikitin_LBK.janno
3abe2144e4f5aea3aa0b83395ba9b355 fetch 2019_Nikitin_LBK/Nikitin_LBK.fam

This file was deleted.

4 changes: 0 additions & 4 deletions test/testDat/poseidonHSGoldenTestData/ForgePac2/POSEIDON.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
poseidonVersion: 2.4.0
title: ForgePac2
description: Empty package template. Please add a description
contributor:
- name: John Doe
email: john@doe.net
packageVersion: 0.1.0
lastModified: 1970-01-01
genotypeData:
format: PLINK
genoFile: ForgePac2.bed
snpFile: ForgePac2.bim
indFile: ForgePac2.fam
snpSet: Other
jannoFile: ForgePac2.janno
5 changes: 0 additions & 5 deletions test/testDat/poseidonHSGoldenTestData/Wang/POSEIDON.yml
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
poseidonVersion: 2.4.0
title: Wang
description: Empty package template. Please add a description
contributor:
- name: John Doe
email: john@doe.net
packageVersion: 0.1.0
lastModified: 1970-01-01
genotypeData:
format: EIGENSTRAT
genoFile: Wang.geno
snpFile: Wang.snp
indFile: Wang.ind
snpSet: Other
jannoFile: Wang.janno
bibFile: Wang.bib
13 changes: 0 additions & 13 deletions test/testDat/poseidonHSGoldenTestData/Wang/Wang.bib

This file was deleted.

6 changes: 0 additions & 6 deletions test/testDat/poseidonHSGoldenTestData/Wang/Wang.janno

This file was deleted.

2 changes: 1 addition & 1 deletion test/testDat/poseidonHSGoldenTestData/survey1
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
| Package | Survey |
:===========:==================================================:
| Schiffels | GB|█....|.....|.....|...██|.....|.....|.....|... |
| Wang | GB|█....|.....|.....|...██|.....|.....|.....|... |
| Wang | G.|█....|.....|.....|...██|.....|.....|.....|... |
'-----------'--------------------------------------------------'
2 changes: 1 addition & 1 deletion test/testDat/poseidonHSGoldenTestData/survey2
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Schiffels GB|█....|.....|.....|...██|.....|.....|.....|...
Wang GB|█....|.....|.....|...██|.....|.....|.....|...
Wang G.|█....|.....|.....|...██|.....|.....|.....|...

0 comments on commit 9368fd1

Please sign in to comment.