Skip to content

Commit

Permalink
added VCF output golden tests
Browse files Browse the repository at this point in the history
  • Loading branch information
stschiff committed Feb 4, 2025
1 parent ca616d2 commit e5d6c49
Show file tree
Hide file tree
Showing 12 changed files with 109 additions and 15 deletions.
2 changes: 1 addition & 1 deletion src/Poseidon/CLI/Forge.hs
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ runForge (
GenotypeVCF outG _ ->
let allJannoRows = getJannoRows $ getJointJanno relevantPackages
selJannoRows = map (allJannoRows !!) relevantIndices
in writeVCF logA selJannoRows outG newEigenstratIndEntries
in writeVCF logA selJannoRows (outPath </> outG)
let extractPipe = if packageWise then cat else P.map (selectIndices relevantIndices)
-- define main forge pipe including file output.
-- The final tee forwards the results to be used in the snpCounting-fold
Expand Down
2 changes: 1 addition & 1 deletion src/Poseidon/CLI/Genoconvert.hs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ convertGenoTo outFormat onlyGeno outPath removeOld outPlinkPopMode outZip pac =
(outFilesAbsTemp !! 1)
(outFilesAbsTemp !! 2)
(map (eigenstratInd2PlinkFam outPlinkPopMode) eigenstratIndEntries)
"VCF" -> writeVCF logA jannoRows (outFilesAbsTemp !! 0) eigenstratIndEntries
"VCF" -> writeVCF logA jannoRows (outFilesAbsTemp !! 0)
_ -> liftIO . throwIO $ illegalFormatException outFormat
runEffect $ eigenstratProd >-> printSNPCopyProgress logA currentTime >-> outConsumer
) (throwIO . PoseidonGenotypeExceptionForward errLength)
Expand Down
16 changes: 9 additions & 7 deletions src/Poseidon/GenotypeData.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
module Poseidon.GenotypeData where

import Paths_poseidon_hs (version)
import Poseidon.Janno (JannoGenotypePloidy (..), JannoRow (jGenotypePloidy, jPoseidonID))
import Poseidon.Janno (JannoGenotypePloidy (..),
JannoRow (jGenotypePloidy, jGroupName, jPoseidonID),
ListColumn (..), GroupName(..))
import Poseidon.Utils (LogA, PoseidonException (..),
PoseidonIO, checkFile,
envInputPlinkMode, logDebug,
logInfo, logWithEnv, padLeft,
logWarning)
logInfo, logWarning, logWithEnv,
padLeft)

import Control.Exception (throwIO)
import Control.Monad (forM, unless)
Expand Down Expand Up @@ -367,10 +369,10 @@ printSNPCopyProgress logA startTime = do
-- | Restrict the genotype line of one SNP record to the given positions.
--
-- The SNP entry is passed through untouched. The returned genotype vector
-- contains the entries of the input line at @indices@, in exactly the order
-- (and multiplicity) in which the indices are listed.
--
-- NOTE(review): lookup goes through 'V.!', presumably Data.Vector's partial
-- indexing, so an index outside the genotype line would fail at runtime.
-- Confirm that callers only pass valid positions.
selectIndices :: [Int] -> (EigenstratSnpEntry, GenoLine) -> (EigenstratSnpEntry, GenoLine)
selectIndices indices (snpEntry, genoLine) = (snpEntry, pickedEntries)
  where
    pickedEntries = V.fromList (map (genoLine V.!) indices)

writeVCF :: (MonadSafe m) => LogA -> [JannoRow] -> FilePath -> [EigenstratIndEntry] -> Consumer (EigenstratSnpEntry, GenoLine) m ()
writeVCF logA jannoRows vcfFile eigenstratIndEntries = do
let sampleNames = [n | EigenstratIndEntry n _ _ <- eigenstratIndEntries]
groupNames = [B.unpack g | EigenstratIndEntry _ _ g <- eigenstratIndEntries]
writeVCF :: (MonadSafe m) => LogA -> [JannoRow] -> FilePath -> Consumer (EigenstratSnpEntry, GenoLine) m ()
writeVCF logA jannoRows vcfFile = do
let sampleNames = map (B.pack . jPoseidonID) jannoRows
groupNames = map ((\(GroupName n) -> T.unpack n) . head . getListColumn . jGroupName) jannoRows
prog_name <- liftIO getProgName
prog_args <- liftIO getArgs
let command_line = prog_name ++ " " ++ unwords prog_args
Expand Down
5 changes: 5 additions & 0 deletions test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,11 @@ ad7e56177aad0a720f0bde13d47f2ac1 forge forge/ForgePac19/CHANGELOG.md
b7b649620cd37bd4a6d6f0f31c1c56da forge forge/ForgePac19/ForgePac19.janno
b36b3ca509c235d0f15571c96195e801 forge forge/ForgePac20/POSEIDON.yml
e375863bca9e4a91c9855396abde31c7 forge forge/ForgePac20/ForgePac20.janno
1f24e4ad0943c830a58e9ae168f9ffa6 forge forge/ForgePac21/POSEIDON.yml
33b1ac865fb6ae948a01f2b397f4edde forge forge/ForgePac21/ForgePac21.vcf
8846333d9a1de6510f25a3816cc70fef forge forge/ForgePac21/ForgePac21.janno
9089f5d5602937bb7713e1dc8d7a8f2d forge forge/ForgePac21/ForgePac21.ssf
b4f71aff4fbc11594008c3811781cc43 forge forge/ForgePac21/ForgePac21.bib
d4a05cfef045648238a94a9d621cf667 chronicle chronicle/chronicle1.yml
b43da4d5734371c0648553120f812466 timetravel timetravel/Lamnidis_2018-1.0.0/POSEIDON.yml
8d57ce1a1ab28c0d8a5f391dd790a59c timetravel timetravel/Lamnidis_2018-1.0.1/POSEIDON.yml
Expand Down
12 changes: 6 additions & 6 deletions test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,25 @@ lastModified: 2025-02-04
packages:
- title: Lamnidis_2018
version: 1.0.0
commit: d3e6237bbabacb39bf5289fdf0a0da764b5f499a
commit: e80d54ce4c0dbdd86ed03d700ae3cfe280966700
path: Lamnidis_2018
- title: Lamnidis_2018
version: 1.0.1
commit: d3e6237bbabacb39bf5289fdf0a0da764b5f499a
commit: e80d54ce4c0dbdd86ed03d700ae3cfe280966700
path: Lamnidis_2018_newVersion
- title: Schiffels
version: 1.1.1
commit: 6107cf921031db0c6bbce600be9f69889933f373
commit: 4fb75ac7968c267ffe745a30511afead4ea263f1
path: Schiffels
- title: Schiffels_2016
version: 1.0.1
commit: d3e6237bbabacb39bf5289fdf0a0da764b5f499a
commit: e80d54ce4c0dbdd86ed03d700ae3cfe280966700
path: Schiffels_2016
- title: Schmid_2028
version: 1.0.0
commit: d3e6237bbabacb39bf5289fdf0a0da764b5f499a
commit: e80d54ce4c0dbdd86ed03d700ae3cfe280966700
path: Schmid_2028
- title: Wang_2020
version: 0.1.0
commit: d3e6237bbabacb39bf5289fdf0a0da764b5f499a
commit: e80d54ce4c0dbdd86ed03d700ae3cfe280966700
path: Wang_2020
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
@article{Schiffels2016,
title = {Test},
}

@book{TestBook2,
title = {TestBook},
}

@article{TestPaper1,
title = {TestPaper},
}

@article{Wang2020,
title = {Test},
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Poseidon_ID Genetic_Sex Group_Name Alternative_IDs Relation_To Relation_Degree Relation_Type Relation_Note Collection_ID Country Country_ISO Location Site Latitude Longitude Date_Type Date_C14_Labnr Date_C14_Uncal_BP Date_C14_Uncal_BP_Err Date_BC_AD_Start Date_BC_AD_Median Date_BC_AD_Stop Date_Note MT_Haplogroup Y_Haplogroup Source_Tissue Nr_Libraries Library_Names Capture_Type UDG Library_Built Genotype_Ploidy Data_Preparation_Pipeline_URL Endogenous Nr_SNPs Coverage_on_Target_SNPs Damage Contamination Contamination_Err Contamination_Meas Contamination_Note Genetic_Source_Accession_IDs Primary_Contact Publication Note Keywords AddCol1 AddCol2
XXX002 F POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a 9 n/a n/a n/a n/a n/a n/a n/a n/a Schiffels2016 n/a n/a v1 v2
XXX004 F POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a 9 n/a n/a n/a n/a n/a n/a n/a n/a Schiffels2016 n/a n/a v1 v2
XXX005 M POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a 7 n/a n/a n/a n/a n/a n/a n/a n/a Schiffels2016;TestPaper1 n/a n/a v1 v2
XXX006 F POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a 9 n/a n/a n/a n/a n/a n/a n/a n/a Schiffels2016;TestPaper1 n/a n/a v1 v2
SAMPLE2 F 3 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a 7 n/a n/a n/a n/a n/a n/a n/a n/a Wang2020;TestPaper1 n/a n/a n/a n/a
SAMPLE4 F 5 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a 7 n/a n/a n/a n/a n/a n/a n/a n/a Wang2020;TestPaper1;TestBook2 n/a n/a n/a n/a
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
poseidon_IDs udg library_built sample_accession study_accession run_accession sample_alias secondary_sample_accession first_public last_updated instrument_model library_layout library_source instrument_platform library_name library_strategy fastq_ftp fastq_aspera fastq_bytes fastq_md5 read_count submitted_ftp other_info_1 other_info_2
XXX001;XXX002 n/a n/a n/a n/a ERR3518150 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a A B
XXX002;XXX004;XXX005 n/a n/a n/a n/a ERR3518151 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a C D
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
##fileformat=VCFv4.2
##source=trident_v1.6.2.1
##command_line=poseidon-devtools golden -d .
##group_names=POP2,POP2,POP2,POP2,3,5
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##FILTER=<ID=s50,Description="Less than 50% of samples have data">
##FILTER=<ID=s10,Description="Less than 10% of samples have data">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT XXX002 XXX004 XXX005 XXX006 SAMPLE2 SAMPLE4
1 752566 1_752566 G A . PASS NS=4 GT 1/1 1/1 1/1 1/1 ./. ./.
1 842013 1_842013 T G . PASS NS=4 GT 1/1 0/0 0/0 0/0 ./. ./.
1 891021 1_891021 G A . PASS NS=3 GT 1/1 1/1 ./. 0/1 ./. ./.
1 949654 1_949654 A G . PASS NS=4 GT 0/1 1/1 0/1 1/1 ./. ./.
2 1018704 2_1018704 A G . PASS NS=3 GT 0/1 0/1 ./. 1/1 ./. ./.
2 1045331 2_1045331 G A . PASS NS=4 GT 0/0 0/0 0/0 0/0 ./. ./.
2 1048955 2_1048955 A G . PASS NS=4 GT 0/0 0/0 0/0 0/0 ./. ./.
2 1061166 2_1061166 T C . PASS NS=4 GT 0/0 0/1 0/0 0/0 ./. ./.
2 1108637 2_1108637 G A . PASS NS=4 GT 0/0 0/0 0/1 0/0 ./. ./.
11 0 rs0000 A C . s50 NS=2 GT ./. ./. ./. ./. 0/1 1/1
11 100000 rs1111 A G . s50 NS=2 GT ./. ./. ./. ./. 0/0 0/0
11 200000 rs2222 A T . s50 NS=2 GT ./. ./. ./. ./. 0/1 0/1
11 300000 rs3333 C A . s50 NS=2 GT ./. ./. ./. ./. 0/1 0/0
11 400000 rs4444 G A . s50 NS=2 GT ./. ./. ./. ./. 0/1 1/1
11 500000 rs5555 T A . s50 NS=2 GT ./. ./. ./. ./. 0/1 0/1
11 600000 rs6666 G T . s50 NS=2 GT ./. ./. ./. ./. 0/1 1/1
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
poseidonVersion: 2.7.1
title: ForgePac21
description: Empty package template. Please add a description
packageVersion: 0.1.0
lastModified: 1970-01-01
genotypeData:
format: VCF
genoFile: ForgePac21.vcf
jannoFile: ForgePac21.janno
sequencingSourceFile: ForgePac21.ssf
bibFile: ForgePac21.bib
Binary file not shown.
25 changes: 25 additions & 0 deletions test/PoseidonGoldenTests/GoldenTestsRunCommands.hs
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,17 @@ testPipelineGenoconvert testDir checkFilePath = do
, "genoconvert" </> "zip_roundtrip" </> "Schiffels_2016.bim"
, "genoconvert" </> "zip_roundtrip" </> "Schiffels_2016.fam"
]

let genoconvertOpts7 = GenoconvertOptions {
_genoconvertGenoSources = [PacBaseDir $ testPacsDir </> "Schiffels_2016"]
, _genoConvertOutFormat = "VCF"
, _genoMaybeOutPackagePath = Just $ testDir </> "genoconvert" </> "out_vcf"
, _genoconvertRemoveOld = False
, _genoconvertOutPlinkPopMode = PlinkPopNameAsFamily
, _genoconvertOnlyLatest = False
, _genoconvertOutZip = True
}
testLog $ runGenoconvert genoconvertOpts7

testPipelineRectify :: FilePath -> FilePath -> IO ()
testPipelineRectify testDir checkFilePath = do
Expand Down Expand Up @@ -1132,6 +1143,20 @@ testPipelineForge testDir checkFilePath = do
"forge" </> "ForgePac20" </> "ForgePac20.janno"
]

let forgeOpts21 = forgeOpts1 {
_forgeOutFormat = "VCF"
, _forgeOutPacPath = testDir </> "forge" </> "ForgePac21"
, _forgeOutPacName = Just "ForgePac21"
}
let action21 = testLog (runForge forgeOpts21) >> patchLastModified testDir ("forge" </> "ForgePac21" </> "POSEIDON.yml")
runAndChecksumFiles checkFilePath testDir action21 "forge" [
"forge" </> "ForgePac21" </> "POSEIDON.yml"
, "forge" </> "ForgePac21" </> "ForgePac21.vcf"
, "forge" </> "ForgePac21" </> "ForgePac21.janno"
, "forge" </> "ForgePac21" </> "ForgePac21.ssf"
, "forge" </> "ForgePac21" </> "ForgePac21.bib"
]


testPipelineChronicleAndTimetravel :: FilePath -> FilePath -> IO ()
testPipelineChronicleAndTimetravel testDir checkFilePath = do
Expand Down

0 comments on commit e5d6c49

Please sign in to comment.