Excision: Remove all non-Condor code
DilumAluthge committed Feb 19, 2025
1 parent e683c53 commit 7245492
Showing 18 changed files with 14 additions and 960 deletions.
62 changes: 0 additions & 62 deletions .github/workflows/ci.yml
@@ -66,68 +66,6 @@ jobs:
           # If this PR is NOT from a fork, then DO fail CI if the Codecov upload errors.
           # If this is not a PR, then DO fail CI if the Codecov upload errors.
           fail_ci_if_error: ${{ github.event_name != 'pull_request' || github.repository == github.event.pull_request.head.repo.full_name }}
-  test-slurm:
-    if: false
-    runs-on: ubuntu-latest
-    timeout-minutes: 20
-    strategy:
-      fail-fast: false
-      matrix:
-        version:
-          # Please note: You must specify the full Julia version number (major.minor.patch).
-          # This is because the value here will be directly interpolated into a download URL.
-          # - '1.2.0' # minimum Julia version supported in Project.toml
-          - '1.6.7' # previous LTS
-          - '1.10.7' # current LTS
-          - '1.11.2' # currently the latest stable release
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          persist-credentials: false
-      - name: Print Docker version
-        run: |
-          docker --version
-          docker version
-      # This next bit of code is taken from:
-      # https://github.com/kleinhenz/SlurmClusterManager.jl
-      # Original author: Joseph Kleinhenz
-      # License: MIT
-      - name: Setup Slurm inside Docker
-        run: |
-          docker version
-          docker compose version
-          docker build --build-arg "JULIA_VERSION=${MATRIX_JULIA_VERSION:?}" -t slurm-cluster-julia -f ci/Dockerfile .
-          docker compose -f ci/docker-compose.yml up -d
-          docker ps
-        env:
-          MATRIX_JULIA_VERSION: ${{matrix.version}}
-      - name: Print some information for debugging purposes
-        run: |
-          docker exec -t slurmctld pwd
-          docker exec -t slurmctld ls -la
-          docker exec -t slurmctld ls -la HTCondorClusterManager
-      - name: Instantiate package
-        run: docker exec -t slurmctld julia --project=HTCondorClusterManager -e 'import Pkg; @show Base.active_project(); Pkg.instantiate(); Pkg.status()'
-      - name: Run tests without a Slurm allocation
-        run: docker exec -t slurmctld julia --project=HTCondorClusterManager -e 'import Pkg; Pkg.test(; test_args=["slurm"])'
-      - name: Run tests inside salloc
-        run: docker exec -t slurmctld salloc -t 00:10:00 -n 2 julia --project=HTCondorClusterManager -e 'import Pkg; Pkg.test(; test_args=["slurm"], coverage=true)'
-      - name: Run tests inside sbatch
-        run: docker exec -t slurmctld HTCondorClusterManager/ci/run_my_sbatch.sh
-      - run: find . -type f -name '*.cov'
-      - name: Copy .cov files out of the Docker container
-        run: docker exec slurmctld /bin/bash -c 'cd /home/docker/HTCondorClusterManager && tar -cf - src/*.cov' | tar -xvf -
-      - run: find . -type f -name '*.cov'
-      # - run: find . -type f -name '*.cov' -exec cat {} \;
-      - uses: julia-actions/julia-processcoverage@v1
-      - uses: codecov/codecov-action@v5
-        with:
-          files: lcov.info
-          token: ${{ secrets.CODECOV_TOKEN }}
-          # If this PR is from a fork, then do NOT fail CI if the Codecov upload errors.
-          # If this PR is NOT from a fork, then DO fail CI if the Codecov upload errors.
-          # If this is not a PR, then DO fail CI if the Codecov upload errors.
-          fail_ci_if_error: ${{ github.event_name != 'pull_request' || github.repository == github.event.pull_request.head.repo.full_name }}
   example-pull-gcr:
     runs-on: ubuntu-latest
     timeout-minutes: 20
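
The deleted "Run tests" steps drive the suite through Pkg.test(; test_args=["slurm"]). Pkg forwards test_args to the spawned test process, where the strings appear in the global ARGS; that is how a single runtests.jl can keep scheduler-specific tests behind an explicit opt-in. A minimal sketch of a test script consuming the flag this way (a hypothetical layout, not the test script from this repository):

    # test/runtests.jl — hypothetical sketch; not this repository's actual test script.
    using Test

    # `Pkg.test(; test_args=["slurm"])` makes "slurm" show up in ARGS here.
    const slurm_requested = "slurm" in ARGS

    @testset "cluster manager" begin
        # Unit tests that need no scheduler always run.
        @test occursin("--worker", "julia --worker=somecookie")

        if slurm_requested
            # Tests that need a live Slurm allocation (the salloc/sbatch steps
            # in the deleted CI job) run only when explicitly requested.
            @info "Slurm-gated tests would run here"
        end
    end
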
21 changes: 0 additions & 21 deletions ci/Dockerfile

This file was deleted.

48 changes: 0 additions & 48 deletions ci/docker-compose.yml

This file was deleted.

14 changes: 0 additions & 14 deletions ci/my_sbatch.sh

This file was deleted.

14 changes: 0 additions & 14 deletions ci/run_my_sbatch.sh

This file was deleted.

70 changes: 0 additions & 70 deletions docs/sge.md

This file was deleted.

18 changes: 0 additions & 18 deletions slurm_test.jl

This file was deleted.

18 changes: 6 additions & 12 deletions src/HTCondorClusterManager.jl
@@ -1,23 +1,17 @@
 module HTCondorClusterManager

-using Distributed
-using Sockets
-using Pkg
+import Distributed
+import Sockets
+import Pkg

-using Distributed: launch, manage, kill, init_worker, connect
-
-export launch, manage, kill, init_worker, connect
+import Distributed: launch, manage, kill, init_worker, connect


 worker_cookie() = begin Distributed.init_multi(); cluster_cookie() end
 worker_arg() = `--worker=$(worker_cookie())`


-# PBS doesn't have the same semantics as SGE wrt to file accumulate,
-# a different solution will have to be found
-include("qsub.jl")
-include("scyld.jl")
 include("condor.jl")
-include("slurm.jl")
-include("affinity.jl")
-include("elastic.jl")

 end
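
The switch from using to import is not just cosmetic. using Distributed only makes exported names visible, and a bare function launch(...) definition under it is an error rather than a new method of Distributed.launch; import Distributed: launch, manage, kill, init_worker, connect brings the bindings in so the module's own definitions extend them, and dropping the export line stops re-exporting Distributed's names from this package. A minimal sketch of the pattern (module and manager names here are illustrative, not this package's code):

    module UsingVsImportDemo

    import Distributed
    import Distributed: launch   # imported, so a local definition adds a method

    struct DemoManager <: Distributed.ClusterManager
        np::Int
    end

    # Extends Distributed.launch. Had we written `using Distributed` instead,
    # Julia would reject this bare definition and demand an explicit import.
    function launch(mgr::DemoManager, params::Dict, launched::Array, c::Condition)
        @info "a real manager would start $(mgr.np) workers here"
    end

    end # module
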
52 changes: 0 additions & 52 deletions src/affinity.jl

This file was deleted.

12 changes: 6 additions & 6 deletions src/condor.jl
@@ -2,7 +2,7 @@

 export HTCManager, addprocs_htc

-struct HTCManager <: ClusterManager
+struct HTCManager <: Distributed.ClusterManager
     np::Integer
 end

@@ -51,7 +51,7 @@ function condor_script(portnum::Integer, np::Integer, params::Dict)
     "$tdir/$jobname.sub"
 end

-function launch(manager::HTCManager, params::Dict, instances_arr::Array, c::Condition)
+function Distributed.launch(manager::HTCManager, params::Dict, instances_arr::Array, c::Condition)
     let
         mgr_desc = "HTCondor"
         msg = "The $(mgr_desc) functionality in ClusterManagers.jl is currently not actively maintained. " *

@@ -63,7 +63,7 @@ function launch(manager::HTCManager, params::Dict, instances_arr::Array, c::Condition)
     end
     try
         portnum = rand(8000:9000)
-        portnum, server = listenany(ip"0.0.0.0", portnum)
+        portnum, server = listenany(Distributed.ip"0.0.0.0", portnum)
         np = manager.np

         script = condor_script(portnum, np, params)

@@ -76,7 +76,7 @@ function launch(manager::HTCManager, params::Dict, instances_arr::Array, c::Condition)
         for i=1:np
             conn = accept(server)
-            config = WorkerConfig()
+            config = Distributed.WorkerConfig()

             config.io = conn

@@ -92,12 +92,12 @@ function launch(manager::HTCManager, params::Dict, instances_arr::Array, c::Condition)
     end
 end

-function kill(manager::HTCManager, id::Int64, config::WorkerConfig)
+function Distributed.kill(manager::HTCManager, id::Int64, config::Distributed.WorkerConfig)
    remotecall(exit,id)
    close(config.io)
 end

-function manage(manager::HTCManager, id::Integer, config::WorkerConfig, op::Symbol)
+function Distributed.manage(manager::HTCManager, id::Integer, config::Distributed.WorkerConfig, op::Symbol)
     if op == :finalize
         if !isnothing(config.io)
             close(config.io)
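
Qualifying these definitions as Distributed.launch, Distributed.kill, and Distributed.manage makes explicit that HTCManager implements Distributed.jl's cluster-manager interface: addprocs calls launch to start workers, expects one Distributed.WorkerConfig per worker to be pushed into the array, and later calls manage with lifecycle ops (:register, :interrupt, :deregister, :finalize). A stripped-down sketch of that contract, mirroring the accept-loop shape of condor.jl above (SketchManager is hypothetical, and a real launch must actually start worker processes):

    import Distributed
    import Sockets

    struct SketchManager <: Distributed.ClusterManager
        np::Int
    end

    function Distributed.launch(mgr::SketchManager, params::Dict,
                                launched::Array, c::Condition)
        port, server = Sockets.listenany(Sockets.ip"0.0.0.0", 9000)
        # A real manager would now start mgr.np worker processes (HTCManager
        # submits an HTCondor job) and tell each one to connect back to `port`.
        for _ in 1:mgr.np
            conn = Sockets.accept(server)       # wait for a worker to dial in
            config = Distributed.WorkerConfig()
            config.io = conn                    # Distributed reads the worker's address announcement from this stream
            push!(launched, config)
            notify(c)                           # wake the task blocked in addprocs
        end
    end

    function Distributed.manage(mgr::SketchManager, id::Integer,
                                config::Distributed.WorkerConfig, op::Symbol)
        # Called on lifecycle events; :finalize is the manager's cleanup hook,
        # which is why condor.jl closes config.io there (and in kill).
        if op === :finalize && !isnothing(config.io)
            close(config.io)
        end
    end

Usage would be Distributed.addprocs(SketchManager(4)); the exported addprocs_htc presumably wraps the equivalent addprocs(HTCManager(np)) call for HTCondor.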