Skip to content

Commit

Permalink
* split the unit test into regression and partial versions
Browse files Browse the repository at this point in the history
* set the trigger time of regression test to 7:00 on every Friday in Beijing time
* replace the previous unit test with the partial test, which only runs the unit tests corresponding to the changed files. If any changed file has no corresponding unit test, all tests are run.
  • Loading branch information
HYLcool committed Feb 28, 2025
1 parent e283e8d commit 77d7b50
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 12 deletions.
77 changes: 77 additions & 0 deletions .github/workflows/unit-test-partial.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: unittest-partial

on:
workflow_dispatch:
pull_request:
push:
branches:
- main

permissions:
contents: read

env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true

jobs:
unittest-single:
runs-on: [GPU, unittest]
environment: Testing
steps:
- uses: actions/checkout@v3
with:
path: dj-${{ github.run_id }}

- name: Setup docker compose
working-directory: dj-${{ github.run_id }}/.github/workflows/docker
run: |
docker compose up -d
- name: Install data-juicer
working-directory: dj-${{ github.run_id }}/.github/workflows/docker
run: |
docker compose exec ray-head pip install -e .\[all\]
docker compose exec ray-worker pip install -e .\[all\]
- name: Clean dataset cache
working-directory: dj-${{ github.run_id }}/.github/workflows/docker
run: |
docker compose exec ray-head rm -rf /data/huggingface/dataset
- name: Run unittest standalone
working-directory: dj-${{ github.run_id }}/.github/workflows/docker
run: |
docker compose exec -e OPENAI_BASE_URL=${{ secrets.OPENAI_BASE_URL }} -e OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} ray-head python tests/run.py --tag standalone --mode partial
- name: Upload coverage report of standalone
uses: actions/upload-artifact@v4
with:
name: coverage_report_standalone
include-hidden-files: true
path: dj-${{ github.run_id }}/coverage_report_standalone

# Partial workflow: select the ray-tagged tests with the same "partial" mode
# as the standalone step, instead of running the whole regression suite.
- name: Run unittest ray
  working-directory: dj-${{ github.run_id }}/.github/workflows/docker
  run: |
    docker compose exec ray-head python tests/run.py --tag ray --mode partial
- name: Upload coverage report of ray
uses: actions/upload-artifact@v4
with:
name: coverage_report_ray
include-hidden-files: true
path: dj-${{ github.run_id }}/coverage_report_ray

- name: Remove docker compose
working-directory: dj-${{ github.run_id }}/.github/workflows/docker
if: always()
run: |
docker compose down --remove-orphans
- name: Cleanup workspace
if: always()
run: |
rm -rf dj-${{ github.run_id }}
13 changes: 6 additions & 7 deletions .github/workflows/unit-test.yml
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
# Runs only at 7:00 a.m. every Friday in Beijing time, which is 23:00 every Thursday in UTC.

name: unittest
name: unittest-regression

on:
workflow_dispatch:
pull_request:
push:
branches:
- main
schedule:
- cron: '0 23 * * 4'

permissions:
contents: read
Expand Down Expand Up @@ -44,7 +43,7 @@ jobs:
- name: Run unittest standalone
working-directory: dj-${{ github.run_id }}/.github/workflows/docker
run: |
docker compose exec -e OPENAI_BASE_URL=${{ secrets.OPENAI_BASE_URL }} -e OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} ray-head python tests/run.py --tag standalone
docker compose exec -e OPENAI_BASE_URL=${{ secrets.OPENAI_BASE_URL }} -e OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} ray-head python tests/run.py --tag standalone --mode regression
- name: Upload coverage report of standalone
uses: actions/upload-artifact@v4
Expand All @@ -56,7 +55,7 @@ jobs:
- name: Run unittest ray
working-directory: dj-${{ github.run_id }}/.github/workflows/docker
run: |
docker compose exec ray-head python tests/run.py --tag ray
docker compose exec ray-head python tests/run.py --tag ray --mode regression
- name: Upload coverage report of ray
uses: actions/upload-artifact@v4
Expand Down
37 changes: 37 additions & 0 deletions data_juicer/utils/unittest_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import shutil
import subprocess
import unittest

import numpy
Expand Down Expand Up @@ -122,3 +123,39 @@ def convert_record(rec):
first = sorted(first, key=lambda x: tuple(sorted(x.items())))
second = sorted(second, key=lambda x: tuple(sorted(x.items())))
return self.assertEqual(first, second)


# for partial unittest
def get_diff_files(prefix_filter=('data_juicer/', 'tests/')):
    """Get git diff files in target dirs except the __init__.py files.

    Runs ``git diff --name-only --diff-filter=ACMRT origin/main`` in the
    current working directory and keeps the reported paths that start with
    one of the given prefixes, end with ``.py`` and do not end with
    ``__init__.py``.

    :param prefix_filter: iterable of path prefixes to keep. The default is
        an immutable tuple so it can never be altered between calls.
    :return: list of the matching paths, in the order git printed them.
    :raises subprocess.CalledProcessError: if the git command exits with a
        non-zero status.
    """
    diff_output = subprocess.check_output(
        ['git', 'diff', '--name-only', '--diff-filter=ACMRT', 'origin/main'],
        universal_newlines=True,
    )
    # str.startswith accepts a tuple of candidates, so every prefix is
    # checked in one call; an empty tuple matches nothing.
    prefixes = tuple(prefix_filter)
    return [
        path for path in diff_output.strip().split('\n')
        if path.startswith(prefixes)
        and path.endswith('.py')
        and not path.endswith('__init__.py')
    ]


def find_corresponding_test_file(file_path):
    """Map a changed file to the unit-test file that covers it.

    The first ``data_juicer`` in the path is replaced with ``tests`` and the
    file name gets a ``test_`` prefix, e.g. ``data_juicer/ops/foo.py`` maps
    to ``tests/ops/test_foo.py``. A file whose name already starts with
    ``test_`` is treated as its own test file and is not prefixed again.

    :param file_path: path of the changed file, relative to the directory
        the existence check is made from (the current working directory).
    :return: the test file path if that file exists, otherwise ``None``.
    """
    # Replace only the first occurrence: a later "data_juicer" inside a
    # directory or file name must stay intact.
    mirrored = file_path.replace('data_juicer', 'tests', 1)
    parent, basename = os.path.split(mirrored)
    # Avoid looking for "test_test_*.py" when the changed file is a test.
    if not basename.startswith('test_'):
        basename = 'test_' + basename
    candidate = os.path.join(parent, basename)
    return candidate if os.path.exists(candidate) else None


def get_partial_test_cases():
    """Collect the test files that correspond to the changed files.

    :return: a list with one test file path per changed file reported by
        ``get_diff_files`` (empty when nothing was reported), or ``None``
        when some changed file has no corresponding test file, which
        signals that all tests should be run.
    """
    selected = []
    for changed_file in get_diff_files():
        test_file = find_corresponding_test_file(changed_file)
        if test_file is None:
            # no corresponding test file for this changed file: run all
            return None
        selected.append(test_file)
    return selected
30 changes: 25 additions & 5 deletions tests/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from loguru import logger

from data_juicer.utils.unittest_utils import set_clear_model_flag
from data_juicer.utils.unittest_utils import set_clear_model_flag, get_partial_test_cases

file_dir = os.path.join(os.path.dirname(__file__), '..')
sys.path.append(file_dir)
Expand All @@ -24,6 +24,11 @@
default="standalone",
help="the tag of tests being run")
parser.add_argument('--pattern', default='test_*.py', help='test file pattern')
parser.add_argument('--mode', default='partial',
help='test mode. Should be one of the ["partial", '
'"regression"]. "partial" means only test on the '
'unit tests of the changed files. "regression" means '
'test on all unit tests.')
parser.add_argument('--test_dir',
default='tests',
help='directory to be tested')
Expand All @@ -37,9 +42,12 @@
set_clear_model_flag(args.clear_model)

class TaggedTestLoader(unittest.TestLoader):
def __init__(self, tag="standalone"):
def __init__(self, tag="standalone", included_test_files=None):
    """Create a loader for one tag, optionally limited to given test files.

    :param tag: stored on the loader as ``self.tag``.
    :param included_test_files: a single path string, a collection of
        paths, or ``None``. A single string is wrapped into a one-element
        list; any other value is stored unchanged.
    """
    super().__init__()
    self.tag = tag
    # Normalise a lone path so the attribute is never a bare string.
    self.included_test_files = (
        [included_test_files]
        if isinstance(included_test_files, str)
        else included_test_files
    )

def loadTestsFromTestCase(self, testCaseClass):
# set tag to testcase class
Expand All @@ -53,9 +61,21 @@ def loadTestsFromTestCase(self, testCaseClass):
loaded_suite.addTest(test_case)
return loaded_suite

def gather_test_cases(test_dir, pattern, tag):
def _match_path(self, path, full_path, pattern):
    """Apply the parent's pattern match, narrowed to the included files.

    With no included test files configured, the parent loader's result is
    returned unchanged. Otherwise that result is returned only when one of
    the included test file paths occurs as a substring of ``full_path``;
    every other file is rejected.
    """
    pattern_matched = super()._match_path(path, full_path, pattern)
    if not self.included_test_files:
        return pattern_matched
    if any(included in full_path for included in self.included_test_files):
        return pattern_matched
    return False

def gather_test_cases(test_dir, pattern, tag, mode='partial'):
test_to_run = unittest.TestSuite()
test_loader = TaggedTestLoader(tag)
partial_test_files = get_partial_test_cases() if mode == 'partial' else None
test_loader = TaggedTestLoader(tag, included_test_files=partial_test_files)
discover = test_loader.discover(test_dir, pattern=pattern, top_level_dir=None)
for suite_discovered in discover:
print('suite_discovered', suite_discovered)
Expand All @@ -76,7 +96,7 @@ def main():

runner = unittest.TextTestRunner()
test_suite = gather_test_cases(os.path.abspath(args.test_dir),
args.pattern, args.tag)
args.pattern, args.tag, args.mode)
res = runner.run(test_suite)

cov.stop()
Expand Down

0 comments on commit 77d7b50

Please sign in to comment.