diff --git a/README.md b/README.md index c2bb6df..1db3c0f 100644 --- a/README.md +++ b/README.md @@ -159,11 +159,11 @@ You can download and compile these solvers from the [MaxSAT 2023 evaluation](htt You can import Popper and use it in your Python code like so: ```python -from popper.util import Settings, print_prog_score +from popper.util import Settings from popper.loop import learn_solution settings = Settings(kbpath='input_dir') prog, score, stats = learn_solution(settings) if prog != None: - print_prog_score(prog, score) + settings.print_prog_score(prog, score) ``` diff --git a/popper.py b/popper.py index 7061e83..78df943 100755 --- a/popper.py +++ b/popper.py @@ -6,7 +6,7 @@ if __name__ == '__main__': settings = Settings(cmd_line=True) prog, score, stats = learn_solution(settings) - if prog != None: + if prog is not None: settings.print_prog_score(prog, score) else: print('NO SOLUTION') diff --git a/popper/abs_generate.py b/popper/abs_generate.py new file mode 100644 index 0000000..6cdc19e --- /dev/null +++ b/popper/abs_generate.py @@ -0,0 +1,56 @@ +import abc +from typing import FrozenSet, Tuple, TYPE_CHECKING, List + +import clingo + +if TYPE_CHECKING: + from . util import Settings + +Rule = Tuple['Literal', FrozenSet['Literal']] +RuleBase = FrozenSet[Rule] + +class Generator(abc.ABC): + settings: 'Settings' + + # @profile + def get_prog(self) -> RuleBase: + pass + + def gen_symbol(self, literal, backend): + pass + + def update_solver(self, size): + pass + self.update_number_of_literals(size) + + def update_number_of_literals(self, size): + pass + + def update_number_of_vars(self, size): + pass + + def update_number_of_rules(self, size): + pass + + def prune_size(self, size): + pass + + # @profile + def get_ground_rules(self, rule): + pass + + def parse_handles(self, new_handles): + pass + + def constrain(self, tmp_new_cons): + pass + + @abc.abstractmethod + def build_encoding(self, bkcons: List, settings: "Settings") -> str: + """Build and return a string for an ASP solver, used to generate hypotheses.""" + pass + + @abc.abstractmethod + def init_solver(self, encoding: str) -> clingo.Control: + """Incorporate the `encoding` into a new solver (`clingo.Control`) and return it.""" + pass \ No newline at end of file diff --git a/popper/bkcons.py b/popper/bkcons.py index ec8719d..c5fef49 100644 --- a/popper/bkcons.py +++ b/popper/bkcons.py @@ -1,16 +1,16 @@ +import numbers +import sys +import traceback +from collections import defaultdict +from itertools import product +from traceback import format_tb + import clingo import clingo.script -import numbers -import operator -import pkg_resources -import time -from . util import rule_is_recursive, format_rule, Constraint, order_prog, Literal, suppress_stdout_stderr from clingo import Function, Number, Tuple_ -from collections import defaultdict -from itertools import permutations, product -from pysat.card import * -from pysat.formula import CNF -from pysat.solvers import Solver + +from .util import suppress_stdout_stderr + clingo.script.enable_python() tmp_map = {} @@ -52,7 +52,8 @@ # return (head_pred, arity), body_preds -from itertools import permutations, combinations +from itertools import permutations + all_myvars = ['A','B','C','D','E','F','G','H'] def connected(xs, ys): @@ -628,7 +629,6 @@ def atom_to_symbol(pred, args): return Function(name = pred, arguments = xs) def deduce_bk_cons(settings, tester): - import re prog = [] lookup2 = {k: f'({v})' for k,v in tmp_map.items()} lookup1 = {k:v for k,v in lookup2.items()} @@ -1110,4 +1110,4 @@ def deduce_non_singletons(settings): cons.append(con) # exit() - return cons \ No newline at end of file + return cons diff --git a/popper/combine.py b/popper/combine.py index 4d095a0..e42d10c 100644 --- a/popper/combine.py +++ b/popper/combine.py @@ -258,7 +258,7 @@ def find_combination(self, timeout): else: if not self.settings.lex: - print("ERROR: Combining rec or pi programs not supported with MDL objective. Exiting.") + self.settings.logger.error("ERROR: Combining rec or pi programs not supported with MDL objective. Exiting.") assert(False) model_inconsistent = self.tester.test_prog_inconsistent(model_prog) diff --git a/popper/gen2.py b/popper/gen2.py index 96991a6..ef06a64 100644 --- a/popper/gen2.py +++ b/popper/gen2.py @@ -1,12 +1,16 @@ -import time +import numbers import re +from itertools import permutations +from typing import Any, Set, Tuple, Optional + import clingo -import numbers import clingo.script -import pkg_resources -from . util import Constraint, Literal from clingo import Function, Number, Tuple_ -from itertools import permutations + +from .util import Constraint, Literal, Settings +from . abs_generate import Generator as AbstractGenerator +from .resources import resource_string + def arg_to_symbol(arg): if isinstance(arg, tuple): @@ -15,10 +19,13 @@ def arg_to_symbol(arg): return Number(arg) if isinstance(arg, str): return Function(arg) + raise TypeError(f"Cannot translate {arg} to clingo Symbol") + -def atom_to_symbol(pred, args): +def atom_to_symbol(pred, args) -> Function: xs = tuple(arg_to_symbol(arg) for arg in args) - return Function(name = pred, arguments = xs) + return Function(name=pred, arguments=xs) + DEFAULT_HEURISTIC = """ #heuristic size(N). [1000-N,true] @@ -29,35 +36,55 @@ def atom_to_symbol(pred, args): program_size_at_least(M):- size(N), program_bounds(M), M <= N. """ -class Generator: - def __init__(self, settings, bkcons=[]): +class Generator(AbstractGenerator): + settings: Settings + model: Optional[clingo.Model] + solver: Optional[clingo.Control] + handler: Optional[clingo.SolveHandle] + + def __init__(self, settings: Settings, bkcons=[]): self.savings = 0 self.settings = settings self.cached_clingo_atoms = {} self.handle = None self.pruned_sizes = set() + encoding = self.build_encoding(bkcons, settings) + + with open('/tmp/ENCODING-GEN.pro', 'w') as f: + f.write(encoding) + + solver = self.init_solver(encoding) + self.solver = solver + + self.model = None + + def init_solver(self, encoding): + if self.settings.single_solve: + solver = clingo.Control(['--heuristic=Domain', '-Wnone']) + solver.configuration.solve.models = 0 + solver.add('base', [], encoding) + solver.ground([('base', [])]) + return solver + + def build_encoding(self, bkcons, settings): encoding = [] - alan = pkg_resources.resource_string(__name__, "lp/alan.pl").decode() + alan = resource_string(__name__, "lp/alan.pl").decode() encoding.append(alan) - with open(settings.bias_file) as f: bias_text = f.read() - bias_text = re.sub(r'max_vars\(\d*\).','', bias_text) - bias_text = re.sub(r'max_body\(\d*\).','', bias_text) - bias_text = re.sub(r'max_clauses\(\d*\).','', bias_text) - + bias_text = re.sub(r'max_vars\(\d*\).', '', bias_text) + bias_text = re.sub(r'max_body\(\d*\).', '', bias_text) + bias_text = re.sub(r'max_clauses\(\d*\).', '', bias_text) # AC: NEED TO COMPLETELY REFACTOR THIS CODE - for p,a in settings.pointless: + for p, a in settings.pointless: bias_text = re.sub(rf'body_pred\({p},\s*{a}\)\.', '', bias_text) bias_text = re.sub(rf'constant\({p},.*?\).*', '', bias_text, flags=re.MULTILINE) - encoding.append(bias_text) encoding.append(f'max_clauses({settings.max_rules}).') encoding.append(f'max_body({settings.max_body}).') encoding.append(f'max_vars({settings.max_vars}).') - # ADD VARS, DIRECTIONS, AND TYPES head_arity = len(settings.head_literal.arguments) encoding.append(f'head_vars({head_arity}, {tuple(range(head_arity))}).') @@ -68,52 +95,35 @@ def __init__(self, settings, bkcons=[]): encoding.append(f'vars({arity}, {tuple(xs)}).') for i, x in enumerate(xs): encoding.append(f'var_pos({x}, {tuple(xs)}, {i}).') - type_encoding = set() if self.settings.head_types: types = tuple(self.settings.head_types) - str_types = str(types).replace("'","") + str_types = str(types).replace("'", "") for i, x in enumerate(self.settings.head_types): type_encoding.add(f'type_pos({str_types}, {i}, {x}).') for pred, types in self.settings.body_types.items(): types = tuple(types) - str_types = str(types).replace("'","") + str_types = str(types).replace("'", "") for i, x in enumerate(types): type_encoding.add(f'type_pos({str_types}, {i}, {x}).') encoding.extend(type_encoding) - for pred, xs in self.settings.directions.items(): for i, v in xs.items(): if v == '+': encoding.append(f'direction_({pred}, {i}, in).') if v == '-': encoding.append(f'direction_({pred}, {i}, out).') - max_size = (1 + settings.max_body) * settings.max_rules if settings.max_literals < max_size: encoding.append(f'custom_max_size({settings.max_literals}).') - if settings.noisy: encoding.append(NOISY_ENCODING) - encoding.extend(bkcons) - if settings.single_solve: encoding.append(DEFAULT_HEURISTIC) - encoding = '\n'.join(encoding) - - # with open('ENCODING-GEN.pl', 'w') as f: - # f.write(encoding) - - if self.settings.single_solve: - solver = clingo.Control(['--heuristic=Domain','-Wnone']) - - solver.configuration.solve.models = 0 - solver.add('base', [], encoding) - solver.ground([('base', [])]) - self.solver = solver + return encoding def update_solver(self, size): # not used when learning programs without pi or recursion @@ -121,12 +131,12 @@ def update_solver(self, size): def get_prog(self): if self.handle is None: - self.handle = iter(self.solver.solve(yield_ = True)) + self.handle = iter(self.solver.solve(yield_=True)) self.model = next(self.handle, None) if self.model is None: return None - return self.parse_model_single_rule(self.model.symbols(shown = True)) + return self.parse_model_single_rule(self.model.symbols(shown=True)) def parse_model_single_rule(self, model): settings = self.settings @@ -155,13 +165,13 @@ def constrain(self, tmp_new_cons): if con_type == Constraint.GENERALISATION or con_type == Constraint.BANISH: con_size = None - if self.settings.noisy and len(xs)>2: + if self.settings.noisy and len(xs) > 2: con_size = xs[2] ground_rules2 = tuple(self.build_generalisation_constraint3(con_prog, con_size)) new_ground_cons.update(ground_rules2) elif con_type == Constraint.SPECIALISATION: con_size = None - if self.settings.noisy and len(xs)>2: + if self.settings.noisy and len(xs) > 2: con_size = xs[2] ground_rules2 = tuple(self.build_specialisation_constraint3(con_prog, con_size)) new_ground_cons.update(ground_rules2) @@ -242,12 +252,12 @@ def find_deep_bindings4(self, body): if len(body_types) == 0 or head_types is None: num_vars = len({var for atom in body for var in atom.arguments}) for xs in permutations(range(self.settings.max_vars), num_vars): - x = {i:xs[i] for i in range(num_vars)} + x = {i: xs[i] for i in range(num_vars)} yield x return # if there are types, only find type-safe permutations - var_type_lookup = {i:head_type for i, head_type in enumerate(head_types)} + var_type_lookup = {i: head_type for i, head_type in enumerate(head_types)} head_vars = set(range(len(self.settings.head_literal.arguments))) body_vars = set() @@ -273,7 +283,7 @@ def find_deep_bindings4(self, body): k = (x, y) bad_type_matching.add(k) - lookup = {x:i for i, x in enumerate(body_vars)} + lookup = {x: i for i, x in enumerate(body_vars)} for xs in permutations(range(self.settings.max_vars), len(lookup)): assignment = {} @@ -288,15 +298,17 @@ def find_deep_bindings4(self, body): continue yield assignment -def remap_variables(rule): + +def remap_variables(rule: Tuple[Literal,Any]): + head: Literal head, body = rule - head_vars = set() + head_vars: Set = set() if head: - head_vars.extend(head.arguments) + head_vars |= head.arguments next_var = len(head_vars) - lookup = {i:i for i in head_vars} + lookup = {i: i for i in head_vars} new_body = set() for pred, args in body: @@ -304,9 +316,9 @@ def remap_variables(rule): for var in args: if var not in lookup: lookup[var] = next_var - next_var+=1 + next_var += 1 new_args.append(lookup[var]) new_atom = Literal(pred, tuple(new_args)) new_body.add(new_atom) - return head, frozenset(new_body) \ No newline at end of file + return head, frozenset(new_body) diff --git a/popper/gen3.py b/popper/gen3.py index 116ff5c..6bd57f1 100644 --- a/popper/gen3.py +++ b/popper/gen3.py @@ -1,17 +1,18 @@ -import time +import numbers +import operator import re -from pysat.formula import CNF -from pysat.solvers import Solver -from pysat.card import * +from collections import defaultdict +from typing import Optional + import clingo -import operator -import numbers import clingo.script -import pkg_resources -from collections import defaultdict -from . util import rule_is_recursive, Constraint, Literal + +from .abs_generate import Generator as AbstractGenerator +from .resources import resource_string +from .util import rule_is_recursive, Constraint, Literal + clingo.script.enable_python() -from clingo import Function, Number, Tuple_ +from clingo import Function, Number, Tuple_, Model from itertools import permutations DEFAULT_HEURISTIC = """ @@ -30,7 +31,8 @@ def atom_to_symbol(pred, args): xs = tuple(arg_to_symbol(arg) for arg in args) return Function(name = pred, arguments = xs) -class Generator: +class Generator(AbstractGenerator): + model: Optional[Model] def __init__(self, settings, bkcons=[]): self.settings = settings @@ -53,20 +55,45 @@ def __init__(self, settings, bkcons=[]): self.new_seen_rules = set() self.new_ground_cons = set() + encoding = self.build_encoding(bkcons, settings) + + # with open('ENCODING-GEN.pl', 'w') as f: + # f.write(encoding) + + solver = self.init_solver(encoding) + self.solver = solver + + def init_solver(self, encoding): + if self.settings.single_solve: + solver = clingo.Control(['--heuristic=Domain', '-Wnone']) + else: + solver = clingo.Control(['-Wnone']) + NUM_OF_LITERALS = """ + %%% External atom for number of literals in the program %%%%% + #external size_in_literals(n). + :- + size_in_literals(n), + #sum{K+1,Clause : body_size(Clause,K)} != n. + """ + solver.add('number_of_literals', ['n'], NUM_OF_LITERALS) + solver.configuration.solve.models = 0 + solver.add('base', [], encoding) + solver.ground([('base', [])]) + return solver + + def build_encoding(self, bkcons, settings): encoding = [] - alan = pkg_resources.resource_string(__name__, "lp/alan-old.pl").decode() + alan = resource_string(__name__, "lp/alan-old.pl").decode() encoding.append(alan) - with open(settings.bias_file) as f: bias_text = f.read() - bias_text = re.sub(r'max_vars\(\d*\).','', bias_text) - bias_text = re.sub(r'max_body\(\d*\).','', bias_text) - bias_text = re.sub(r'max_clauses\(\d*\).','', bias_text) + bias_text = re.sub(r'max_vars\(\d*\).', '', bias_text) + bias_text = re.sub(r'max_body\(\d*\).', '', bias_text) + bias_text = re.sub(r'max_clauses\(\d*\).', '', bias_text) encoding.append(bias_text) encoding.append(f'max_clauses({settings.max_rules}).') encoding.append(f'max_body({settings.max_body}).') encoding.append(f'max_vars({settings.max_vars}).') - # ADD VARS, DIRECTIONS, AND TYPES head_arity = len(settings.head_literal.arguments) encoding.append(f'head_vars({head_arity}, {tuple(range(head_arity))}).') @@ -78,68 +105,38 @@ def __init__(self, settings, bkcons=[]): for i, x in enumerate(xs): encoding.append(f'var_pos({x}, {tuple(xs)}, {i}).') - # types = tuple(self.settings.head_types) # str_types = str(types).replace("'","") # for x, i in enumerate(self.settings.head_types): # encoding.append(f'type_pos({str_types}, {i}, {x}).') - # for pred, types in self.settings.body_types.items(): # types = tuple(types) # str_types = str(types).replace("'","") # for i, x in enumerate(types): - # encoding.append(f'type_pos({str_types}, {i}, {x}).') - # for pred, xs in self.settings.directions.items(): # for i, v in xs.items(): # if v == '+': # encoding.append(f'direction_({pred}, {i}, in).') # if v == '-': # encoding.append(f'direction_({pred}, {i}, out).') - max_size = (1 + settings.max_body) * settings.max_rules if settings.max_literals < max_size: encoding.append(f'custom_max_size({settings.max_literals}).') - if settings.pi_enabled: encoding.append(f'#show head_literal/4.') - if settings.noisy: encoding.append(""" program_bounds(0..K):- max_size(K). program_size_at_least(M):- size(N), program_bounds(M), M <= N. """) - # if settings.bkcons: encoding.extend(bkcons) - if settings.single_solve: if settings.order_space: encoding.append(DEFAULT_HEURISTIC) - encoding = '\n'.join(encoding) - - # with open('ENCODING-GEN.pl', 'w') as f: - # f.write(encoding) - - if self.settings.single_solve: - solver = clingo.Control(['--heuristic=Domain','-Wnone']) - else: - solver = clingo.Control(['-Wnone']) - NUM_OF_LITERALS = """ - %%% External atom for number of literals in the program %%%%% - #external size_in_literals(n). - :- - size_in_literals(n), - #sum{K+1,Clause : body_size(Clause,K)} != n. - """ - solver.add('number_of_literals', ['n'], NUM_OF_LITERALS) - - solver.configuration.solve.models = 0 - solver.add('base', [], encoding) - solver.ground([('base', [])]) - self.solver = solver + return encoding def get_prog(self): if self.handle is None: @@ -715,4 +712,4 @@ def remap_variables(rule): new_atom = Literal(atom.predicate, tuple(new_args)) new_body.add(new_atom) - return head, frozenset(new_body) \ No newline at end of file + return head, frozenset(new_body) diff --git a/popper/generate.py b/popper/generate.py index 6a98646..574ed19 100644 --- a/popper/generate.py +++ b/popper/generate.py @@ -1,19 +1,21 @@ -import time +import numbers +import operator import re -from pysat.formula import CNF -from pysat.solvers import Solver -from pysat.card import * +from collections import defaultdict +from typing import Optional, Sequence, Set + import clingo -import operator -import numbers import clingo.script -import pkg_resources -from collections import defaultdict -from . util import rule_is_recursive, Constraint, Literal, format_rule, remap_variables + +from .util import rule_is_recursive, Constraint, Literal, remap_variables + clingo.script.enable_python() -from clingo import Function, Number, Tuple_ +from clingo import Function, Number, Tuple_, Model, Symbol from itertools import permutations import dataclasses +from . abs_generate import Generator as AbstractGenerator +from . abs_generate import Rule, RuleBase +from .resources import resource_string @dataclasses.dataclass(frozen=True) class Var: @@ -75,8 +77,9 @@ def build_rule_literals(rule, rule_var, pi=False): if rule_is_recursive(rule): yield gteq(rule_var, 1) -class Generator: +class Generator(AbstractGenerator): + model: Optional[Model] def __init__(self, settings, bkcons=[]): self.savings = 0 self.settings = settings @@ -102,21 +105,65 @@ def __init__(self, settings, bkcons=[]): # TODO: dunno self.new_ground_cons = set() + encoding = self.build_encoding(bkcons, settings) + + # with open('ENCODING-GEN.pl', 'w') as f: + # f.write(encoding) + + solver = self.init_solver(encoding) + self.solver = solver + + def init_solver(self, encoding): + if self.settings.single_solve: + solver = clingo.Control(['--heuristic=Domain', '-Wnone']) + else: + solver = clingo.Control(['-Wnone']) + NUM_OF_LITERALS = """ + %%% External atom for number of literals in the program %%%%% + #external size_in_literals(n). + :- + size_in_literals(n), + #sum{K+1,Clause : body_size(Clause,K)} != n. + """ + solver.add('number_of_literals', ['n'], NUM_OF_LITERALS) + + if self.settings.no_bias: + NUM_OF_VARS = """ + %%% External atom for number of variables in the program %%%%% + #external size_in_vars(v). + :- + size_in_vars(v), + #max{V : clause_var(_,V)} != v - 1. + """ + solver.add('number_of_vars', ['v'], NUM_OF_VARS) + + NUM_OF_RULES = """ + %%% External atom for number of rules in the program %%%%% + #external size_in_rules(r). + :- + size_in_rules(r), + #max{R : clause(R)} != r - 1. + """ + solver.add('number_of_rules', ['r'], NUM_OF_RULES) + solver.configuration.solve.models = 0 + solver.add('base', [], encoding) + solver.ground([('base', [])]) + return solver + + def build_encoding(self, bkcons, settings): encoding = [] - alan = pkg_resources.resource_string(__name__, "lp/alan-old.pl").decode() + alan = resource_string(__name__, "lp/alan-old.pl").decode() # alan = pkg_resources.resource_string(__name__, "lp/alan.pl").decode() encoding.append(alan) - with open(settings.bias_file) as f: bias_text = f.read() - bias_text = re.sub(r'max_vars\(\d*\).','', bias_text) - bias_text = re.sub(r'max_body\(\d*\).','', bias_text) - bias_text = re.sub(r'max_clauses\(\d*\).','', bias_text) + bias_text = re.sub(r'max_vars\(\d*\).', '', bias_text) + bias_text = re.sub(r'max_body\(\d*\).', '', bias_text) + bias_text = re.sub(r'max_clauses\(\d*\).', '', bias_text) encoding.append(bias_text) encoding.append(f'max_clauses({settings.max_rules}).') encoding.append(f'max_body({settings.max_body}).') encoding.append(f'max_vars({settings.max_vars}).') - # ADD VARS, DIRECTIONS, AND TYPES head_arity = len(settings.head_literal.arguments) encoding.append(f'head_vars({head_arity}, {tuple(range(head_arity))}).') @@ -128,48 +175,39 @@ def __init__(self, settings, bkcons=[]): for i, x in enumerate(xs): encoding.append(f'var_pos({x}, {tuple(xs)}, {i}).') - # types = tuple(self.settings.head_types) # str_types = str(types).replace("'","") # for x, i in enumerate(self.settings.head_types): # encoding.append(f'type_pos({str_types}, {i}, {x}).') - # for pred, types in self.settings.body_types.items(): # types = tuple(types) # str_types = str(types).replace("'","") # for i, x in enumerate(types): - # encoding.append(f'type_pos({str_types}, {i}, {x}).') - # for pred, xs in self.settings.directions.items(): # for i, v in xs.items(): # if v == '+': # encoding.append(f'direction_({pred}, {i}, in).') # if v == '-': # encoding.append(f'direction_({pred}, {i}, out).') - max_size = (1 + settings.max_body) * settings.max_rules if settings.max_literals < max_size: encoding.append(f'custom_max_size({settings.max_literals}).') - if settings.pi_enabled: encoding.append(f'#show head_literal/4.') - if settings.noisy: encoding.append(""" program_bounds(0..K):- max_size(K). program_size_at_least(M):- size(N), program_bounds(M), M <= N. """) - # if settings.bkcons: encoding.extend(bkcons) - # FG Heuristic for single solve # - considering a default order of minimum rules, then minimum literals, and then minimum variables # - considering a preference for minimum hspace size parameters configuration if settings.single_solve: if settings.order_space: - assert(False) + assert (False) pass # horder = bias_order(settings, max_size) # iorder = 1 @@ -198,59 +236,17 @@ def __init__(self, settings, bkcons=[]): #heuristic size(N). [1000-N,true] """ encoding.append(DEFAULT_HEURISTIC) - encoding = '\n'.join(encoding) - - # with open('ENCODING-GEN.pl', 'w') as f: - # f.write(encoding) - - if self.settings.single_solve: - solver = clingo.Control(['--heuristic=Domain','-Wnone']) - else: - solver = clingo.Control(['-Wnone']) - NUM_OF_LITERALS = """ - %%% External atom for number of literals in the program %%%%% - #external size_in_literals(n). - :- - size_in_literals(n), - #sum{K+1,Clause : body_size(Clause,K)} != n. - """ - solver.add('number_of_literals', ['n'], NUM_OF_LITERALS) - - if self.settings.no_bias: - NUM_OF_VARS = """ - %%% External atom for number of variables in the program %%%%% - #external size_in_vars(v). - :- - size_in_vars(v), - #max{V : clause_var(_,V)} != v - 1. - """ - solver.add('number_of_vars', ['v'], NUM_OF_VARS) - - NUM_OF_RULES = """ - %%% External atom for number of rules in the program %%%%% - #external size_in_rules(r). - :- - size_in_rules(r), - #max{R : clause(R)} != r - 1. - """ - solver.add('number_of_rules', ['r'], NUM_OF_RULES) - - - - solver.configuration.solve.models = 0 - solver.add('base', [], encoding) - solver.ground([('base', [])]) - self.solver = solver + return encoding # @profile - def get_prog(self): + def get_prog(self) -> Optional[RuleBase]: if self.handle is None: self.handle = iter(self.solver.solve(yield_ = True)) self.model = next(self.handle, None) if self.model is None: return None - atoms = self.model.symbols(shown = True) + atoms: Sequence[Symbol] = self.model.symbols(shown = True) if self.settings.single_solve: return self.parse_model_single_rule(atoms) @@ -270,7 +266,7 @@ def gen_symbol(self, literal, backend): self.seen_symbols[k] = symbol return symbol - def parse_model_recursion(self, model): + def parse_model_recursion(self, model) -> RuleBase: settings = self.settings rule_index_to_body = defaultdict(set) head = settings.head_literal @@ -292,10 +288,10 @@ def parse_model_recursion(self, model): return frozenset(prog) - def parse_model_single_rule(self, model): + def parse_model_single_rule(self, model: Sequence[Symbol]) -> RuleBase: settings = self.settings - head = settings.head_literal - body = set() + head: Literal = settings.head_literal + body: Set[Literal] = set() cached_literals = settings.cached_literals for atom in model: args = atom.arguments @@ -303,10 +299,10 @@ def parse_model_single_rule(self, model): atom_args = tuple(args[3].arguments) literal = cached_literals[(predicate, atom_args)] body.add(literal) - rule = head, frozenset(body) + rule: Rule = head, frozenset(body) return frozenset([rule]) - def parse_model_pi(self, model): + def parse_model_pi(self, model) -> RuleBase: settings = self.settings # directions = defaultdict(lambda: defaultdict(lambda: '?')) rule_index_to_body = defaultdict(set) @@ -1198,4 +1194,4 @@ def gteq(a, b): return Literal('>=', (a,b)) def body_size_literal(clause_var, body_size): - return Literal('body_size', (clause_var, body_size)) \ No newline at end of file + return Literal('body_size', (clause_var, body_size)) diff --git a/popper/loop.py b/popper/loop.py index 8aeb69e..bb99e7a 100644 --- a/popper/loop.py +++ b/popper/loop.py @@ -1,15 +1,22 @@ import time from collections import defaultdict -from bitarray.util import subset, any_and, ones from functools import cache from itertools import chain, combinations, permutations -from . util import timeout, format_rule, rule_is_recursive, prog_is_recursive, prog_has_invention, calc_prog_size, format_literal, Constraint, mdl_score, suppress_stdout_stderr, get_raw_prog, Literal, remap_variables, format_prog -from . tester import Tester -from . bkcons import deduce_bk_cons, deduce_recalls, deduce_type_cons, deduce_non_singletons -from . combine import Combiner +from typing import Any, List, Optional, Set, Tuple +from bitarray.util import any_and, ones, subset -def load_solver(settings, tester, coverage_pos, coverage_neg, prog_lookup): +from .bkcons import deduce_bk_cons, deduce_non_singletons, deduce_recalls, deduce_type_cons +from .combine import Combiner +from .tester import Tester +from .util import Constraint, Literal, Settings, calc_prog_size, format_literal, format_prog, format_rule, get_raw_prog, \ + mdl_score, order_prog, prog_has_invention, prog_is_recursive, remap_variables, rule_is_recursive, timeout +from .abs_generate import Generator +from .generate import Generator as Generator1 +from .gen2 import Generator as Generator2 +from .gen3 import Generator as Generator3 + +def load_solver(settings: Settings, tester: Tester, coverage_pos, coverage_neg, prog_lookup): if settings.debug: settings.logger.debug(f'Load exact solver: {settings.solver}') @@ -24,12 +31,12 @@ def load_solver(settings, tester, coverage_pos, coverage_neg, prog_lookup): settings.exact_maxsat_solver = 'rc2' settings.old_format = False elif settings.solver == 'uwr': - settings.exact_maxsat_solver='uwrmaxsat' - settings.exact_maxsat_solver_params="-v0 -no-sat -no-bin -m -bm" + settings.exact_maxsat_solver = 'uwrmaxsat' + settings.exact_maxsat_solver_params = "-v0 -no-sat -no-bin -m -bm" settings.old_format = False else: - settings.exact_maxsat_solver='wmaxcdcl' - settings.exact_maxsat_solver_params="" + settings.exact_maxsat_solver = 'wmaxcdcl' + settings.exact_maxsat_solver_params = "" settings.old_format = True if settings.noisy: @@ -62,8 +69,14 @@ def load_solver(settings, tester, coverage_pos, coverage_neg, prog_lookup): return Combiner(settings, tester, coverage_pos, coverage_neg, prog_lookup) + class Popper(): - def __init__(self, settings, tester): + settings: Settings + tester: Tester + # the following 2 type hints are conjectural -- rpg + num_pos: int + num_neg: int + def __init__(self, settings: Settings, tester: Tester): self.settings = settings self.tester = tester self.pruned2 = set() @@ -74,6 +87,21 @@ def __init__(self, settings, tester): self.tmp = {} self.seen_allsat = set() + def select_generator(self, bkcons): + settings = self.settings + gen: type[Generator] + if settings.single_solve: + gen = Generator2 + settings.logger.debug("using generator 2") + + elif settings.max_rules == 2 and not settings.pi_enabled: + gen = Generator3 + settings.logger.debug("using generator 3") + else: + gen = Generator1 + settings.logger.debug("using generate") + return gen(settings, bkcons) + def run(self, bkcons): settings, tester = self.settings, self.tester @@ -98,13 +126,7 @@ def run(self, bkcons): # generator that builds programs # AC: all very hacky until the refactoring is complete with settings.stats.duration('init'): - if settings.single_solve: - from . gen2 import Generator - elif settings.max_rules == 2 and not settings.pi_enabled: - from . gen3 import Generator - else: - from . generate import Generator - generator = self.generator = Generator(settings, bkcons) + generator = self.generator = self.select_generator(bkcons) # track the success sets of tested hypotheses @@ -146,7 +168,7 @@ def run(self, bkcons): min_coverage = self.settings.min_coverage = 1 - for size in range(2, max_size+1): + for size in range(2, max_size + 1): if size > settings.max_literals: continue @@ -164,8 +186,10 @@ def run(self, bkcons): new_cons = [] # generate a program + # prog: Optional[Program] with settings.stats.duration('generate'): prog = generator.get_prog() + settings.logger.info("Generated prog: {}".format(prog)) if prog is None: break @@ -190,14 +214,16 @@ def run(self, bkcons): with settings.stats.duration('test'): if settings.noisy: - pos_covered, neg_covered, inconsistent, skipped, skip_early_neg = tester.test_prog_noisy(prog, prog_size) + pos_covered, neg_covered, inconsistent, skipped, skip_early_neg = tester.test_prog_noisy(prog, + prog_size) else: pos_covered, inconsistent = tester.test_prog(prog) + settings.logger.debug(f"pos_covered: {pos_covered}, inconsistent: {inconsistent}") # @CH: can you explain these? skipped, skip_early_neg = False, False tp = pos_covered.count(1) - fn = num_pos-tp + fn = num_pos - tp # if non-separable program covers all examples, stop if not inconsistent and tp == num_pos and not skipped: @@ -209,7 +235,7 @@ def run(self, bkcons): if settings.noisy and not skipped: fp = neg_covered.count(1) - tn = num_neg-fp + tn = num_neg - fp score = tp, fn, tn, fp, prog_size mdl = mdl_score(fn, fp, prog_size) if settings.debug: @@ -226,7 +252,7 @@ def run(self, bkcons): settings.best_prog_score = score settings.solution = prog settings.best_mdl = mdl - settings.max_literals = mdl-1 + settings.max_literals = mdl - 1 settings.print_incomplete_solution2(prog, tp, fn, tn, fp, prog_size) new_cons.extend(self.build_constraints_previous_hypotheses(mdl, prog_size)) @@ -247,7 +273,8 @@ def run(self, bkcons): if tp == num_pos: add_gen = True - # if the program does not cover any positive examples, check whether it is has an unsat core + # if the program does not cover any positive examples, check whether + # it has an unsat core if not has_invention: if tp < min_coverage or (settings.noisy and tp <= prog_size): with settings.stats.duration('find mucs'): @@ -255,17 +282,19 @@ def run(self, bkcons): new_cons.extend(cons_) pruned_more_general = len(cons_) > 0 - if tp > 0 and success_sets and (not settings.noisy or (settings.noisy and fp==0)): + if tp > 0 and success_sets and (not settings.noisy or (settings.noisy and fp == 0)): with settings.stats.duration('check subsumed and covers_too_few'): subsumed = pos_covered in success_sets or any(subset(pos_covered, xs) for xs in success_sets) subsumed_by_two = not subsumed and self.check_subsumed_by_two(pos_covered, prog_size) - covers_too_few = not subsumed and not subsumed_by_two and not settings.noisy and self.check_covers_too_few(prog_size, pos_covered) + covers_too_few = not subsumed and not subsumed_by_two and not settings.noisy and self.check_covers_too_few( + prog_size, pos_covered) if subsumed or subsumed_by_two or covers_too_few: add_spec = True noisy_subsumed = True - if not settings.noisy and not has_invention and not is_recursive and (subsumed or subsumed_by_two or covers_too_few): + if not settings.noisy and not has_invention and not is_recursive and ( + subsumed or subsumed_by_two or covers_too_few): # TODO: FIND MOST GENERAL SUBSUMED RECURSIVE PROGRAM # xs = self.subsumed_or_covers_too_few2(prog, check_coverage=False, check_subsumed=True) @@ -278,8 +307,8 @@ def run(self, bkcons): # print('\t', 'moo', format_rule(rule)) # new_cons.append((Constraint.SPECIALISATION, [functional_rename_vars(rule) for rule in x])) - - # If a program is subsumed or dioesn't cover enough examples, we search for the most general subprogram that also is also subsumed or doesn't cover enough examples + # If a program is subsumed or doesn't cover enough examples, we search for the most general + # subprogram that is also subsumed or doesn't cover enough examples # only applies to non-recursive and non-PI programs subsumed_progs = [] with settings.stats.duration('find most general subsumed/covers_too_few'): @@ -339,7 +368,8 @@ def run(self, bkcons): spec_size_ = min([tp, fp + prog_size]) if spec_size_ <= prog_size: add_spec = True - elif len(prog) == 1 and spec_size_ < settings.max_body + 1 and spec_size_ < settings.max_literals: + elif len( + prog) == 1 and spec_size_ < settings.max_body + 1 and spec_size_ < settings.max_literals: spec_size = spec_size_ elif len(prog) > 1 and spec_size_ < settings.max_literals: spec_size = spec_size_ @@ -353,7 +383,7 @@ def run(self, bkcons): gen_size = gen_size_ else: # only prune if the generalisation bounds are smaller than existing bounds - gen_size_ = min([fn + prog_size, num_pos-fp, settings.best_mdl - mdl + num_pos + prog_size]) + gen_size_ = min([fn + prog_size, num_pos - fp, settings.best_mdl - mdl + num_pos + prog_size]) if gen_size_ <= prog_size: add_gen = True if gen_size_ < settings.max_literals: @@ -413,17 +443,17 @@ def run(self, bkcons): add_spec = True add_to_combiner = False - if settings.noisy and not skipped and not skip_early_neg and not is_recursive and not has_invention and tp > prog_size+fp and fp+prog_size < settings.best_mdl and not noisy_subsumed: + if settings.noisy and not skipped and not skip_early_neg and not is_recursive and not has_invention and tp > prog_size + fp and fp + prog_size < settings.best_mdl and not noisy_subsumed: local_delete = set() ignore_this_prog = (pos_covered, neg_covered) in success_sets_noise - # if pos_covered is a subset and neg_covered is a superset of a previously seen program (which must be of equal size or smaller) then we can ignore this program if not ignore_this_prog: # find all progs where pos_covered is a subset of pos_covered_ of the other prog # s_pos_ = set.intersection(*(covered_by_pos[ex] for ex in pos_covered)) - s_pos = set.intersection(*(covered_by_pos[ex] for ex, ex_covered_ in enumerate(pos_covered) if ex_covered_ == 1)) + s_pos = set.intersection( + *(covered_by_pos[ex] for ex, ex_covered_ in enumerate(pos_covered) if ex_covered_ == 1)) # now check whether neg_covered is a superset of the other program for prog1 in s_pos: n1 = coverage_neg[prog1] @@ -433,9 +463,10 @@ def run(self, bkcons): # print('skip1') break - if not ignore_this_prog and (inconsistent or fp>0): + if not ignore_this_prog and (inconsistent or fp > 0): # neg_covered is a subset of all programs in s_neg - s_neg = set.intersection(*(covered_by_neg[ex] for ex, ex_covered_ in enumerate(neg_covered) if ex_covered_ == 1)) + s_neg = set.intersection( + *(covered_by_neg[ex] for ex, ex_covered_ in enumerate(neg_covered) if ex_covered_ == 1)) # if neg_covered(new) ⊆ neg_covered(old) for prog1 in s_neg: # if pos_covered(old) ⊆ pos_covered(new) @@ -447,21 +478,21 @@ def run(self, bkcons): local_delete.add(prog1) continue - if fp == fp1 and (tp-prog_size) < (tp1-size1): + if fp == fp1 and (tp - prog_size) < (tp1 - size1): # NEW tp:50 fp:1 size:6 memberofdomainregion(V0,V1):- synsetdomaintopicof(V2,V3),synsetdomaintopicof(V1,V3),hypernym(V2,V4),membermeronym(V0,V5),synsetdomaintopicof(V2,V4). # OLD tp:49 fp:1 size:4 memberofdomainregion(V0,V1):- synsetdomaintopicof(V1,V3),instancehypernym(V2,V3),membermeronym(V0,V4). # print('skip2') ignore_this_prog = True break - if tp == tp1 and (fp+prog_size) >= (fp1+size1): + if tp == tp1 and (fp + prog_size) >= (fp1 + size1): # NEW tp:10 fp:1 mdl:350 less_toxic(V0,V1):- ring_subst_3(V1,V4),ring_substitutions(V1,V3),alk_groups(V0,V3),x_subst(V0,V2,V5). # OLD tp:10 fp:2 mdl:350 less_toxic(V0,V1):- ring_substitutions(V0,V4),x_subst(V0,V3,V2),ring_subst_3(V1,V5). # print('skip3') ignore_this_prog = True break - if (tp-fp-prog_size) <= (tp1-fp1-size1): + if (tp - fp - prog_size) <= (tp1 - fp1 - size1): # NEW tp:12 fp:3 size:7 less_toxic(V0,V1):- gt(V2,V5),gt(V2,V3),ring_subst_2(V1,V4),ring_substitutions(V0,V3),alk_groups(V0,V2),ring_substitutions(V1,V5). # OLD tp:11 fp:4 size:3 less_toxic(V0,V1):- ring_subst_2(V1,V3),r_subst_3(V0,V2). # print('skip4') @@ -473,8 +504,11 @@ def run(self, bkcons): # new is consistent # if pos_covered(old) ⊆ pos_covered(new) and size(old) >= size(new) then ignore old not_covered = tester.pos_examples_ ^ pos_covered - progs_not_subsumed = set.union(*(covered_by_pos[ex] for ex, ex_covered_ in enumerate(not_covered) if ex_covered_ == 1)) - all_progs = set.union(*(covered_by_pos[ex] for ex, ex_covered_ in enumerate(tester.pos_examples_) if ex_covered_ == 1)) + progs_not_subsumed = set.union( + *(covered_by_pos[ex] for ex, ex_covered_ in enumerate(not_covered) if ex_covered_ == 1)) + all_progs = set.union( + *(covered_by_pos[ex] for ex, ex_covered_ in enumerate(tester.pos_examples_) if + ex_covered_ == 1)) s_pos = all_progs.difference(progs_not_subsumed) for prog1 in s_pos: size1, tp1, fp1 = scores[prog1] @@ -482,7 +516,7 @@ def run(self, bkcons): local_delete.add(prog1) for k in local_delete: - assert(k in combiner.saved_progs|to_combine) + assert (k in combiner.saved_progs | to_combine) if k in combiner.saved_progs: combiner.saved_progs.remove(k) elif k in to_combine: @@ -526,7 +560,7 @@ def run(self, bkcons): if p == pos_covered: continue # print(p, pos_covered, p|pos_covered) - paired_success_sets[s+prog_size].add(p|pos_covered) + paired_success_sets[s + prog_size].add(p | pos_covered) elif not settings.noisy: # if consistent, covers at least one example, is not subsumed, and has no redundancy, try to find a solution @@ -548,7 +582,7 @@ def run(self, bkcons): elif prog2_hash in combiner.saved_progs: combiner.saved_progs.remove(prog2_hash) else: - assert(False) + assert (False) del success_sets[pos_covered2] del success_sets_aux[pos_covered2] del coverage_pos[prog2_hash] @@ -566,7 +600,7 @@ def run(self, bkcons): for p, s in success_sets.items(): if p == pos_covered: continue - paired_success_sets[s+prog_size].add(p|pos_covered) + paired_success_sets[s + prog_size].add(p | pos_covered) if self.min_size is None: self.min_size = prog_size @@ -587,7 +621,7 @@ def run(self, bkcons): else: settings.solution = prog uncovered = uncovered & ~pos_covered - tp = num_pos- uncovered.count(1) + tp = num_pos - uncovered.count(1) fn = uncovered.count(1) tn = num_neg fp = 0 @@ -597,14 +631,14 @@ def run(self, bkcons): if not uncovered.any(): settings.solution_found = True - settings.max_literals = hypothesis_size-1 + settings.max_literals = hypothesis_size - 1 min_coverage = settings.min_coverage = 2 # if we have a solution with two rules then a new rule must entail all the examples if min_coverage != num_pos and len(settings.solution) == 2: min_coverage = settings.min_coverage = num_pos # AC: sometimes adding these size constraints can take longer - for i in range(settings.max_literals+1, max_size+1): + for i in range(settings.max_literals + 1, max_size + 1): generator.prune_size(i) call_combine = not uncovered.any() @@ -618,22 +652,22 @@ def run(self, bkcons): with settings.stats.duration('combine'): is_new_solution_found = combiner.update_best_prog(to_combine) - to_combine=set() + to_combine = set() - new_hypothesis_found = is_new_solution_found != None + new_hypothesis_found = is_new_solution_found is not None # if we find a new solution, update the maximum program size # if only adding nogoods, eliminate larger programs if new_hypothesis_found: # if settings.noisy: - # print('new_hypothesis_found', settings.best_mdl, combiner.best_cost) + # print('new_hypothesis_found', settings.best_mdl, combiner.best_cost) new_hypothesis, conf_matrix = is_new_solution_found tp, fn, tn, fp, hypothesis_size = conf_matrix settings.best_prog_score = conf_matrix settings.solution = new_hypothesis best_score = mdl_score(fn, fp, hypothesis_size) # if settings.noisy: - # print('new_hypothesis_found', settings.best_mdl, best_score) + # print('new_hypothesis_found', settings.best_mdl, best_score) # print('here???') settings.print_incomplete_solution2(new_hypothesis, tp, fn, tn, fp, hypothesis_size) @@ -644,12 +678,12 @@ def run(self, bkcons): new_cons.extend(self.build_constraints_previous_hypotheses(settings.best_mdl, prog_size)) if settings.single_solve: # AC: sometimes adding these size constraints can take longer - for i in range(best_score, max_size+1): + for i in range(best_score, max_size + 1): generator.prune_size(i) # print("HERE!!!", tp, fn, tn, fp) if not settings.noisy and fp == 0 and fn == 0: settings.solution_found = True - settings.max_literals = hypothesis_size-1 + settings.max_literals = hypothesis_size - 1 min_coverage = settings.min_coverage = 2 # if we have a solution with two rules then a new rule must entail all the examples @@ -662,7 +696,7 @@ def run(self, bkcons): return # AC: sometimes adding these size constraints can take longer - for i in range(hypothesis_size, max_size+1): + for i in range(hypothesis_size, max_size + 1): generator.prune_size(i) # BUILD CONSTRAINTS @@ -671,9 +705,10 @@ def run(self, bkcons): if not skipped: if settings.noisy and not add_spec and spec_size and not pruned_more_general: - if spec_size <= settings.max_literals and ((is_recursive or has_invention or spec_size <= settings.max_body)): + if spec_size <= settings.max_literals and ( + (is_recursive or has_invention or spec_size <= settings.max_body)): new_cons.append((Constraint.SPECIALISATION, prog, spec_size)) - self.seen_hyp_spec[fp+prog_size+mdl].append([prog, tp, fn, tn, fp, prog_size]) + self.seen_hyp_spec[fp + prog_size + mdl].append([prog, tp, fn, tn, fp, prog_size]) if add_gen and not pruned_sub_inconsistent: if settings.noisy or settings.recursion_enabled or settings.pi_enabled: @@ -684,9 +719,10 @@ def run(self, bkcons): new_cons.append((Constraint.GENERALISATION, prog)) if settings.noisy and not add_gen and gen_size and not pruned_sub_inconsistent: - if gen_size <= settings.max_literals and (settings.recursion_enabled or settings.pi_enabled) and not pruned_more_general: + if gen_size <= settings.max_literals and ( + settings.recursion_enabled or settings.pi_enabled) and not pruned_more_general: new_cons.append((Constraint.GENERALISATION, prog, gen_size)) - self.seen_hyp_gen[fn+prog_size+mdl].append([prog, tp, fn, tn, fp, prog_size]) + self.seen_hyp_gen[fn + prog_size + mdl].append([prog, tp, fn, tn, fp, prog_size]) if add_redund1 and not pruned_more_general: new_cons.append((Constraint.REDUNDANCY_CONSTRAINT1, prog)) @@ -709,9 +745,9 @@ def run(self, bkcons): # COMBINE with settings.stats.duration('combine'): is_new_solution_found = combiner.update_best_prog(to_combine) - to_combine=set() + to_combine = set() - new_hypothesis_found = is_new_solution_found != None + new_hypothesis_found = is_new_solution_found is not None # if we find a new solution, update the maximum program size # if only adding nogoods, eliminate larger programs @@ -725,7 +761,7 @@ def run(self, bkcons): if not settings.noisy and fp == 0 and fn == 0: settings.solution_found = True - settings.max_literals = hypothesis_size-1 + settings.max_literals = hypothesis_size - 1 min_coverage = settings.min_coverage = 2 # if we have a solution with two rules then a new rule must entail all the examples if min_coverage != num_pos and len(settings.solution) == 2: @@ -733,10 +769,10 @@ def run(self, bkcons): # if size >= settings.max_literals and not settings.order_space: if size >= settings.max_literals: - assert(False) + assert (False) if settings.single_solve: break - assert(len(to_combine) == 0) + assert (len(to_combine) == 0) def check_redundant_literal(self, prog): tester, settings = self.tester, self.settings @@ -756,7 +792,6 @@ def check_redundant_literal(self, prog): # loop through each body literal for i in range(len(body)): - # potential redundant literal redundant_literal = body[i] @@ -779,7 +814,6 @@ def check_redundant_literal_aux(self, body, literal, allsat1, not_all_sat1, dept if len(body) == 0: return out - # print('\tA \t\t\t',format_prog(prog), '\t\t', format_literal(literal)) # prog_key = (prog, literal) prog_key = (body, literal) @@ -801,9 +835,9 @@ def check_redundant_literal_aux(self, body, literal, allsat1, not_all_sat1, dept return out if not connected(body | frozenset([literal])): - for new_body in combinations(body, len(body)-1): + for new_body in combinations(body, len(body) - 1): new_body = frozenset(new_body) - out.extend(self.check_redundant_literal_aux(new_body, literal, allsat1, not_all_sat1, depth+1)) + out.extend(self.check_redundant_literal_aux(new_body, literal, allsat1, not_all_sat1, depth + 1)) return out if any(body.issubset(seen_body) for seen_body in not_all_sat1): @@ -823,9 +857,9 @@ def check_redundant_literal_aux(self, body, literal, allsat1, not_all_sat1, dept allsat1.add(body) - for new_body in combinations(body, len(body)-1): + for new_body in combinations(body, len(body) - 1): new_body = frozenset(new_body) - out.extend(self.check_redundant_literal_aux(new_body, literal, allsat1, not_all_sat1, depth+1)) + out.extend(self.check_redundant_literal_aux(new_body, literal, allsat1, not_all_sat1, depth + 1)) if len(out) > 0: return out @@ -851,7 +885,8 @@ def check_neg_reducible(self, prog): if len(body) == 1: # AC: SPECIAL CASE FOR A SINGLE BODY LITERAL IMPLIED BY THE HEAD - if settings.non_datalog_flag and literal_args.issubset(head_vars) and tester.diff_subs_single(literal): + if settings.non_datalog_flag and literal_args.issubset(head_vars) and tester.diff_subs_single( + literal): bad_rule = (head, frozenset([literal])) bad_prog = frozenset([bad_rule]) return bad_prog @@ -884,7 +919,7 @@ def filter_combine_programs(self, combiner, to_combine): # assert(False) xs = combiner.saved_progs | to_combine min_size = min(self.cached_prog_size[prog] for prog in xs) - must_beat = self.settings.best_mdl-min_size + must_beat = self.settings.best_mdl - min_size to_delete = set() # FILTER COMBINE PROGRAMS @@ -902,7 +937,7 @@ def filter_combine_programs(self, combiner, to_combine): def check_subsumed_by_two(self, pos_covered, prog_size): paired_success_sets = self.paired_success_sets - for i in range(2, prog_size+2): + for i in range(2, prog_size + 2): if pos_covered in paired_success_sets[i]: return True for x in paired_success_sets[i]: @@ -911,7 +946,7 @@ def check_subsumed_by_two(self, pos_covered, prog_size): return False def check_subsumed_by_two_v2(self, prog_size, prog2_size, pos_covered, pos_covered2): - space = prog2_size-prog_size + space = prog2_size - prog_size if space < self.min_size: return False @@ -961,8 +996,8 @@ def check_covers_too_few_(self, prog_size, pos_covered): return True # MAX RULES = 2 - elif ((prog_size + (min_size*2)) > max_literals): - space_remaining = max_literals-prog_size + elif ((prog_size + (min_size * 2)) > max_literals): + space_remaining = max_literals - prog_size if space_remaining > self.settings.search_depth: return False @@ -976,15 +1011,16 @@ def check_covers_too_few_(self, prog_size, pos_covered): return True # # MAX RULES = 3 - elif ((prog_size + (min_size*3)) > max_literals): - space_remaining = max_literals-prog_size + elif ((prog_size + (min_size * 3)) > max_literals): + space_remaining = max_literals - prog_size - if space_remaining-min_size > self.settings.search_depth: + if space_remaining - min_size > self.settings.search_depth: return False # uncovered = self.tester.pos_examples-pos_covered uncovered = self.tester.pos_examples_ & ~pos_covered - success_sets = sorted(((pos_covered_, size) for (pos_covered_, size) in self.success_sets.items()), key=lambda x: x[1]) + success_sets = sorted(((pos_covered_, size) for (pos_covered_, size) in self.success_sets.items()), + key=lambda x: x[1]) n = len(success_sets) for i in range(n): @@ -993,11 +1029,11 @@ def check_covers_too_few_(self, prog_size, pos_covered): break if subset(uncovered, pos_covered2): return False - space_remaining_ = space_remaining-size2 + space_remaining_ = space_remaining - size2 if space_remaining_ < min_size: continue uncovered2 = uncovered & ~pos_covered2 - for j in range(i+1, n): + for j in range(i + 1, n): pos_covered3, size3 = success_sets[j] if size3 > space_remaining_: break @@ -1006,17 +1042,18 @@ def check_covers_too_few_(self, prog_size, pos_covered): return True # # MAX RULES = 4 - elif prog_size + (min_size*4) > max_literals: - space_remaining = max_literals-prog_size - space_remaining -= (min_size*2) + elif prog_size + (min_size * 4) > max_literals: + space_remaining = max_literals - prog_size + space_remaining -= (min_size * 2) if space_remaining > self.settings.search_depth: return False missing = self.tester.pos_examples_ & ~pos_covered - success_sets = sorted(((pos_covered_, size) for (pos_covered_, size) in self.success_sets.items()), key=lambda x: x[1]) - space_remaining = max_literals-prog_size + success_sets = sorted(((pos_covered_, size) for (pos_covered_, size) in self.success_sets.items()), + key=lambda x: x[1]) + space_remaining = max_literals - prog_size n = len(success_sets) @@ -1026,23 +1063,23 @@ def check_covers_too_few_(self, prog_size, pos_covered): break if subset(missing, pos_covered2): return False - space_remaining_ = space_remaining-size2 + space_remaining_ = space_remaining - size2 if space_remaining_ < min_size: continue missing2 = missing & ~pos_covered2 - for j in range(i+1, n): + for j in range(i + 1, n): pos_covered3, size3 = success_sets[j] if size3 > space_remaining_: break # if pos_covered3.isdisjoint(missing2): - # continue + # continue if subset(missing2, pos_covered3): return False - space_remaining__ = space_remaining_-size3 + space_remaining__ = space_remaining_ - size3 if space_remaining__ < min_size: continue missing3 = missing2 & ~pos_covered3 - for k in range(j+1, n): + for k in range(j + 1, n): pos_covered4, size4 = success_sets[k] if size4 > space_remaining__: break @@ -1050,11 +1087,10 @@ def check_covers_too_few_(self, prog_size, pos_covered): return False return True # elif ((prog_size + (min_size*5)) > max_literals): - # print('MAX RULES IS 5!') + # print('MAX RULES IS 5!') return False - # find unsat cores def explain_incomplete(self, prog): @@ -1122,13 +1158,13 @@ def build_constraints_previous_hypotheses(self, score, best_size): seen_hyp_spec, seen_hyp_gen = self.seen_hyp_spec, self.seen_hyp_gen cons = [] # print(f"new best score {score}") - for k in [k for k in seen_hyp_spec if k > score+num_pos+best_size]: + for k in [k for k in seen_hyp_spec if k > score + num_pos + best_size]: to_delete = [] for prog, tp, fn, tn, fp, size in seen_hyp_spec[k]: # mdl = mdl_score(tuple((tp, fn, tn, fp, size))) mdl = mdl_score(fn, fp, size) - if score+num_pos+best_size < fp+size+mdl: - spec_size = score-mdl+num_pos+best_size + if score + num_pos + best_size < fp + size + mdl: + spec_size = score - mdl + num_pos + best_size if spec_size <= size: to_delete.append([prog, tp, fn, tn, fp, size]) # _, con = generator.build_specialisation_constraint(prog, rule_ordering, spec_size=spec_size) @@ -1154,7 +1190,7 @@ def build_constraints_previous_hypotheses(self, score, best_size): seen_hyp_gen[k].remove(to_del) return cons - def subsumed_or_covers_too_few(self, prog, seen=set()): + def subsumed_or_covers_too_few(self, prog, seen:Optional[Set] = None): tester, success_sets, settings = self.tester, self.success_sets, self.settings head, body = list(prog)[0] body = list(body) @@ -1163,11 +1199,13 @@ def subsumed_or_covers_too_few(self, prog, seen=set()): return [] out = set() + if seen is None: + seen = set() head_vars = set(head.arguments) # try the body with one literal removed (the literal at position i) for i in range(len(body)): - new_body = body[:i] + body[i+1:] + new_body = body[:i] + body[i + 1:] new_body = frozenset(new_body) if len(new_body) == 0: @@ -1183,7 +1221,8 @@ def subsumed_or_covers_too_few(self, prog, seen=set()): new_prog = frozenset({new_rule}) # ensure at least one head variable is in the body - if not settings.non_datalog_flag and not any(x in head_vars for literal in new_body for x in literal.arguments): + if not settings.non_datalog_flag and not any( + x in head_vars for literal in new_body for x in literal.arguments): continue # check whether we have pruned any subset (HORRIBLE CODE) @@ -1209,9 +1248,11 @@ def subsumed_or_covers_too_few(self, prog, seen=set()): sub_prog_pos_covered = tester.get_pos_covered(new_prog) # with self.settings.stats.duration('old'): - subsumed = sub_prog_pos_covered in success_sets or any(subset(sub_prog_pos_covered, xs) for xs in success_sets) + subsumed = sub_prog_pos_covered in success_sets or any( + subset(sub_prog_pos_covered, xs) for xs in success_sets) subsumed_by_two = not subsumed and self.check_subsumed_by_two(sub_prog_pos_covered, new_prog_size) - covers_too_few = not subsumed and not subsumed_by_two and self.check_covers_too_few(new_prog_size, sub_prog_pos_covered) + covers_too_few = not subsumed and not subsumed_by_two and self.check_covers_too_few(new_prog_size, + sub_prog_pos_covered) if not (subsumed or subsumed_by_two or covers_too_few): continue @@ -1233,7 +1274,7 @@ def subsumed_or_covers_too_few(self, prog, seen=set()): elif covers_too_few: out.add((new_prog, ('COVERS TOO FEW (GENERALISATION)'))) else: - assert(False) + assert (False) return out def find_variants(self, rule): @@ -1270,10 +1311,14 @@ def build_test_prog(self, subprog): def explain_totally_incomplete(self, prog): return list(self.explain_totally_incomplete_aux2(prog, set(), set())) - def explain_totally_incomplete_aux2(self, prog, unsat2=set(), unsat=set()): + def explain_totally_incomplete_aux2(self, prog, unsat2: Optional[Set] = None, unsat: Optional[Set] = None): has_recursion = prog_is_recursive(prog) out = [] + if unsat is None: + unsat = set() + if unsat2 is None: + unsat2 = set() for subprog in generalisations(prog, allow_headless=True, recursive=has_recursion): # print('---') @@ -1283,7 +1328,7 @@ def explain_totally_incomplete_aux2(self, prog, unsat2=set(), unsat=set()): # raw_prog2 = get_raw_prog2(subprog) # if raw_prog2 in self.seen_prog: - # continue + # continue subprog = frozenset(subprog) if hash(subprog) in self.seen_prog: @@ -1294,11 +1339,11 @@ def explain_totally_incomplete_aux2(self, prog, unsat2=set(), unsat=set()): self.seen_prog.add(hash(subprog)) self.seen_prog.add(hash(raw_prog)) - # self.seen_prog.add(raw_prog2) + # self.seen_prog.add(raw_prog2) # for rule in subprog: - # print('\t', 'B', format_rule(rule)) + # print('\t', 'B', format_rule(rule)) def should_skip(): if len(subprog) > 0: @@ -1337,8 +1382,7 @@ def should_skip(): # # pass # for rule in subprog: - # print('\t', 'C', format_rule(rule)) - + # print('\t', 'C', format_rule(rule)) if not self.prog_is_ok(subprog): xs = self.explain_totally_incomplete_aux2(subprog, unsat2, unsat) @@ -1346,14 +1390,13 @@ def should_skip(): continue # for rule in subprog: - # print('\t', 'D', format_rule(rule)) + # print('\t', 'D', format_rule(rule)) if self.tester.has_redundant_literal(frozenset(subprog)): xs = self.explain_totally_incomplete_aux2(subprog, unsat2, unsat) out.extend(xs) continue - # if len(subprog) > 2 and self.tester.has_redundant_rule(subprog): # xs = self.explain_totally_incomplete_aux2(subprog, directions, sat, unsat, noisy) # out.extend(xs) @@ -1364,7 +1407,6 @@ def should_skip(): # headless = is_headless(subprog) headless = any(head is None for head, body in subprog) - # print('\t\t\t testing',format_prog(subprog)) if headless: @@ -1391,7 +1433,6 @@ def should_skip(): out.append((subprog, headless)) return out - def has_valid_directions(self, rule): if self.settings.has_directions: return self.has_valid_directions_(rule) @@ -1495,22 +1536,21 @@ def prog_is_ok(self, prog): if not has_recursion: return False - if self.needs_datalog(prog) and not tmp(prog): return False return True def print_incomplete_solution2(self, prog, tp, fn, tn, fp, size): - self.logger.info('*'*20) + self.logger.info('*' * 20) self.logger.info('New best hypothesis:') if self.noisy: - self.logger.info(f'tp:{tp} fn:{fn} tn:{tn} fp:{fp} size:{size} mdl:{size+fn+fp}') + self.logger.info(f'tp:{tp} fn:{fn} tn:{tn} fp:{fp} size:{size} mdl:{size + fn + fp}') else: self.logger.info(f'tp:{tp} fn:{fn} tn:{tn} fp:{fp} size:{size}') for rule in order_prog(prog): - self.logger.info(format_rule(order_rule(rule))) - self.logger.info('*'*20) + self.logger.info(format_rule(self.settings.order_rule(rule))) + self.logger.info('*' * 20) def needs_datalog(self, prog): if not self.settings.has_directions: @@ -1533,28 +1573,34 @@ def needs_datalog(self, prog): return True return False + def popper(settings, tester, bkcons): Popper(settings, tester).run(bkcons) -def get_bk_cons(settings, tester): + +def get_bk_cons(settings: Settings, tester: Tester): bkcons = [] + recalls: Optional[List[str]] + pointless: Set[Tuple[str, Any]] with settings.stats.duration('find_pointless_relations'): pointless = settings.pointless = tester.find_pointless_relations() - for p,a in pointless: + for p, a in pointless: if settings.showcons: print('remove pointless relation', p, a) - settings.body_preds.remove((p,a)) + settings.body_preds.remove((p, a)) # if settings.datalog: settings.logger.debug(f'Loading recalls') with settings.stats.duration('recalls'): recalls = deduce_recalls(settings) - if recalls == None: + if recalls is None: + settings.logger.debug('No recalls; datalog is False') settings.datalog = False else: + settings.logger.debug('Non-empty recalls: datalog is True') settings.datalog = True if settings.showcons: for x in recalls: @@ -1569,14 +1615,12 @@ def get_bk_cons(settings, tester): print('singletons', x) bkcons.extend(xs) - type_cons = tuple(deduce_type_cons(settings)) if settings.showcons: for x in type_cons: print('type_con', x) bkcons.extend(type_cons) - if not settings.datalog: settings.logger.debug(f'Loading recalls FAILURE') else: @@ -1602,6 +1646,7 @@ def handler(signum, frame): bkcons.extend(xs) return bkcons + def learn_solution(settings): t1 = time.time() settings.nonoise = not settings.noisy @@ -1611,12 +1656,13 @@ def learn_solution(settings): tester = Tester(settings) bkcons = get_bk_cons(settings, tester) - time_so_far = time.time()-t1 - timeout(settings, popper, (settings, tester, bkcons), timeout_duration=int(settings.timeout-time_so_far),) + print("Have incorporated bk cons.") + time_so_far = time.time() - t1 + timeout(settings, popper, (settings, tester, bkcons), timeout_duration=int(settings.timeout - time_so_far), ) return settings.solution, settings.best_prog_score, settings.stats -def generalisations(prog, allow_headless=True, recursive=False): +def generalisations(prog, allow_headless=True, recursive=False): if len(prog) == 1: rule = list(prog)[0] head, body = rule @@ -1633,7 +1679,7 @@ def generalisations(prog, allow_headless=True, recursive=False): # do not remove recursive literals if recursive and body[i].predicate == head.predicate: continue - new_body = body[:i] + body[i+1:] + new_body = body[:i] + body[i + 1:] new_rule = (head, frozenset(new_body)) new_prog = [new_rule] yield new_prog @@ -1644,9 +1690,10 @@ def generalisations(prog, allow_headless=True, recursive=False): subrule = prog[i] recursive = rule_is_recursive(subrule) for new_subrule in generalisations([subrule], allow_headless=False, recursive=recursive): - new_prog = prog[:i] + new_subrule + prog[i+1:] + new_prog = prog[:i] + new_subrule + prog[i + 1:] yield new_prog + def tmp(prog): for rule in prog: head, body = rule @@ -1656,6 +1703,7 @@ def tmp(prog): return False return True + def explain_none_functional(settings, tester, prog): new_cons = [] @@ -1685,12 +1733,13 @@ def explain_none_functional(settings, tester, prog): for r1 in base: for r2 in rec: - subprog = frozenset([r1,r2]) + subprog = frozenset([r1, r2]) if tester.is_non_functional(subprog): new_cons.append((Constraint.GENERALISATION, subprog)) return new_cons + # TODO: THIS CHECK IS NOT COMPLETE # IT DOES NOT ACCOUNT FOR VARIABLE RENAMING # R1 = (None, frozenset({('c3', ('A',)), ('c2', ('A',))})) @@ -1703,13 +1752,16 @@ def rule_subsumes(r1, r2): return False return b1.issubset(b2) + # P1 subsumes P2 if for every rule R2 in P2 there is a rule R1 in P1 such that R1 subsumes R2 def theory_subsumes(prog1, prog2): return all(any(rule_subsumes(r1, r2) for r1 in prog1) for r2 in prog2) + def seen_more_general_unsat(prog, unsat): return any(theory_subsumes(seen, prog) for seen in unsat) + def head_connected(rule): head, body = rule head_connected_vars = set(head.arguments) @@ -1718,7 +1770,7 @@ def head_connected(rule): while body_literals: connected = [] for literal in body_literals: - if any (x in head_connected_vars for x in literal.arguments): + if any(x in head_connected_vars for x in literal.arguments): head_connected_vars.update(literal.arguments) connected.append(literal) if not connected and body_literals: @@ -1726,6 +1778,7 @@ def head_connected(rule): body_literals.difference_update(connected) return True + def connected(body): if len(body) == 1: return True @@ -1737,7 +1790,7 @@ def connected(body): while body_literals: connected = [] for literal in body_literals: - if any (x in connected_vars for x in literal.arguments): + if any(x in connected_vars for x in literal.arguments): connected_vars.update(literal.arguments) connected.append(literal) if not connected and body_literals: @@ -1745,6 +1798,7 @@ def connected(body): body_literals.difference_update(connected) return True + def non_empty_powerset(iterable): s = tuple(iterable) - return chain.from_iterable(combinations(s, r) for r in range(1, len(s)+1)) + return chain.from_iterable(combinations(s, r) for r in range(1, len(s) + 1)) diff --git a/popper/resources.py b/popper/resources.py new file mode 100644 index 0000000..1fd0de7 --- /dev/null +++ b/popper/resources.py @@ -0,0 +1,26 @@ +from typing import Dict, Any + + +try: + from pkg_resources import resource_string, resource_filename + def close_resource_file(ref: Any): + pass +except: + import importlib.resources + from contextlib import ExitStack + from importlib.resources.abc import Traversable + def resource_string(package: str, path: str) -> bytes: + ref = importlib.resources.files(package).joinpath(path) + with ref.open('rb') as fp: + my_bytes = fp.read() + return my_bytes + managers: Dict[Traversable, ExitStack] = {} + def resource_filename(pkg: str, path: str) -> Traversable: + manager = ExitStack() + ref = manager.enter_context(importlib.resources.files(pkg) / path) + managers[ref] = manager + return ref + def close_resource_file(ref: Traversable): + if ref in managers: + managers[ref].close() + del managers[ref] diff --git a/popper/tester.py b/popper/tester.py index 49a9ca5..2b65249 100644 --- a/popper/tester.py +++ b/popper/tester.py @@ -1,14 +1,17 @@ import os -import time -import pkg_resources -from janus_swi import query_once, consult -from functools import cache +from collections import defaultdict from contextlib import contextmanager -from . util import order_prog, prog_is_recursive, rule_is_recursive, calc_rule_size, calc_prog_size, prog_hash, format_rule, format_literal, Literal +from functools import cache +from itertools import product +from typing import Tuple + from bitarray import bitarray, frozenbitarray from bitarray.util import ones -from collections import defaultdict -from itertools import product +from janus_swi import consult, query_once + +from .util import Literal, calc_prog_size, calc_rule_size, format_rule, order_prog, prog_hash, prog_is_recursive +from .resources import resource_string, resource_filename, close_resource_file + def format_literal_janus(literal): args = ','.join(f'_V{i}' for i in literal.arguments) @@ -24,7 +27,8 @@ def __init__(self, settings): bk_pl_path = self.settings.bk_file exs_pl_path = self.settings.ex_file - test_pl_path = pkg_resources.resource_filename(__name__, "lp/test.pl") + test_pl_path = str(resource_filename(__name__, "lp/test.pl")) + assert isinstance(test_pl_path, str) if not settings.pi_enabled: consult('prog', f':- dynamic {settings.head_literal.predicate}/{len(settings.head_literal.arguments)}.') @@ -33,6 +37,7 @@ def __init__(self, settings): if os.name == 'nt': # if on Windows, SWI requires escaped directory separators x = x.replace('\\', '\\\\') consult(x) + close_resource_file(x) query_once('load_examples') @@ -84,7 +89,7 @@ def test_prog_noisy(self, prog, prog_size): settings = self.settings neg_covered = None skipped, skip_early_neg = False, False - inconsistent = False + inconsistent: bool = False if settings.recursion_enabled or settings.pi_enabled: pos_covered, neg_covered = self.test_prog_all(prog) @@ -111,8 +116,9 @@ def test_prog_noisy(self, prog, prog_size): return pos_covered, neg_covered, inconsistent, skipped, skip_early_neg - def test_prog(self, prog): + def test_prog(self, prog) -> Tuple[frozenbitarray, bool]: + # sourcery skip: no-conditionals-in-tests if self.settings.recursion_enabled or self.settings.pi_enabled: if len(prog) == 1: @@ -156,7 +162,7 @@ def test_prog(self, prog): self.cached_pos_covered[hash(prog)] = pos_covered return pos_covered, inconsistent - def test_prog_all(self, prog): + def test_prog_all(self, prog) -> Tuple[frozenbitarray, frozenbitarray]: if len(prog) == 1: atom_str, body_str = self.parse_single_rule(prog) @@ -174,15 +180,15 @@ def test_prog_all(self, prog): pos_covered_bits = bitarray(self.num_pos) pos_covered_bits[pos_covered] = 1 - pos_covered = frozenbitarray(pos_covered_bits) + pos_covered_arr: frozenbitarray = frozenbitarray(pos_covered_bits) neg_covered_bits = bitarray(self.num_neg) neg_covered_bits[neg_covered] = 1 - neg_covered = frozenbitarray(neg_covered_bits) + neg_covered_arr: frozenbitarray = frozenbitarray(neg_covered_bits) - return pos_covered, neg_covered + return pos_covered_arr, neg_covered_arr - def test_prog_pos(self, prog): + def test_prog_pos(self, prog) -> frozenbitarray: if len(prog) == 1: atom_str, body_str = self.parse_single_rule(prog) @@ -192,7 +198,7 @@ def test_prog_pos(self, prog): with self.using(prog): pos_covered = query_once('pos_covered(S)')['S'] - pos_covered_bits = bitarray(self.num_pos) + pos_covered_bits: bitarray = bitarray(self.num_pos) pos_covered_bits[pos_covered] = 1 pos_covered = frozenbitarray(pos_covered_bits) return pos_covered @@ -374,35 +380,35 @@ def has_redundant_literal(self, prog): # print(q, False) return False - # # WE ASSUME THAT THERE IS A REUNDANT RULE - def find_redundant_rule_(self, prog): - prog_ = [] - for i, (head, body) in enumerate(prog): - c = f"{i}-[{','.join(('not_'+ format_literal(head),) + tuple(format_literal(lit) for lit in body))}]" - prog_.append(c) - prog_ = f"[{','.join(prog_)}]" - prog_ = janus_format_rule(prog_) - q = f'find_redundant_rule({prog_}, K1, K2)' - res = query_once(q) - k1 = res['K1'] - k2 = res['K2'] - return prog[k1], prog[k2] - - def find_redundant_rules(self, prog): - # assert(False) - # AC: if the overhead of this call becomes too high, such as when learning programs with lots of clauses, we can improve it by not comparing already compared clauses - base = [] - step = [] - for rule in prog: - if rule_is_recursive(rule): - step.append(rule) - else: - base.append(rule) - if len(base) > 1 and self.has_redundant_rule(base): - return self.find_redundant_rule_(base) - if len(step) > 1 and self.has_redundant_rule(step): - return self.find_redundant_rule_(step) - return None + # # WE ASSUME THAT THERE IS A REDUNDANT RULE + # def find_redundant_rule_(self, prog): + # prog_ = [] + # for i, (head, body) in enumerate(prog): + # c = f"{i}-[{','.join(('not_'+ format_literal(head),) + tuple(format_literal(lit) for lit in body))}]" + # prog_.append(c) + # prog_ = f"[{','.join(prog_)}]" + # prog_ = janus_format_rule(prog_) + # q = f'find_redundant_rule({prog_}, K1, K2)' + # res = query_once(q) + # k1 = res['K1'] + # k2 = res['K2'] + # return prog[k1], prog[k2] + # + # def find_redundant_rules(self, prog): + # # assert(False) + # # AC: if the overhead of this call becomes too high, such as when learning programs with lots of clauses, we can improve it by not comparing already compared clauses + # base = [] + # step = [] + # for rule in prog: + # if rule_is_recursive(rule): + # step.append(rule) + # else: + # base.append(rule) + # if len(base) > 1 and self.has_redundant_rule(base): + # return self.find_redundant_rule_(base) + # if len(step) > 1 and self.has_redundant_rule(step): + # return self.find_redundant_rule_(step) + # return None def find_pointless_relations(self): settings = self.settings @@ -452,7 +458,7 @@ def find_pointless_relations(self): if query_once(query1)['truth'] or query_once(query2)['truth']: continue except Exception as Err: - print('ERROR detecting pointless relations', Err) + settings.logger.error(f'ERROR detecting pointless relations: {Err}') return pointless a, b = (p,pa), (q,qa) @@ -517,4 +523,4 @@ def deduce_neg_example_recalls(settings, atoms): return all_recalls def generate_binary_strings(bit_count): - return list(product((0,1), repeat=bit_count))[1:-1] \ No newline at end of file + return list(product((0,1), repeat=bit_count))[1:-1] diff --git a/popper/util.py b/popper/util.py index a756c05..3910ffc 100644 --- a/popper/util.py +++ b/popper/util.py @@ -1,36 +1,40 @@ -import clingo -import clingo.script -import signal import argparse -import os import logging -from itertools import permutations, chain, combinations +import signal from collections import defaultdict -from typing import NamedTuple -from time import perf_counter from contextlib import contextmanager +from itertools import permutations +from time import perf_counter +from typing import NamedTuple, Optional, Dict, Tuple, Any + +import clingo +import clingo.script + +from popper.abs_generate import Rule + class Literal(NamedTuple): predicate: str arguments: tuple + clingo.script.enable_python() -TIMEOUT=1200 -EVAL_TIMEOUT=0.001 -MAX_LITERALS=40 -MAX_SOLUTIONS=1 -CLINGO_ARGS='' -MAX_RULES=2 -MAX_VARS=6 -MAX_BODY=6 -MAX_EXAMPLES=10000 -BATCH_SIZE=20000 -ANYTIME_TIMEOUT=10 -BKCONS_TIMEOUT=10 +TIMEOUT = 1200 +EVAL_TIMEOUT = 0.001 +MAX_LITERALS = 40 +MAX_SOLUTIONS = 1 +CLINGO_ARGS = '' +MAX_RULES = 2 +MAX_VARS = 6 +MAX_BODY = 6 +MAX_EXAMPLES = 10000 +BATCH_SIZE = 20000 +ANYTIME_TIMEOUT = 10 +BKCONS_TIMEOUT = 10 +savings = 0 -savings=0 class Constraint: GENERALISATION = 1 @@ -41,26 +45,38 @@ class Constraint: TMP_ANDY = 6 BANISH = 7 + def parse_args(): parser = argparse.ArgumentParser(description='Popper is an ILP system based on learning from failures') parser.add_argument('kbpath', help='Path to files to learn from') parser.add_argument('--noisy', default=False, action='store_true', help='tell Popper that there is noise') # parser.add_argument('--bkcons', default=False, action='store_true', help='deduce background constraints from Datalog background (EXPERIMENTAL!)') - parser.add_argument('--timeout', type=float, default=TIMEOUT, help=f'Overall timeout in seconds (default: {TIMEOUT})') - parser.add_argument('--max-literals', type=int, default=MAX_LITERALS, help=f'Maximum number of literals allowed in program (default: {MAX_LITERALS})') - parser.add_argument('--max-body', type=int, default=None, help=f'Maximum number of body literals allowed in rule (default: {MAX_BODY})') - parser.add_argument('--max-vars', type=int, default=None, help=f'Maximum number of variables allowed in rule (default: {MAX_VARS})') - parser.add_argument('--max-rules', type=int, default=None, help=f'Maximum number of rules allowed in a recursive program (default: {MAX_RULES})') - parser.add_argument('--eval-timeout', type=float, default=EVAL_TIMEOUT, help=f'Prolog evaluation timeout in seconds (default: {EVAL_TIMEOUT})') + parser.add_argument('--timeout', type=float, default=TIMEOUT, + help=f'Overall timeout in seconds (default: {TIMEOUT})') + parser.add_argument('--max-literals', type=int, default=MAX_LITERALS, + help=f'Maximum number of literals allowed in program (default: {MAX_LITERALS})') + parser.add_argument('--max-body', type=int, default=None, + help=f'Maximum number of body literals allowed in rule (default: {MAX_BODY})') + parser.add_argument('--max-vars', type=int, default=None, + help=f'Maximum number of variables allowed in rule (default: {MAX_VARS})') + parser.add_argument('--max-rules', type=int, default=None, + help=f'Maximum number of rules allowed in a recursive program (default: {MAX_RULES})') + parser.add_argument('--eval-timeout', type=float, default=EVAL_TIMEOUT, + help=f'Prolog evaluation timeout in seconds (default: {EVAL_TIMEOUT})') parser.add_argument('--stats', default=False, action='store_true', help='Print statistics at end of execution') parser.add_argument('--quiet', '-q', default=False, action='store_true', help='Hide information during learning') parser.add_argument('--debug', default=False, action='store_true', help='Print debugging information to stderr') - parser.add_argument('--showcons', default=False, action='store_true', help='Show constraints deduced during the search') - parser.add_argument('--solver', default='rc2', choices=['rc2', 'uwr', 'wmaxcdcl'], help='Select a solver for the combine stage (default: rc2)') - parser.add_argument('--anytime-solver', default=None, choices=['wmaxcdcl', 'nuwls'], help='Select an anytime MaxSAT solver (default: None)') - parser.add_argument('--anytime-timeout', type=int, default=ANYTIME_TIMEOUT, help=f'Maximum timeout (seconds) for each anytime MaxSAT call (default: {ANYTIME_TIMEOUT})') - parser.add_argument('--batch-size', type=int, default=BATCH_SIZE, help=f'Combine batch size (default: {BATCH_SIZE})') + parser.add_argument('--showcons', default=False, action='store_true', + help='Show constraints deduced during the search') + parser.add_argument('--solver', default='rc2', choices=['clingo', 'rc2', 'uwr', 'wmaxcdcl'], + help='Select a solver for the combine stage (default: rc2)') + parser.add_argument('--anytime-solver', default=None, choices=['wmaxcdcl', 'nuwls'], + help='Select an anytime MaxSAT solver (default: None)') + parser.add_argument('--anytime-timeout', type=int, default=ANYTIME_TIMEOUT, + help=f'Maximum timeout (seconds) for each anytime MaxSAT call (default: {ANYTIME_TIMEOUT})') + parser.add_argument('--batch-size', type=int, default=BATCH_SIZE, + help=f'Combine batch size (default: {BATCH_SIZE})') parser.add_argument('--functional-test', default=False, action='store_true', help='Run functional test') # parser.add_argument('--datalog', default=False, action='store_true', help='EXPERIMENTAL FEATURE: use recall to order literals in rules') # parser.add_argument('--no-bias', default=False, action='store_true', help='EXPERIMENTAL FEATURE: do not use language bias') @@ -68,8 +84,12 @@ def parse_args(): return parser.parse_args() -def timeout(settings, func, args=(), kwargs={}, timeout_duration=1): + +def timeout(settings, func, args=(), kwargs: Optional[Dict] = None, timeout_duration=1): result = None + if kwargs is None: + kwargs = {} + class TimeoutError(Exception): pass @@ -94,14 +114,17 @@ def handler(signum, frame): return result + def load_kbpath(kbpath): def fix_path(filename): full_filename = os.path.join(kbpath, filename) return full_filename.replace('\\', '\\\\') if os.name == 'nt' else full_filename + return fix_path("bk.pl"), fix_path("exs.pl"), fix_path("bias.pl") + class Stats: - def __init__(self, info = False, debug = False): + def __init__(self, info=False, debug=False): self.exec_start = perf_counter() self.total_programs = 0 self.durations = {} @@ -114,7 +137,7 @@ def show(self): total_op_time = sum(summary.total for summary in self.duration_summary()) for summary in self.duration_summary(): - percentage = int((summary.total/total_op_time)*100) + percentage = int((summary.total / total_op_time) * 100) message += f'{summary.operation}:\n\tCalled: {summary.called} times \t ' + \ f'Total: {summary.total:0.2f} \t Mean: {summary.mean:0.4f} \t ' + \ f'Max: {summary.maximum:0.3f} \t Percentage: {percentage}%\n' @@ -125,11 +148,11 @@ def show(self): def duration_summary(self): summary = [] - stats = sorted(self.durations.items(), key = lambda x: sum(x[1]), reverse=True) + stats = sorted(self.durations.items(), key=lambda x: sum(x[1]), reverse=True) for operation, durations in stats: called = len(durations) total = sum(durations) - mean = sum(durations)/len(durations) + mean = sum(durations) / len(durations) maximum = max(durations) summary.append(DurationSummary(operation.title(), called, total, mean, maximum)) return summary @@ -150,13 +173,14 @@ def duration(self, operation): # def format_prog2(prog): - # return '\n'.join(format_rule(order_rule2(rule)) for rule in order_prog(prog)) +# return '\n'.join(format_rule(order_rule2(rule)) for rule in order_prog(prog)) def format_literal(literal): pred, args = literal args = ','.join(f'V{i}' for i in args) return f'{pred}({args})' + def format_rule(rule): head, body = rule head_str = '' @@ -165,13 +189,16 @@ def format_rule(rule): body_str = ','.join(format_literal(literal) for literal in body) return f'{head_str}:- {body_str}.' + def calc_prog_size(prog): return sum(calc_rule_size(rule) for rule in prog) + def calc_rule_size(rule): head, body = rule return 1 + len(body) + def reduce_prog(prog): reduced = {} for rule in prog: @@ -180,9 +207,11 @@ def reduce_prog(prog): reduced[k] = rule return reduced.values() + def order_prog(prog): return sorted(list(prog), key=lambda rule: (rule_is_recursive(rule), len(rule[1]))) + def rule_is_recursive(rule): head, body = rule head_pred, _head_args = head @@ -190,16 +219,19 @@ def rule_is_recursive(rule): return False return any(head_pred == pred for pred, _args in body) + def prog_is_recursive(prog): if len(prog) < 2: return False return any(rule_is_recursive(rule) for rule in prog) + def prog_has_invention(prog): if len(prog) < 2: return False return any(rule_is_invented(rule) for rule in prog) + def rule_is_invented(rule): head, body = rule if not head: @@ -207,9 +239,11 @@ def rule_is_invented(rule): head_pred, _head_arg = head return head_pred.startswith('inv') + def mdl_score(fn, fp, size): return fn + fp + size + class DurationSummary: def __init__(self, operation, called, total, mean, maximum): self.operation = operation @@ -218,11 +252,23 @@ def __init__(self, operation, called, total, mean, maximum): self.mean = mean self.maximum = maximum + def flatten(xs): return [item for sublist in xs for item in sublist] + class Settings: - def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_literals=MAX_LITERALS, timeout=TIMEOUT, quiet=False, eval_timeout=EVAL_TIMEOUT, max_examples=MAX_EXAMPLES, max_body=None, max_rules=None, max_vars=None, functional_test=False, kbpath=False, ex_file=False, bk_file=False, bias_file=False, showcons=False, no_bias=False, order_space=False, noisy=False, batch_size=BATCH_SIZE, solver='rc2', anytime_solver=None, anytime_timeout=ANYTIME_TIMEOUT): + + showcons: bool + datalog: bool + show_failures: bool # display detailed FP and FN information + cached_literals: Dict[Tuple[str, Any], Literal] + + def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_literals=MAX_LITERALS, + timeout=TIMEOUT, quiet=False, eval_timeout=EVAL_TIMEOUT, max_examples=MAX_EXAMPLES, max_body=None, + max_rules=None, max_vars=None, functional_test=False, kbpath=False, ex_file=False, bk_file=False, + bias_file=False, showcons=False, no_bias=False, order_space=False, noisy=False, batch_size=BATCH_SIZE, + solver='rc2', anytime_solver=None, anytime_timeout=ANYTIME_TIMEOUT, show_failures=False): if cmd_line: args = parse_args() @@ -261,14 +307,15 @@ def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_ self.logger = logging.getLogger("popper") if quiet: - pass + log_level = logging.ERROR elif debug: log_level = logging.DEBUG # logging.basicConfig(format='%(asctime)s %(message)s', level=log_level, datefmt='%H:%M:%S') - logging.basicConfig(format='%(message)s', level=log_level, datefmt='%H:%M:%S') elif info: log_level = logging.INFO - logging.basicConfig(format='%(asctime)s %(message)s', level=log_level, datefmt='%H:%M:%S') + else: + log_level = logging.WARNING + logging.basicConfig(format='%(message)s', level=log_level, datefmt='%H:%M:%S') self.info = info self.debug = debug @@ -331,8 +378,6 @@ def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_ for x in solver.symbolic_atoms.by_signature('non_datalog', arity=0): self.non_datalog_flag = True - - # read directions from bias file when there is no PI # if not self.pi_enabled: self.directions = directions = defaultdict(dict) @@ -348,7 +393,6 @@ def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_ arg_dir = '-' directions[pred][i] = arg_dir - self.max_arity = 0 for x in solver.symbolic_atoms.by_signature('head_pred', arity=2): self.max_arity = max(self.max_arity, x.symbol.arguments[1].number) @@ -395,13 +439,12 @@ def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_ exit() # self.body_modes[pred] = tuple(directions[pred][i] for i in range(arity)) - # TODO: EVENTUALLY # print(directions) self.cached_atom_args = {} - for i in range(1, self.max_arity+1): + for i in range(1, self.max_arity + 1): for args in permutations(range(0, self.max_vars), i): k = tuple(clingo.Number(x) for x in args) self.cached_atom_args[k] = args @@ -426,11 +469,13 @@ def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_ literal = Literal(pred, args) self.cached_literals[(pred, k)] = literal if self.has_directions: - self.literal_inputs[(pred, args)] = frozenset(arg for i, arg in enumerate(args) if directions[pred][i] == '+') - self.literal_outputs[(pred, args)] = frozenset(arg for i, arg in enumerate(args) if directions[pred][i] == '-') + self.literal_inputs[(pred, args)] = frozenset( + arg for i, arg in enumerate(args) if directions[pred][i] == '+') + self.literal_outputs[(pred, args)] = frozenset( + arg for i, arg in enumerate(args) if directions[pred][i] == '-') # for k, vs in self.literal_inputs.items(): - # print(k, vs) + # print(k, vs) # print('head_inputs', head_inputs) # print('head_outputs', head_outputs) # exit() @@ -452,18 +497,15 @@ def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_ self.head_types, self.body_types = load_types(self) - if len(self.body_types) > 0 or not self.head_types is None: if self.head_types is None: print('WARNING: MISSING HEAD TYPE') # exit() - for p,a in self.body_preds: + for p, a in self.body_preds: if p not in self.body_types: print(f'WARNING: MISSING BODY TYPE FOR {p}') # exit() - - self.single_solve = not (self.recursion_enabled or self.pi_enabled) self.logger.debug(f'Max rules: {self.max_rules}') @@ -473,34 +515,35 @@ def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_ self.single_solve = not (self.recursion_enabled or self.pi_enabled) def print_incomplete_solution2(self, prog, tp, fn, tn, fp, size): - self.logger.info('*'*20) + self.logger.info('*' * 20) self.logger.info('New best hypothesis:') if self.noisy: - self.logger.info(f'tp:{tp} fn:{fn} tn:{tn} fp:{fp} size:{size} mdl:{size+fn+fp}') + self.logger.info(f'tp:{tp} fn:{fn} tn:{tn} fp:{fp} size:{size} mdl:{size + fn + fp}') else: self.logger.info(f'tp:{tp} fn:{fn} tn:{tn} fp:{fp} size:{size}') for rule in order_prog(prog): self.logger.info(format_rule(self.order_rule(rule))) - self.logger.info('*'*20) + self.logger.info('*' * 20) - def print_prog_score(self, prog, score): + def print_prog_score(self, prog, score: Tuple[int, int, int, int, int]) -> None: tp, fn, tn, fp, size = score - precision = 'n/a' - if (tp+fp) > 0: - precision = f'{tp / (tp+fp):0.2f}' + precision: str = 'n/a' + if (tp + fp) > 0: + precision = f'{tp / (tp + fp):0.2f}' recall = 'n/a' - if (tp+fn) > 0: - recall = f'{tp / (tp+fn):0.2f}' - print('*'*10 + ' SOLUTION ' + '*'*10) + if (tp + fn) > 0: + recall = f'{tp / (tp + fn):0.2f}' + print('*' * 10 + ' SOLUTION ' + '*' * 10) if self.noisy: - print(f'Precision:{precision} Recall:{recall} TP:{tp} FN:{fn} TN:{tn} FP:{fp} Size:{size} MDL:{size+fn+fp}') + print( + f'Precision:{precision} Recall:{recall} TP:{tp} FN:{fn} TN:{tn} FP:{fp} Size:{size} MDL:{size + fn + fp}') else: - print(f'Precision:{precision} Recall:{recall} TP:{tp} FN:{fn} TN:{tn} FP:{fp} Size:{size}') + print(f'Precision:{precision} Recall:{recall} TP:{tp} FN:{fn} TN:{tn} FP:{fp} Size:{size}') # print(self.format_prog(order_prog(prog))) for rule in order_prog(prog): print(format_rule(self.order_rule(rule))) # print(self.format_prog(order_prog(prog))) - print('*'*30) + print('*' * 30) def order_rule(self, rule): head, body = rule @@ -514,7 +557,6 @@ def order_rule(self, rule): if not self.has_directions: return rule - ordered_body = [] grounded_variables = set() @@ -527,7 +569,6 @@ def order_rule(self, rule): body_literals = set(body) - while body_literals: selected_literal = None for literal in body_literals: @@ -595,6 +636,7 @@ def tmp_score_(self, seen_vars, literal): pred, args = literal return self.recall[pred, tuple(1 if x in seen_vars else 0 for x in args)] + # def non_empty_powerset(iterable): # s = tuple(iterable) # return chain.from_iterable(combinations(s, r) for r in range(1, len(s)+1)) @@ -632,6 +674,7 @@ def load_types(settings): return head_types, body_types + # def bias_order(settings, max_size): # if not (settings.no_bias or settings.order_space): @@ -704,6 +747,8 @@ def load_types(settings): # return True import os + + # AC: I do not know what this code below really does, but it works class suppress_stdout_stderr(object): ''' @@ -715,25 +760,27 @@ class suppress_stdout_stderr(object): exited (at least, I think that is why it lets exceptions through). ''' + def __init__(self): # Open a pair of null files - self.null_fds = [os.open(os.devnull,os.O_RDWR) for x in range(2)] + self.null_fds = [os.open(os.devnull, os.O_RDWR) for x in range(2)] # Save the actual stdout (1) and stderr (2) file descriptors. self.save_fds = [os.dup(1), os.dup(2)] def __enter__(self): # Assign the null pointers to stdout and stderr. - os.dup2(self.null_fds[0],1) - os.dup2(self.null_fds[1],2) + os.dup2(self.null_fds[0], 1) + os.dup2(self.null_fds[1], 2) def __exit__(self, *_): # Re-assign the real stdout/stderr back to (1) and (2) - os.dup2(self.save_fds[0],1) - os.dup2(self.save_fds[1],2) + os.dup2(self.save_fds[0], 1) + os.dup2(self.save_fds[1], 2) # Close all file descriptors for fd in self.null_fds + self.save_fds: os.close(fd) + def rename_variables(rule): head, body = rule if head: @@ -751,11 +798,12 @@ def rename_variables(rule): continue elif var not in lookup: lookup[var] = next_var - next_var+=1 + next_var += 1 new_args.append(lookup[var]) new_body.append((pred, tuple(new_args))) return (head, new_body) + def get_raw_prog(prog): xs = set() for rule in prog: @@ -763,18 +811,19 @@ def get_raw_prog(prog): xs.add((h, frozenset(b))) return frozenset(xs) + def prog_hash(prog): new_prog = get_raw_prog(prog) return hash(new_prog) -def remap_variables(rule): + +def remap_variables(rule: Tuple[Any, Any]) -> Rule: head, body = rule head_vars = frozenset(head.arguments) if head else frozenset() - next_var = len(head_vars) - lookup = {i:i for i in head_vars} + lookup = {i: i for i in head_vars} new_body = [] for pred, args in body: @@ -782,12 +831,13 @@ def remap_variables(rule): for var in args: if var not in lookup: lookup[var] = next_var - next_var+=1 + next_var += 1 new_args.append(lookup[var]) new_atom = Literal(pred, tuple(new_args)) new_body.append(new_atom) return head, frozenset(new_body) + def format_prog(prog): - return '\n'.join(format_rule(rule) for rule in prog) \ No newline at end of file + return '\n'.join(format_rule(rule) for rule in prog)