From 94880cb4acb15008264b2ce278e6e25914d90b1e Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Tue, 26 Sep 2023 13:56:50 +0200 Subject: [PATCH 01/62] add initial setup for PDAs --- aalpy/SULs/AutomataSUL.py | 22 +- aalpy/automata/Pda.py | 215 ++++++++++++++ aalpy/automata/__init__.py | 1 + aalpy/learning_algs/__init__.py | 3 +- aalpy/learning_algs/vpda/VpdaLStar.py | 181 ++++++++++++ .../vpda/VpdaObservationTable.py | 219 +++++++++++++++ aalpy/learning_algs/vpda/__init__.py | 0 aalpy/utils/BenchmarkPdaModels.py | 264 ++++++++++++++++++ aalpy/utils/BenchmarkSULs.py | 2 +- aalpy/utils/FileHandler.py | 28 +- aalpy/utils/__init__.py | 1 + pda_main_experiments.py | 16 ++ 12 files changed, 946 insertions(+), 6 deletions(-) create mode 100644 aalpy/automata/Pda.py create mode 100644 aalpy/learning_algs/vpda/VpdaLStar.py create mode 100644 aalpy/learning_algs/vpda/VpdaObservationTable.py create mode 100644 aalpy/learning_algs/vpda/__init__.py create mode 100644 aalpy/utils/BenchmarkPdaModels.py create mode 100644 pda_main_experiments.py diff --git a/aalpy/SULs/AutomataSUL.py b/aalpy/SULs/AutomataSUL.py index 1982a56f..c0c0be72 100644 --- a/aalpy/SULs/AutomataSUL.py +++ b/aalpy/SULs/AutomataSUL.py @@ -1,5 +1,5 @@ from aalpy.base import SUL -from aalpy.automata import Dfa, MealyMachine, MooreMachine, Onfsm, Mdp, StochasticMealyMachine, MarkovChain +from aalpy.automata import Dfa, MealyMachine, MooreMachine, Onfsm, Mdp, StochasticMealyMachine, MarkovChain, Pda class DfaSUL(SUL): @@ -164,3 +164,23 @@ def post(self): def step(self, letter): return self.smm.step(letter) + + +class PdaSUL(SUL): + def __init__(self, pda: Pda, include_top=True): + super().__init__() + self.pda = pda + self.include_top = include_top + + def pre(self): + self.pda.reset_to_initial() + + def post(self): + pass + + def step(self, letter): + output = self.pda.step(letter) + top = self.pda.top() + if self.include_top: + return output, top + return output diff --git a/aalpy/automata/Pda.py 
b/aalpy/automata/Pda.py new file mode 100644 index 00000000..8d716f31 --- /dev/null +++ b/aalpy/automata/Pda.py @@ -0,0 +1,215 @@ +from collections import defaultdict + +from aalpy.base import Automaton, AutomatonState + + +class PdaState(AutomatonState): + """ + Single state of a deterministic finite automaton. + """ + + def __init__(self, state_id, is_accepting=False): + super().__init__(state_id) + self.transitions = defaultdict(list) + self.is_accepting = is_accepting + + +class PdaTransition: + def __init__(self, start: PdaState, target: PdaState, symbol, action, stack_guard=None): + self.start = start + self.target = target + self.symbol = symbol + self.action = action + self.stack_guard = stack_guard + + +class Pda(Automaton): + empty = "$" + error_state = PdaState("ErrorSinkState", False) + + def __init__(self, initial_state: PdaState, states): + super().__init__(initial_state, states) + self.initial_state = initial_state + self.states = states + self.current_state = None + self.stack = [] + + def reset_to_initial(self): + super().reset_to_initial() + self.reset() + + def reset(self): + self.current_state = self.initial_state + self.stack = [self.empty] + return self.current_state.is_accepting and self.top() == self.empty + + def top(self): + return self.stack[-1] + + def pop(self): + return self.stack.pop() + + def possible(self, letter): + if self.current_state == Pda.error_state: + return True + if letter is not None: + transitions = self.current_state.transitions[letter] + trans = [t for t in transitions if t.stack_guard is None or self.top() == t.stack_guard] + assert len(trans) < 2 + if len(trans) == 0: + return False + else: + return True + return False + + def step(self, letter): + if self.current_state == Pda.error_state or not self.possible(letter): + return False + if letter is not None: + transitions = self.current_state.transitions[letter] + trans = [t for t in transitions if t.stack_guard is None or self.top() == t.stack_guard][0] + 
self.current_state = trans.target + if trans.action == 'push': + self.stack.append(letter) + elif trans.action == 'pop': + if len(self.stack) <= 1: # empty stack elem should always be there + self.current_state = Pda.error_state + return False + self.stack.pop() + + return self.current_state.is_accepting and self.top() == self.empty + + # def compute_output_seq(self, state, sequence): + # if not sequence: + # return [state.is_accepting] + # return super(Dfa, self).compute_output_seq(state, sequence) + + def to_state_setup(self): + state_setup_dict = {} + + # ensure prefixes are computed + # self.compute_prefixes() + + sorted_states = sorted(self.states, key=lambda x: len(x.prefix)) + for s in sorted_states: + state_setup_dict[s.state_id] = ( + s.is_accepting, {k: (v.target.state_id, v.action) for k, v in s.transitions.items()}) + + return state_setup_dict + + @staticmethod + def from_state_setup(state_setup: dict, init_state_id): + """ + First state in the state setup is the initial state. 
+ Example state setup: + state_setup = { + "a": (True, {"x": ("b1",PUSH), "y": ("a", NONE)}), + "b1": (False, {"x": ("b2", PUSH), "y": "a"}), + "b2": (True, {"x": "b3", "y": "a"}), + "b3": (False, {"x": "b4", "y": "a"}), + "b4": (False, {"x": "c", "y": "a"}), + "c": (True, {"x": "a", "y": "a"}), + } + + Args: + + state_setup: map from state_id to tuple(output and transitions_dict) + + Returns: + + PDA + """ + # state_setup should map from state_id to tuple(is_accepting and transitions_dict) + + # build states with state_id and output + states = {key: PdaState(key, val[0]) for key, val in state_setup.items()} + states[Pda.error_state.state_id] = Pda.error_state # PdaState(Pda.error_state,False) + # add transitions to states + for state_id, state in states.items(): + if state_id == Pda.error_state.state_id: + continue + for _input, trans_spec in state_setup[state_id][1].items(): + for (target_state_id, action, stack_guard) in trans_spec: + # action = Action[action_string] + trans = PdaTransition(start=state, target=states[target_state_id], symbol=_input, action=action, + stack_guard=stack_guard) + state.transitions[_input].append(trans) + + init_state = states[init_state_id] + # states to list + states = [state for state in states.values()] + + pda = Pda(init_state, states) + return pda + + +# def generate_data_from_pda(automaton, num_examples, lens=None, classify_states=False, stack_limit=None, +# break_on_impossible=False, possible_prob=0.75): +# input_al = automaton.get_input_alphabet() +# output_al = [False, True] +# if classify_states: +# output_al = [s.state_id for s in automaton.states] +# +# if lens is None: +# lens = list(range(1, 15)) +# +# sum_lens = sum(lens) +# # key is length, value is number of examples for said length +# ex_per_len = dict() +# +# additional_seq = 0 +# for l in lens: +# ex_per_len[l] = int(num_examples * (l / sum_lens)) + 1 +# if ex_per_len[l] > pow(len(input_al), l): +# additional_seq += ex_per_len[l] - pow(len(input_al), l) +# 
ex_per_len[l] = 'comb' +# +# additional_seq = additional_seq // len([i for i in ex_per_len.values() if i != 'comb']) +# +# training_data = [] +# for l in ex_per_len.keys(): +# seqs = [] +# if ex_per_len[l] == 'comb': +# seqs = list(product(input_al, repeat=l)) +# for seq in seqs: +# +# out = automaton.reset() +# nr_steps = 0 +# for inp in seq: +# if automaton.possible(inp) or not break_on_impossible: +# nr_steps += 1 +# if stack_limit and len(automaton.stack) > stack_limit: +# break +# if break_on_impossible and not automaton.possible(inp): +# break +# out = automaton.step(inp) +# seq = seq[:nr_steps] +# training_data.append((tuple(seq), out if not classify_states else automaton.current_state.state_id)) +# +# else: +# for _ in range(ex_per_len[l] + additional_seq): +# # seq = [random.choice(input_al) for _ in range(l)] +# out = automaton.reset() +# nr_steps = 0 +# seq = [] +# for i in range(l): +# possible_inp = [inp for inp in input_al if automaton.possible(inp)] +# if len(possible_inp) == 0: +# inp = random.choice(input_al) +# else: +# if random.random() <= possible_prob: +# inp = random.choice(possible_inp) +# else: +# inp = random.choice(input_al) +# seq.append(inp) +# if automaton.possible(inp) or not break_on_impossible: +# nr_steps += 1 +# if stack_limit and len(automaton.stack) > stack_limit: +# break +# if break_on_impossible and not automaton.possible(inp): +# break +# out = automaton.step(inp) +# seq = seq[:nr_steps] +# training_data.append((tuple(seq), out if not classify_states else automaton.current_state.state_id)) +# +# return training_data, input_al, output_al diff --git a/aalpy/automata/__init__.py b/aalpy/automata/__init__.py index be6f2f7d..ed053744 100644 --- a/aalpy/automata/__init__.py +++ b/aalpy/automata/__init__.py @@ -5,3 +5,4 @@ from .Onfsm import Onfsm, OnfsmState from .StochasticMealyMachine import StochasticMealyMachine, StochasticMealyState from .MarkovChain import MarkovChain, McState +from .Pda import Pda \ No newline at end of 
file diff --git a/aalpy/learning_algs/__init__.py b/aalpy/learning_algs/__init__.py index a57a090a..bbc2fe4a 100644 --- a/aalpy/learning_algs/__init__.py +++ b/aalpy/learning_algs/__init__.py @@ -7,4 +7,5 @@ from .stochastic_passive.Alergia import run_Alergia, run_JAlergia from .stochastic_passive.ActiveAleriga import run_active_Alergia from .deterministic_passive.RPNI import run_RPNI -from .deterministic_passive.active_RPNI import run_active_RPNI \ No newline at end of file +from .deterministic_passive.active_RPNI import run_active_RPNI +from .vpda.VpdaLStar import run_vpda_Lstar \ No newline at end of file diff --git a/aalpy/learning_algs/vpda/VpdaLStar.py b/aalpy/learning_algs/vpda/VpdaLStar.py new file mode 100644 index 00000000..951c9d5c --- /dev/null +++ b/aalpy/learning_algs/vpda/VpdaLStar.py @@ -0,0 +1,181 @@ +import time + +from aalpy.base import Oracle, SUL +from aalpy.utils.HelperFunctions import extend_set, print_learning_info, print_observation_table, all_prefixes +from ..deterministic.CounterExampleProcessing import longest_prefix_cex_processing, rs_cex_processing, \ + counterexample_successfully_processed +from .VpdaObservationTable import VpdaObservationTable +from ...base.SUL import CacheSUL + +counterexample_processing_strategy = [None, 'rs', 'longest_prefix'] +closedness_options = ['suffix_all', 'suffix_single'] +print_options = [0, 1, 2, 3] + + +def run_vpda_Lstar(alphabet: list, sul: SUL, eq_oracle: Oracle, automaton_type, samples=None, + closing_strategy='shortest_first', cex_processing='rs', + e_set_suffix_closed=False, all_prefixes_in_obs_table=True, + max_learning_rounds=None, cache_and_non_det_check=True, return_data=False, print_level=2): + """ + Executes L* algorithm. + + Args: + + alphabet: input alphabet + + sul: system under learning + + eq_oracle: equivalence oracle + + automaton_type: type of automaton to be learned. Either 'dfa', 'mealy' or 'moore'. + + samples: input output traces provided to the learning algorithm. 
They are added to cache and could reduce + total interaction with the system. Syntax: list of [(input_sequence, output_sequence)] or None + + closing_strategy: closing strategy used in the close method. Either 'longest_first', 'shortest_first' or + 'single' (Default value = 'shortest_first') + + cex_processing: Counterexample processing strategy. Either None, 'rs' (Riverst-Schapire) or 'longest_prefix'. + (Default value = 'rs') + + e_set_suffix_closed: True option ensures that E set is suffix closed, + False adds just a single suffix per counterexample. + + all_prefixes_in_obs_table: if True, entries of observation table will contain the whole output of the whole + suffix, otherwise just the last output meaning that all prefixes of the suffix will be added. + If False, just a single suffix will be added. + + max_learning_rounds: number of learning rounds after which learning will terminate (Default value = None) + + cache_and_non_det_check: Use caching and non-determinism checks (Default value = True) + + return_data: if True, a map containing all information(runtime/#queries/#steps) will be returned + (Default value = False) + + print_level: 0 - None, 1 - just results, 2 - current round and hypothesis size, 3 - educational/debug + (Default value = 2) + + Returns: + + automaton of type automaton_type (dict containing all information about learning if 'return_data' is True) + + """ + + assert cex_processing in counterexample_processing_strategy + assert print_level in print_options + + if cache_and_non_det_check or samples is not None: + # Wrap the sul in the CacheSUL, so that all steps/queries are cached + sul = CacheSUL(sul) + eq_oracle.sul = sul + + if samples: + for input_seq, output_seq in samples: + sul.cache.add_to_cache(input_seq, output_seq) + + start_time = time.time() + eq_query_time = 0 + learning_rounds = 0 + hypothesis = None + + observation_table = VpdaObservationTable(alphabet, sul, automaton_type, all_prefixes_in_obs_table) + + # Initial update of 
observation table, for empty row + observation_table.update_obs_table() + cex = None + + while True: + if max_learning_rounds and learning_rounds == max_learning_rounds: + break + + # Make observation table consistent (iff there is no counterexample processing) + if not cex_processing: + inconsistent_rows = observation_table.get_causes_of_inconsistency() + while inconsistent_rows is not None: + added_suffix = extend_set(observation_table.E, inconsistent_rows) + observation_table.update_obs_table(e_set=added_suffix) + inconsistent_rows = observation_table.get_causes_of_inconsistency() + + # Close observation table + rows_to_close = observation_table.get_rows_to_close(closing_strategy) + while rows_to_close is not None: + rows_to_query = [] + for row in rows_to_close: + observation_table.S.append(row) + rows_to_query.extend([row + (a,) for a in alphabet]) + observation_table.update_obs_table(s_set=rows_to_query) + rows_to_close = observation_table.get_rows_to_close(closing_strategy) + + # Generate hypothesis + hypothesis = observation_table.gen_hypothesis(no_cex_processing_used=cex_processing is None) + # Find counterexample if none has previously been found (first round) and cex is successfully processed + # (not a counterexample in the current hypothesis) + if cex is None or counterexample_successfully_processed(sul, cex, hypothesis): + learning_rounds += 1 + + if print_level > 1: + print(f'Hypothesis {learning_rounds}: {len(hypothesis.states)} states.') + + if print_level == 3: + print_observation_table(observation_table, 'det') + + eq_query_start = time.time() + cex = eq_oracle.find_cex(hypothesis) + eq_query_time += time.time() - eq_query_start + + # If no counterexample is found, return the hypothesis + if cex is None: + break + + # make sure counterexample is a tuple in case oracle returns a list + cex = tuple(cex) + + if print_level == 3: + print('Counterexample', cex) + + # Process counterexample and ask membership queries + if not cex_processing: + 
s_to_update = [] + added_rows = extend_set(observation_table.S, all_prefixes(cex)) + s_to_update.extend(added_rows) + for p in added_rows: + s_to_update.extend([p + (a,) for a in alphabet]) + + observation_table.update_obs_table(s_set=s_to_update) + continue + + elif cex_processing == 'longest_prefix': + cex_suffixes = longest_prefix_cex_processing(observation_table.S + list(observation_table.s_dot_a()), + cex, closedness='suffix') + else: + cex_suffixes = rs_cex_processing(sul, cex, hypothesis, e_set_suffix_closed, closedness='suffix') + + added_suffixes = extend_set(observation_table.E, cex_suffixes) + observation_table.update_obs_table(e_set=added_suffixes) + + total_time = round(time.time() - start_time, 2) + eq_query_time = round(eq_query_time, 2) + learning_time = round(total_time - eq_query_time, 2) + + info = { + 'learning_rounds': learning_rounds, + 'automaton_size': hypothesis.size, + 'queries_learning': sul.num_queries, + 'steps_learning': sul.num_steps, + 'queries_eq_oracle': eq_oracle.num_queries, + 'steps_eq_oracle': eq_oracle.num_steps, + 'learning_time': learning_time, + 'eq_oracle_time': eq_query_time, + 'total_time': total_time, + 'characterization_set': observation_table.E + } + if cache_and_non_det_check: + info['cache_saved'] = sul.num_cached_queries + + if print_level > 0: + print_learning_info(info) + + if return_data: + return hypothesis, info + + return hypothesis diff --git a/aalpy/learning_algs/vpda/VpdaObservationTable.py b/aalpy/learning_algs/vpda/VpdaObservationTable.py new file mode 100644 index 00000000..e0101fbc --- /dev/null +++ b/aalpy/learning_algs/vpda/VpdaObservationTable.py @@ -0,0 +1,219 @@ +from collections import defaultdict + +from aalpy.base import Automaton, SUL +from aalpy.automata import Dfa, DfaState, MealyState, MealyMachine, MooreMachine, MooreState + +aut_type = ['dfa', 'mealy', 'moore', 'pda'] +closing_options = ['shortest_first', 'longest_first', 'single', 'single_longest'] + + +class VpdaObservationTable: + def 
__init__(self, alphabet: list, sul: SUL, automaton_type, prefixes_in_cell=False): + """ + Constructor of the observation table. Initial queries are asked in the constructor. + + Args: + + alphabet: input alphabet + sul: system under learning + automaton_type: automaton type, one of ['dfa', 'mealy', 'moore'] + + Returns: + + """ + assert automaton_type in aut_type + assert alphabet is not None and sul is not None + self.automaton_type = automaton_type + + # If True add prefixes of each element of E set to a cell, else only add the output + self.prefixes_in_cell = prefixes_in_cell + + self.A = [tuple([a]) for a in alphabet] + self.S = list() # prefixes of S + # DFA's can also take whole alphabet in E, this convention follows Angluin's paper + self.E = [] if self.automaton_type != 'mealy' else [tuple([a]) for a in alphabet] + # For performance reasons, the T function maps S to a tuple where element at index i is the element of the E + # set of index i. Therefore it is important to keep E set ordered and ask membership queries only when needed + # and in correct order. It would make more sense to implement it as a defaultdict(dict) where you can access + # elements via self.T[s][e], but it causes significant performance hit. + self.T = defaultdict(tuple) + + self.sul = sul + empty_word = tuple() + self.S.append(empty_word) + + # DFAs and Moore machines use empty word for identification of accepting states/state outputs + if self.automaton_type == 'dfa' or self.automaton_type == 'moore': + self.E.insert(0, empty_word) + + def get_rows_to_close(self, closing_strategy='longest_first'): + """ + Get rows for that need to be closed. Row selection is done according to closing_strategy. + The length of the row is defined by the length of the prefix corresponding to the row in the S set. 
+ longest_first -> get all rows that need to be closed and ask membership queries for the longest row first + shortest_first -> get all rows that need to be closed and ask membership queries for the shortest row first + single -> find and ask membership query for the single row + single_longest -> returns single longest row to close + + Args: + + closing_strategy: one of ['shortest_first', 'longest_first', 'single'] (Default value = 'longest_first') + + Returns: + + list if non-closed exist, None otherwise: rows that will be moved to S set and closed + + """ + assert closing_strategy in closing_options + rows_to_close = [] + row_values = set() + + s_rows = {self.T[s] for s in self.S} + + for t in self.s_dot_a(): + row_t = self.T[t] + if row_t not in s_rows and row_t not in row_values: + rows_to_close.append(t) + row_values.add(row_t) + + if closing_strategy == 'single': + return rows_to_close + + if not rows_to_close: + return None + + if 'longest' in closing_strategy: + rows_to_close.sort(key=len, reverse=True) + if closing_strategy == 'longest_first': + return rows_to_close + if closing_strategy == 'single_longest': + return [rows_to_close[0]] + + return rows_to_close + + def get_causes_of_inconsistency(self): + """ + If the two rows in the S set are the same, but their one letter extensions are not, this method founds + the cause of inconsistency and returns it. + :return: + + Returns: + + a+e values that are the causes of inconsistency + + """ + for i, s1 in enumerate(self.S): + for s2 in self.S[i + 1:]: + if self.T[s1] == self.T[s2]: + for a in self.A: + if self.T[s1 + a] != self.T[s2 + a]: + for index, e in enumerate(self.E): + if self.T[s1 + a][index] != self.T[s2 + a][index]: + return [(a + e)] + + return None + + def s_dot_a(self): + """ + Helper generator function that returns extended S, or S.A set. 
+ """ + s_set = set(self.S) + for s in self.S: + for a in self.A: + if s + a not in s_set: + yield s + a + + def update_obs_table(self, s_set: list = None, e_set: list = None): + """ + Perform the membership queries. + + Args: + + s_set: Prefixes of S set on which to preform membership queries. If None, then whole S set will be used. + + e_set: Suffixes of E set on which to perform membership queries. If None, then whole E set will be used. + + Returns: + + """ + + update_S = s_set if s_set else list(self.S) + list(self.s_dot_a()) + update_E = e_set if e_set else self.E + + # This could save few queries + update_S.reverse() + + for s in update_S: + for e in update_E: + if len(self.T[s]) != len(self.E): + output = tuple(self.sul.query(s + e)) + if self.prefixes_in_cell and len(e) > 1: + obs_table_entry = tuple([output[-len(e):]],) + else: + obs_table_entry = (output[-1],) + self.T[s] += obs_table_entry + + def gen_hypothesis(self, no_cex_processing_used=False) -> Automaton: + """ + Generate automaton based on the values found in the observation table. 
+ :return: + + Args: + + check_for_duplicate_rows: (Default value = False) + + Returns: + + Automaton of type `automaton_type` + + """ + state_distinguish = dict() + states_dict = dict() + initial_state = None + automaton_class = {'dfa': Dfa, 'mealy': MealyMachine, 'moore': MooreMachine} + + s_set = self.S + # Added check for the algorithm without counterexample processing + if no_cex_processing_used: + s_set = self._get_row_representatives() + + # create states based on S set + stateCounter = 0 + for prefix in s_set: + state_id = f's{stateCounter}' + + if self.automaton_type == 'dfa': + states_dict[prefix] = DfaState(state_id) + states_dict[prefix].is_accepting = self.T[prefix][0] + elif self.automaton_type == 'moore': + states_dict[prefix] = MooreState(state_id, output=self.T[prefix][0]) + else: + states_dict[prefix] = MealyState(state_id) + + states_dict[prefix].prefix = prefix + state_distinguish[tuple(self.T[prefix])] = states_dict[prefix] + + if not prefix: + initial_state = states_dict[prefix] + stateCounter += 1 + + # add transitions based on extended S set + for prefix in s_set: + for a in self.A: + state_in_S = state_distinguish[self.T[prefix + a]] + states_dict[prefix].transitions[a[0]] = state_in_S + if self.automaton_type == 'mealy': + states_dict[prefix].output_fun[a[0]] = self.T[prefix][self.E.index(a)] + + automaton = automaton_class[self.automaton_type](initial_state, list(states_dict.values())) + automaton.characterization_set = self.E + + return automaton + + def _get_row_representatives(self): + self.S.sort(key=len) + representatives = defaultdict(list) + for prefix in self.S: + representatives[self.T[prefix]].append(prefix) + + return [r[0] for r in representatives.values()] diff --git a/aalpy/learning_algs/vpda/__init__.py b/aalpy/learning_algs/vpda/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/aalpy/utils/BenchmarkPdaModels.py b/aalpy/utils/BenchmarkPdaModels.py new file mode 100644 index 00000000..8ed8bb6b --- 
/dev/null +++ b/aalpy/utils/BenchmarkPdaModels.py @@ -0,0 +1,264 @@ +from aalpy.automata.Pda import Pda + + +def pda_for_L1(): + # we always ensure that n >= 1 + state_setup = { + "q0": (False, {"a": [("q1", 'push', None)], "b": [(Pda.error_state.state_id, None, None)]}), + "q1": (False, {"a": [("q1", 'push', None)], "b": [("q2", 'pop', "a")]}), + "q2": (True, {"a": [(Pda.error_state.state_id, None, None)], "b": [("q2", 'pop', "a")]}), + } + pda = Pda.from_state_setup(state_setup, "q0") + return pda + + +def pda_for_L2(): + state_setup = { + "q0": (False, {"a": [("q1", 'push', None)], "b": [("q1", 'push', None)], + "c": [(Pda.error_state.state_id, None, None)], + "d": [(Pda.error_state.state_id, None, None)]}), + "q1": (False, {"a": [("q1", 'push', None)], "b": [("q1", 'push', None)], + "c": [("q2", 'pop', "a"), ("q2", 'pop', "b")], + "d": [("q2", 'pop', "a"), ("q2", 'pop', "b")]}), + "q2": (True, {"a": [(Pda.error_state.state_id, None, None)], + "b": [(Pda.error_state.state_id, None, None)], + "c": [("q2", 'pop', "a"), ("q2", 'pop', "b")], + "d": [("q2", 'pop', "a"), ("q2", 'pop', "b")]}), + } + pda = Pda.from_state_setup(state_setup, "q0") + return pda + + +def pda_for_L3(): + state_setup = { + "q0": (False, {"a": [("q0a", 'push', None)], + "c": [("q0c", 'push', None)], + }), + "q0a": (False, {"b": [("q1", 'push', None)]}), + "q0c": (False, {"d": [("q1", 'push', None)]}), + "q1": (False, {"a": [("q1a", 'push', None)], + "c": [("q1c", 'push', None)], + "e": [("q2e", 'pop', "b"), ("q2e", 'pop', "d")], + "g": [("q2g", 'pop', "b"), ("q2g", 'pop', "d")], # stack should actually be redundant + }), + "q1a": (False, {"b": [("q1", 'push', None)]}), + "q1c": (False, {"d": [("q1", 'push', None)]}), + "q2e": (False, {"f": [("q2", 'pop', "a"), ("q2", 'pop', "c")]}), + "q2g": (False, {"h": [("q2", 'pop', "a"), ("q2", 'pop', "c")]}), + "q2": (True, {"e": [("q2e", 'pop', "b"), ("q2e", 'pop', "d")], + "g": [("q2g", 'pop', "b"), ("q2g", 'pop', "d")]}) + } + pda = 
Pda.from_state_setup(state_setup, "q0") + return pda + + +def pda_for_L4(): + state_setup = { + "q0": (False, {"a": [("q01", 'push', None)], "b": [(Pda.error_state.state_id, None, None)]}), + "q01": (False, {"b": [("q1", 'push', None)], "a": [(Pda.error_state.state_id, None, None)]}), + + "q1": (False, {"a": [("q11", 'push', None)], "b": [(Pda.error_state.state_id, None, None)], + "c": [("q21", 'pop', "b")]}), + "q11": (False, {"b": [("q1", 'push', None)], "a": [(Pda.error_state.state_id, None, None)]}), + "q21": (False, {"d": [("q2", 'pop', "a")]}), + "q2": (True, {"c": [("q21", 'pop', "b")]}), + } + pda = Pda.from_state_setup(state_setup, "q0") + return pda + + +def pda_for_L5(): + state_setup = { + "q0": (False, {"a": [("q01", 'push', None)]}), + "q01": (False, {"b": [("q02", 'push', None)]}), + "q02": (False, {"c": [("q1", 'push', None)]}), + "q1": (False, {"a": [("q11", 'push', None)], + "d": [("q21", 'pop', "c")]}), + "q11": (False, {"b": [("q12", 'push', None)]}), + "q12": (False, {"c": [("q1", 'push', None)]}), + "q21": (False, {"e": [("q22", 'pop', "b")]}), + "q22": (False, {"f": [("q2", 'pop', "a")]}), + "q2": (True, {"d": [("q21", 'pop', "c")]}), + } + pda = Pda.from_state_setup(state_setup, "q0") + return pda + + +def pda_for_L7(): + # Dyck order 2 + state_setup = { + "q0": (False, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], # exclude empty seq + }), + "q1": (True, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], + ")": [("q1", 'pop', "(")], + "]": [("q1", 'pop', "[")] + }), + } + pda = Pda.from_state_setup(state_setup, "q0") + return pda + + +def pda_for_L8(): + # Dyck order 3 + state_setup = { + "q0": (False, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], + "{": [("q1", 'push', None)], + }), + "q1": (True, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], + "{": [("q1", 'push', None)], + ")": [("q1", 'pop', "(")], + "]": [("q1", 'pop', "[")], + "}": [("q1", 'pop', "{")], + }), + } + pda = 
Pda.from_state_setup(state_setup, "q0") + return pda + + +def pda_for_L9(): + # Dyck order 4 + state_setup = { + "q0": (False, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], + "{": [("q1", 'push', None)], + "<": [("q1", 'push', None)], + }), + "q1": (True, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], + "{": [("q1", 'push', None)], + "<": [("q1", 'push', None)], + ")": [("q1", 'pop', "(")], + "]": [("q1", 'pop', "[")], + "}": [("q1", 'pop', "{")], + ">": [("q1", 'pop', "{")], + }), + } + pda = Pda.from_state_setup(state_setup, "q0") + return pda + + +def pda_for_L10(): + # RE Dyck order 1 + state_setup = { + "q0": (False, {"a": [("qa", 'push', None)], + }), + "qa": (False, {"b": [("qb", None, None)], + }), + "qb": (False, {"c": [("qc", None, None)], + }), + "qc": (False, {"d": [("qd", None, None)], + }), + "qd": (False, {"e": [("q1", None, None)], + }), + "q1": (True, {"a": [("qa", 'push', None)], + "v": [("qv", 'pop', "a")]}), + "qv": (False, {"w": [("qw", None, None)]}), + "qw": (False, {"x": [("qx", None, None)]}), + "qx": (False, {"y": [("qy", None, None)]}), + "qy": (False, {"z": [("q1", None, None)]}) + } + pda = Pda.from_state_setup(state_setup, "q0") + return pda + + +def pda_for_L11(): + # RE Dyck order 1 + state_setup = { + "q0": (False, {"a": [("qa", 'push', None)], + "c": [("q1", 'push', None)], + }), + "qa": (False, {"b": [("q1", None, None)], + }), + "q1": (True, {"a": [("qa", 'push', None)], + "c": [("q1", 'push', None)], + "d": [("qd", 'pop', "a"), ("qd", 'pop', "c")], + "f": [("q1", 'pop', "a"), ("q1", 'pop', "c")]}), + "qd": (False, {"e": [("q1", None, None)]}) + } + pda = Pda.from_state_setup(state_setup, "q0") + return pda + + +def pda_for_L12(): + # Dyck order 2 (single-nested) + state_setup = { + "q0": (False, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], # exclude empty seq + }), + "q1": (False, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], + ")": [("q2", 'pop', "(")], + "]": [("q2", 
'pop', "[")]}), + "q2": (True, { + ")": [("q2", 'pop', "(")], + "]": [("q2", 'pop', "[")] + }), + } + pda = Pda.from_state_setup(state_setup, "q0") + return pda + + +def pda_for_L13(): + # Dyck order 1 + state_setup = { + "q0": (False, {"(": [("q1", 'push', None)], + "a": [("q1", None, None)], + "b": [("q1", None, None)], + "c": [("q1", None, None)], # exclude empty seq + }), + "q1": (True, {"(": [("q1", 'push', None)], + ")": [("q1", 'pop', "(")], + "a": [("q1", None, None)], + "b": [("q1", None, None)], + "c": [("q1", None, None)] + }), + } + pda = Pda.from_state_setup(state_setup, "q0") + return pda + + +def pda_for_L14(): + # Dyck order 2 + state_setup = { + "q0": (False, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], + "a": [("q1", None, None)], + "b": [("q1", None, None)], + "c": [("q1", None, None)], # exclude empty seq + }), + "q1": (True, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], + ")": [("q1", 'pop', "(")], + "]": [("q1", 'pop', "[")], + "a": [("q1", None, None)], + "b": [("q1", None, None)], + "c": [("q1", None, None)] + }), + } + pda = Pda.from_state_setup(state_setup, "q0") + return pda + + +def pda_for_L15(): + # Dyck order 1 + state_setup = { + "q0": (False, {"(": [("q1", 'push', None)], + "a": [("qa", None, None)], + "d": [("q1", None, None)], # exclude empty seq + }), + "q1": (True, {"(": [("q1", 'push', None)], + ")": [("q1", 'pop', "(")], + "a": [("qa", None, None)], + "d": [("q1", None, None)], + }), + "qa": (False, {"b": [("qb", None, None)], + }), + "qb": (False, {"c": [("q1", None, None)], + }) + } + pda = Pda.from_state_setup(state_setup, "q0") + return pda diff --git a/aalpy/utils/BenchmarkSULs.py b/aalpy/utils/BenchmarkSULs.py index bb4bd36d..50f918b0 100644 --- a/aalpy/utils/BenchmarkSULs.py +++ b/aalpy/utils/BenchmarkSULs.py @@ -1,5 +1,5 @@ def get_Angluin_dfa(): - from aalpy.utils.AutomatonGenerators import dfa_from_state_setup + from aalpy.utils import dfa_from_state_setup anguin_dfa = { 'q0': (True, 
{'a': 'q1', 'b': 'q2'}), diff --git a/aalpy/utils/FileHandler.py b/aalpy/utils/FileHandler.py index 95898f46..7b2760b4 100644 --- a/aalpy/utils/FileHandler.py +++ b/aalpy/utils/FileHandler.py @@ -6,11 +6,11 @@ from pydot import Dot, Node, Edge, graph_from_dot_file from aalpy.automata import Dfa, MooreMachine, Mdp, Onfsm, MealyState, DfaState, MooreState, MealyMachine, \ - MdpState, StochasticMealyMachine, StochasticMealyState, OnfsmState, MarkovChain, McState + MdpState, StochasticMealyMachine, StochasticMealyState, OnfsmState, MarkovChain, McState, Pda file_types = ['dot', 'png', 'svg', 'pdf', 'string'] automaton_types = {Dfa: 'dfa', MealyMachine: 'mealy', MooreMachine: 'moore', Mdp: 'mdp', - StochasticMealyMachine: 'smm', Onfsm: 'onfsm', MarkovChain: 'mc'} + StochasticMealyMachine: 'smm', Onfsm: 'onfsm', MarkovChain: 'mc', Pda: 'pda'} def _wrap_label(label): @@ -30,7 +30,8 @@ def _get_node(state, automaton_type): if automaton_type == 'mealy': return Node(state.state_id, label=_wrap_label(state.state_id)) if automaton_type == 'moore': - return Node(state.state_id, label=_wrap_label(f'{state.state_id}|{state.output}'), shape='record', style='rounded') + return Node(state.state_id, label=_wrap_label(f'{state.state_id}|{state.output}'), shape='record', + style='rounded') if automaton_type == 'onfsm': return Node(state.state_id, label=_wrap_label(state.state_id)) if automaton_type == 'mc': @@ -39,6 +40,10 @@ def _get_node(state, automaton_type): return Node(state.state_id, label=_wrap_label(f'{state.output}')) if automaton_type == 'smm': return Node(state.state_id, label=_wrap_label(state.state_id)) + if automaton_type == 'pda': + if state.is_accepting: + return Node(state.state_id, label=_wrap_label(state.state_id), shape='doublecircle') + return Node(state.state_id, label=_wrap_label(state.state_id)) def _add_transition_to_graph(graph, state, automaton_type, display_same_state_trans, round_floats): @@ -81,6 +86,21 @@ def _add_transition_to_graph(graph, state, 
automaton_type, display_same_state_tr continue prob = round(s[2], round_floats) if round_floats else s[2] graph.add_edge(Edge(state.state_id, s[0].state_id, label=_wrap_label(f'{i}/{s[1]}:{prob}'))) + if automaton_type == 'pda': + for i in state.transitions.keys(): + transitions_list = state.transitions[i] + for transition in transitions_list: + if transition.action is None: + graph.add_edge(Edge(transition.start.state_id, transition.target.state_id, + label=_wrap_label(f'{transition.symbol}'))) + if transition.action == 'push': + graph.add_edge(Edge(transition.start.state_id, transition.target.state_id, + label=_wrap_label(f'{transition.symbol}/push(\'{transition.symbol}\')'))) + if transition.action == 'pop': + graph.add_edge(Edge(transition.start.state_id, transition.target.state_id, + label=_wrap_label(f'{transition.symbol}/pop(\'{transition.stack_guard}\')'))) + + def visualize_automaton(automaton, path="LearnedModel", file_type="pdf", display_same_state_trans=True): @@ -146,6 +166,8 @@ def save_automaton_to_file(automaton, path="LearnedModel", file_type="dot", graph = Dot(path.stem, graph_type='digraph') for state in automaton.states: + if automaton_type == 'pda' and state.state_id == 'ErrorSinkState': + continue graph.add_node(_get_node(state, automaton_type)) for state in automaton.states: diff --git a/aalpy/utils/__init__.py b/aalpy/utils/__init__.py index d06fa003..53cb7b67 100644 --- a/aalpy/utils/__init__.py +++ b/aalpy/utils/__init__.py @@ -2,6 +2,7 @@ generate_random_moore_machine, generate_random_markov_chain, generate_random_deterministic_automata from .AutomatonGenerators import generate_random_mdp, generate_random_ONFSM from .BenchmarkSULs import * +from .BenchmarkPdaModels import * from .DataHandler import DataHandler, CharacterTokenizer, DelimiterTokenizer, IODelimiterTokenizer from .FileHandler import save_automaton_to_file, load_automaton_from_file, visualize_automaton from .ModelChecking import model_check_experiment, mdp_2_prism_format, 
model_check_properties, get_properties_file, \ diff --git a/pda_main_experiments.py b/pda_main_experiments.py new file mode 100644 index 00000000..da8f83bd --- /dev/null +++ b/pda_main_experiments.py @@ -0,0 +1,16 @@ +from aalpy.SULs.AutomataSUL import PdaSUL +from aalpy.learning_algs import run_vpda_Lstar +from aalpy.oracles import RandomWMethodEqOracle +from aalpy.utils.BenchmarkPdaModels import pda_for_L12 + +pda = pda_for_L12() + +# pda.visualize() + +input_alphabet = pda.get_input_alphabet() +sul = PdaSUL(pda, include_top=True) + +eq_oracle = RandomWMethodEqOracle(alphabet=input_alphabet, sul=sul, walks_per_state=100, walk_len=10) +model = run_vpda_Lstar(alphabet=input_alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type="dfa", print_level=3, + max_learning_rounds=3) +model.visualize() From 264cad8204633cd1fb1fffd2afb50c0631d97b3f Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Tue, 26 Sep 2023 14:04:25 +0200 Subject: [PATCH 02/62] add initial setup for PDAs --- pda_main_experiments.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pda_main_experiments.py b/pda_main_experiments.py index da8f83bd..bb7a4211 100644 --- a/pda_main_experiments.py +++ b/pda_main_experiments.py @@ -1,11 +1,11 @@ from aalpy.SULs.AutomataSUL import PdaSUL from aalpy.learning_algs import run_vpda_Lstar from aalpy.oracles import RandomWMethodEqOracle -from aalpy.utils.BenchmarkPdaModels import pda_for_L12 +from aalpy.utils.BenchmarkPdaModels import * pda = pda_for_L12() -# pda.visualize() +pda.visualize() input_alphabet = pda.get_input_alphabet() sul = PdaSUL(pda, include_top=True) @@ -13,4 +13,3 @@ eq_oracle = RandomWMethodEqOracle(alphabet=input_alphabet, sul=sul, walks_per_state=100, walk_len=10) model = run_vpda_Lstar(alphabet=input_alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type="dfa", print_level=3, max_learning_rounds=3) -model.visualize() From 11d07c878974e96856d97532ffb7e96d15d2562c Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Tue, 26 
Sep 2023 17:48:46 +0200 Subject: [PATCH 03/62] fix pda step --- aalpy/SULs/AutomataSUL.py | 5 +- aalpy/automata/Pda.py | 148 ++++++++++++++++++++------------------ pda_main_experiments.py | 16 +++-- 3 files changed, 93 insertions(+), 76 deletions(-) diff --git a/aalpy/SULs/AutomataSUL.py b/aalpy/SULs/AutomataSUL.py index c0c0be72..22ac8c3a 100644 --- a/aalpy/SULs/AutomataSUL.py +++ b/aalpy/SULs/AutomataSUL.py @@ -167,10 +167,11 @@ def step(self, letter): class PdaSUL(SUL): - def __init__(self, pda: Pda, include_top=True): + def __init__(self, pda: Pda, include_top=True, check_balance=True): super().__init__() self.pda = pda self.include_top = include_top + self.check_balance = check_balance def pre(self): self.pda.reset_to_initial() @@ -182,5 +183,7 @@ def step(self, letter): output = self.pda.step(letter) top = self.pda.top() if self.include_top: + if self.check_balance and self.pda.call_balance < 0: + return output, '-' return output, top return output diff --git a/aalpy/automata/Pda.py b/aalpy/automata/Pda.py index 8d716f31..60b29100 100644 --- a/aalpy/automata/Pda.py +++ b/aalpy/automata/Pda.py @@ -32,6 +32,7 @@ def __init__(self, initial_state: PdaState, states): self.initial_state = initial_state self.states = states self.current_state = None + self.call_balance = 0 self.stack = [] def reset_to_initial(self): @@ -41,6 +42,7 @@ def reset_to_initial(self): def reset(self): self.current_state = self.initial_state self.stack = [self.empty] + self.call_balance = 0 return self.current_state.is_accepting and self.top() == self.empty def top(self): @@ -63,7 +65,10 @@ def possible(self, letter): return False def step(self, letter): - if self.current_state == Pda.error_state or not self.possible(letter): + if self.current_state == Pda.error_state: + return False + if not self.possible(letter): + self.current_state = Pda.error_state return False if letter is not None: transitions = self.current_state.transitions[letter] @@ -143,73 +148,74 @@ def 
from_state_setup(state_setup: dict, init_state_id): return pda -# def generate_data_from_pda(automaton, num_examples, lens=None, classify_states=False, stack_limit=None, -# break_on_impossible=False, possible_prob=0.75): -# input_al = automaton.get_input_alphabet() -# output_al = [False, True] -# if classify_states: -# output_al = [s.state_id for s in automaton.states] -# -# if lens is None: -# lens = list(range(1, 15)) -# -# sum_lens = sum(lens) -# # key is length, value is number of examples for said length -# ex_per_len = dict() -# -# additional_seq = 0 -# for l in lens: -# ex_per_len[l] = int(num_examples * (l / sum_lens)) + 1 -# if ex_per_len[l] > pow(len(input_al), l): -# additional_seq += ex_per_len[l] - pow(len(input_al), l) -# ex_per_len[l] = 'comb' -# -# additional_seq = additional_seq // len([i for i in ex_per_len.values() if i != 'comb']) -# -# training_data = [] -# for l in ex_per_len.keys(): -# seqs = [] -# if ex_per_len[l] == 'comb': -# seqs = list(product(input_al, repeat=l)) -# for seq in seqs: -# -# out = automaton.reset() -# nr_steps = 0 -# for inp in seq: -# if automaton.possible(inp) or not break_on_impossible: -# nr_steps += 1 -# if stack_limit and len(automaton.stack) > stack_limit: -# break -# if break_on_impossible and not automaton.possible(inp): -# break -# out = automaton.step(inp) -# seq = seq[:nr_steps] -# training_data.append((tuple(seq), out if not classify_states else automaton.current_state.state_id)) -# -# else: -# for _ in range(ex_per_len[l] + additional_seq): -# # seq = [random.choice(input_al) for _ in range(l)] -# out = automaton.reset() -# nr_steps = 0 -# seq = [] -# for i in range(l): -# possible_inp = [inp for inp in input_al if automaton.possible(inp)] -# if len(possible_inp) == 0: -# inp = random.choice(input_al) -# else: -# if random.random() <= possible_prob: -# inp = random.choice(possible_inp) -# else: -# inp = random.choice(input_al) -# seq.append(inp) -# if automaton.possible(inp) or not break_on_impossible: -# 
nr_steps += 1 -# if stack_limit and len(automaton.stack) > stack_limit: -# break -# if break_on_impossible and not automaton.possible(inp): -# break -# out = automaton.step(inp) -# seq = seq[:nr_steps] -# training_data.append((tuple(seq), out if not classify_states else automaton.current_state.state_id)) -# -# return training_data, input_al, output_al +def generate_data_from_pda(automaton, num_examples, lens=None, classify_states=False, stack_limit=None, + break_on_impossible=False, possible_prob=0.75): + import random + from itertools import product + + input_al = automaton.get_input_alphabet() + + if lens is None: + lens = list(range(1, 15)) + + sum_lens = sum(lens) + # key is length, value is number of examples for said length + ex_per_len = dict() + + additional_seq = 0 + for l in lens: + ex_per_len[l] = int(num_examples * (l / sum_lens)) + 1 + if ex_per_len[l] > pow(len(input_al), l): + additional_seq += ex_per_len[l] - pow(len(input_al), l) + ex_per_len[l] = 'comb' + + additional_seq = additional_seq // len([i for i in ex_per_len.values() if i != 'comb']) + + training_data = [] + for l in ex_per_len.keys(): + seqs = [] + if ex_per_len[l] == 'comb': + + seqs = list(product(input_al, repeat=l)) + for seq in seqs: + + out = automaton.reset() + nr_steps = 0 + for inp in seq: + if automaton.possible(inp) or not break_on_impossible: + nr_steps += 1 + if stack_limit and len(automaton.stack) > stack_limit: + break + if break_on_impossible and not automaton.possible(inp): + break + out = automaton.step(inp) + seq = seq[:nr_steps] + training_data.append((tuple(seq), out if not classify_states else automaton.current_state.state_id)) + + else: + for _ in range(ex_per_len[l] + additional_seq): + # seq = [random.choice(input_al) for _ in range(l)] + out = automaton.reset() + nr_steps = 0 + seq = [] + for i in range(l): + possible_inp = [inp for inp in input_al if automaton.possible(inp)] + if len(possible_inp) == 0: + inp = random.choice(input_al) + else: + if 
random.random() <= possible_prob: + inp = random.choice(possible_inp) + else: + inp = random.choice(input_al) + seq.append(inp) + if automaton.possible(inp) or not break_on_impossible: + nr_steps += 1 + if stack_limit and len(automaton.stack) > stack_limit: + break + if break_on_impossible and not automaton.possible(inp): + break + out = automaton.step(inp) + seq = seq[:nr_steps] + training_data.append((tuple(seq), out)) + + return training_data diff --git a/pda_main_experiments.py b/pda_main_experiments.py index bb7a4211..f2b62c39 100644 --- a/pda_main_experiments.py +++ b/pda_main_experiments.py @@ -1,15 +1,23 @@ from aalpy.SULs.AutomataSUL import PdaSUL +from aalpy.automata.Pda import generate_data_from_pda from aalpy.learning_algs import run_vpda_Lstar from aalpy.oracles import RandomWMethodEqOracle from aalpy.utils.BenchmarkPdaModels import * -pda = pda_for_L12() -pda.visualize() +pda = pda_for_L12() input_alphabet = pda.get_input_alphabet() -sul = PdaSUL(pda, include_top=True) +sul = PdaSUL(pda, include_top=True, check_balance=True) + +pda_sequances = generate_data_from_pda(pda, 10000) +accepting_seq, rejecting_seq = [x[0] for x in pda_sequances if x[1]], [x[0] for x in pda_sequances if not x[1]] +accepting_seq.sort(key=len) +print('Positive') +for i in range(10): + print(accepting_seq[i]) +exit() eq_oracle = RandomWMethodEqOracle(alphabet=input_alphabet, sul=sul, walks_per_state=100, walk_len=10) model = run_vpda_Lstar(alphabet=input_alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type="dfa", print_level=3, - max_learning_rounds=3) + max_learning_rounds=1) From 1c7945aab0151134c231dff21dd45bcae5e34a95 Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Sat, 30 Sep 2023 17:29:55 +0200 Subject: [PATCH 04/62] Adaptions for VPA --- aalpy/SULs/AutomataSUL.py | 25 +- aalpy/automata/Vpa.py | 248 ++++++++++++++++++ aalpy/automata/__init__.py | 3 +- aalpy/learning_algs/vpda/VpdaLStar.py | 9 +- .../vpda/VpdaObservationTable.py | 96 +++++-- 
aalpy/utils/BenchmarkVpaModels.py | 41 +++ aalpy/utils/FileHandler.py | 27 +- pda_main_experiments.py | 3 +- vpa_main_experiments.py | 31 +++ 9 files changed, 455 insertions(+), 28 deletions(-) create mode 100644 aalpy/automata/Vpa.py create mode 100644 aalpy/utils/BenchmarkVpaModels.py create mode 100644 vpa_main_experiments.py diff --git a/aalpy/SULs/AutomataSUL.py b/aalpy/SULs/AutomataSUL.py index 22ac8c3a..1cf07e15 100644 --- a/aalpy/SULs/AutomataSUL.py +++ b/aalpy/SULs/AutomataSUL.py @@ -1,5 +1,5 @@ from aalpy.base import SUL -from aalpy.automata import Dfa, MealyMachine, MooreMachine, Onfsm, Mdp, StochasticMealyMachine, MarkovChain, Pda +from aalpy.automata import Dfa, MealyMachine, MooreMachine, Onfsm, Mdp, StochasticMealyMachine, MarkovChain, Pda, Vpa class DfaSUL(SUL): @@ -187,3 +187,26 @@ def step(self, letter): return output, '-' return output, top return output + + +class VpaSUL(SUL): + def __init__(self, vpa: Vpa, include_top=True, check_balance=True): + super().__init__() + self.vpa = vpa + self.include_top = include_top + self.check_balance = check_balance + + def pre(self): + self.vpa.reset_to_initial() + + def post(self): + pass + + def step(self, letter): + output = self.vpa.step(letter) + top = self.vpa.top() + if self.include_top: + if self.check_balance and self.vpa.call_balance < 0: + return output, '-' + return output, top + return output diff --git a/aalpy/automata/Vpa.py b/aalpy/automata/Vpa.py new file mode 100644 index 00000000..ad7fdb92 --- /dev/null +++ b/aalpy/automata/Vpa.py @@ -0,0 +1,248 @@ +from collections import defaultdict + +from aalpy.base import Automaton, AutomatonState + + +class VpaState(AutomatonState): + """ + Single state of a deterministic finite automaton. 
+ """ + + def __init__(self, state_id, is_accepting=False): + super().__init__(state_id) + self.transitions = defaultdict(list) + self.is_accepting = is_accepting + + +class VpaTransition: + def __init__(self, start: VpaState, target: VpaState, symbol, action, stack_guard=None): + self.start = start + self.target = target + self.symbol = symbol + self.action = action + self.stack_guard = stack_guard + + def __str__(self): + return f"{self.symbol}: {self.start.state_id} --> {self.target.state_id} | {self.action}: {self.stack_guard}" + + +class Vpa(Automaton): + empty = "_" + error_state = VpaState("ErrorSinkState", False) + + def __init__(self, initial_state: VpaState, states, call_set, return_set, internal_set): + super().__init__(initial_state, states) + self.initial_state = initial_state + self.states = states + self.call_set = call_set + self.return_set = return_set + self.internal_set = internal_set + self.current_state = None + self.call_balance = 0 + self.stack = [] + + def reset_to_initial(self): + super().reset_to_initial() + self.reset() + + def reset(self): + self.current_state = self.initial_state + self.stack = [self.empty] + self.call_balance = 0 + return self.current_state.is_accepting and self.top() == self.empty + + def top(self): + return self.stack[-1] + + def pop(self): + return self.stack.pop() + + def possible(self, letter): + """ + Checks if a certain step on the automaton is possible + + TODO: Adaptation for Stack content ? 
+ """ + if self.current_state == Vpa.error_state: + return True + if letter is not None: + transitions = self.current_state.transitions[letter] + trans = [t for t in transitions if t.stack_guard is None or self.top() == t.stack_guard] + assert len(trans) < 2 + if len(trans) == 0: + return False + else: + return True + return False + + def step(self, letter): + if self.current_state == Vpa.error_state: + return False + if not self.possible(letter): + self.current_state = Vpa.error_state + return False + if letter is not None: + transitions = self.current_state.transitions[letter] + trans = [t for t in transitions if t.stack_guard is None or self.top() == t.stack_guard][0] + self.current_state = trans.target + if trans.action == 'push': + assert(letter in self.call_set) # push letters must be in call set + self.stack.append(letter) + elif trans.action == 'pop': + assert(letter in self.return_set) # pop letters must be in return set + if len(self.stack) <= 1: # empty stack elem should always be there + self.current_state = Vpa.error_state + return False + self.stack.pop() + + return self.current_state.is_accepting and self.top() == self.empty + + # def compute_output_seq(self, state, sequence): + # if not sequence: + # return [state.is_accepting] + # return super(Dfa, self).compute_output_seq(state, sequence) + + def get_input_alphabet(self) -> list: + alphabet_list = list() + alphabet_list.append(self.call_set) + alphabet_list.append(self.return_set) + alphabet_list.append(self.internal_set) + return alphabet_list + + def get_input_alphabet_merged(self) -> list: + alphabet = list() + alphabet.extend(self.call_set) + alphabet.extend(self.return_set) + alphabet.extend(self.internal_set) + return alphabet + + def to_state_setup(self): + state_setup_dict = {} + + # ensure prefixes are computed + # self.compute_prefixes() + + sorted_states = sorted(self.states, key=lambda x: len(x.prefix)) + for s in sorted_states: + state_setup_dict[s.state_id] = ( + s.is_accepting, {k: 
(v.target.state_id, v.action) for k, v in s.transitions.items()}) + + return state_setup_dict + + @staticmethod + def from_state_setup(state_setup: dict, init_state_id, call_set, return_set, internal_set): + """ + First state in the state setup is the initial state. + Example state setup: + state_setup = { + "a": (True, {"x": ("b1",PUSH), "y": ("a", NONE)}), + "b1": (False, {"x": ("b2", PUSH), "y": "a"}), + "b2": (True, {"x": "b3", "y": "a"}), + "b3": (False, {"x": "b4", "y": "a"}), + "b4": (False, {"x": "c", "y": "a"}), + "c": (True, {"x": "a", "y": "a"}), + } + + Args: + + state_setup: map from state_id to tuple(output and transitions_dict) + + Returns: + + PDA + """ + # state_setup should map from state_id to tuple(is_accepting and transitions_dict) + + # build states with state_id and output + states = {key: VpaState(key, val[0]) for key, val in state_setup.items()} + states[Vpa.error_state.state_id] = Vpa.error_state # PdaState(Pda.error_state,False) + # add transitions to states + for state_id, state in states.items(): + if state_id == Vpa.error_state.state_id: + continue + for _input, trans_spec in state_setup[state_id][1].items(): + for (target_state_id, action, stack_guard) in trans_spec: + # action = Action[action_string] + trans = VpaTransition(start=state, target=states[target_state_id], symbol=_input, action=action, + stack_guard=stack_guard) + state.transitions[_input].append(trans) + + init_state = states[init_state_id] + # states to list + states = [state for state in states.values()] + + pda = Vpa(init_state, states, call_set, return_set, internal_set) + return pda + + +def generate_data_from_pda(automaton, num_examples, lens=None, classify_states=False, stack_limit=None, + break_on_impossible=False, possible_prob=0.75): + import random + from itertools import product + + input_al = automaton.get_input_alphabet() + + if lens is None: + lens = list(range(1, 15)) + + sum_lens = sum(lens) + # key is length, value is number of examples for said length 
+ ex_per_len = dict() + + additional_seq = 0 + for l in lens: + ex_per_len[l] = int(num_examples * (l / sum_lens)) + 1 + if ex_per_len[l] > pow(len(input_al), l): + additional_seq += ex_per_len[l] - pow(len(input_al), l) + ex_per_len[l] = 'comb' + + additional_seq = additional_seq // len([i for i in ex_per_len.values() if i != 'comb']) + + training_data = [] + for l in ex_per_len.keys(): + seqs = [] + if ex_per_len[l] == 'comb': + + seqs = list(product(input_al, repeat=l)) + for seq in seqs: + + out = automaton.reset() + nr_steps = 0 + for inp in seq: + if automaton.possible(inp) or not break_on_impossible: + nr_steps += 1 + if stack_limit and len(automaton.stack) > stack_limit: + break + if break_on_impossible and not automaton.possible(inp): + break + out = automaton.step(inp) + seq = seq[:nr_steps] + training_data.append((tuple(seq), out if not classify_states else automaton.current_state.state_id)) + + else: + for _ in range(ex_per_len[l] + additional_seq): + # seq = [random.choice(input_al) for _ in range(l)] + out = automaton.reset() + nr_steps = 0 + seq = [] + for i in range(l): + possible_inp = [inp for inp in input_al if automaton.possible(inp)] + if len(possible_inp) == 0: + inp = random.choice(input_al) + else: + if random.random() <= possible_prob: + inp = random.choice(possible_inp) + else: + inp = random.choice(input_al) + seq.append(inp) + if automaton.possible(inp) or not break_on_impossible: + nr_steps += 1 + if stack_limit and len(automaton.stack) > stack_limit: + break + if break_on_impossible and not automaton.possible(inp): + break + out = automaton.step(inp) + seq = seq[:nr_steps] + training_data.append((tuple(seq), out)) + + return training_data diff --git a/aalpy/automata/__init__.py b/aalpy/automata/__init__.py index ed053744..0471a298 100644 --- a/aalpy/automata/__init__.py +++ b/aalpy/automata/__init__.py @@ -5,4 +5,5 @@ from .Onfsm import Onfsm, OnfsmState from .StochasticMealyMachine import StochasticMealyMachine, StochasticMealyState 
from .MarkovChain import MarkovChain, McState -from .Pda import Pda \ No newline at end of file +from .Pda import Pda +from .Vpa import Vpa, VpaState diff --git a/aalpy/learning_algs/vpda/VpdaLStar.py b/aalpy/learning_algs/vpda/VpdaLStar.py index 951c9d5c..ebe95e2c 100644 --- a/aalpy/learning_algs/vpda/VpdaLStar.py +++ b/aalpy/learning_algs/vpda/VpdaLStar.py @@ -64,6 +64,11 @@ def run_vpda_Lstar(alphabet: list, sul: SUL, eq_oracle: Oracle, automaton_type, assert cex_processing in counterexample_processing_strategy assert print_level in print_options + merged_alphabet = list() + merged_alphabet.extend(alphabet[0]) + merged_alphabet.extend(alphabet[1]) + merged_alphabet.extend(alphabet[2]) + if cache_and_non_det_check or samples is not None: # Wrap the sul in the CacheSUL, so that all steps/queries are cached sul = CacheSUL(sul) @@ -102,7 +107,7 @@ def run_vpda_Lstar(alphabet: list, sul: SUL, eq_oracle: Oracle, automaton_type, rows_to_query = [] for row in rows_to_close: observation_table.S.append(row) - rows_to_query.extend([row + (a,) for a in alphabet]) + rows_to_query.extend([row + (a,) for a in merged_alphabet]) observation_table.update_obs_table(s_set=rows_to_query) rows_to_close = observation_table.get_rows_to_close(closing_strategy) @@ -139,7 +144,7 @@ def run_vpda_Lstar(alphabet: list, sul: SUL, eq_oracle: Oracle, automaton_type, added_rows = extend_set(observation_table.S, all_prefixes(cex)) s_to_update.extend(added_rows) for p in added_rows: - s_to_update.extend([p + (a,) for a in alphabet]) + s_to_update.extend([p + (a,) for a in merged_alphabet]) observation_table.update_obs_table(s_set=s_to_update) continue diff --git a/aalpy/learning_algs/vpda/VpdaObservationTable.py b/aalpy/learning_algs/vpda/VpdaObservationTable.py index e0101fbc..ff89b550 100644 --- a/aalpy/learning_algs/vpda/VpdaObservationTable.py +++ b/aalpy/learning_algs/vpda/VpdaObservationTable.py @@ -1,9 +1,10 @@ from collections import defaultdict +from aalpy.automata.Vpa import 
VpaTransition from aalpy.base import Automaton, SUL -from aalpy.automata import Dfa, DfaState, MealyState, MealyMachine, MooreMachine, MooreState +from aalpy.automata import Vpa, VpaState -aut_type = ['dfa', 'mealy', 'moore', 'pda'] +aut_type = ['pda', 'vpa'] closing_options = ['shortest_first', 'longest_first', 'single', 'single_longest'] @@ -21,17 +22,31 @@ def __init__(self, alphabet: list, sul: SUL, automaton_type, prefixes_in_cell=Fa Returns: """ + assert automaton_type in aut_type assert alphabet is not None and sul is not None self.automaton_type = automaton_type + if self.automaton_type == 'vpa': + self.call_set = alphabet[0] + self.return_set = alphabet[1] + self.internal_set = alphabet[2] + self.merged_alphabet = list() + self.merged_alphabet.extend(alphabet[0]) + self.merged_alphabet.extend(alphabet[1]) + self.merged_alphabet.extend(alphabet[2]) + # If True add prefixes of each element of E set to a cell, else only add the output self.prefixes_in_cell = prefixes_in_cell - self.A = [tuple([a]) for a in alphabet] + if automaton_type == 'vpa': + self.A = [tuple(a) for a in self.merged_alphabet] + else: + self.A = [tuple([a]) for a in alphabet] + self.S = list() # prefixes of S # DFA's can also take whole alphabet in E, this convention follows Angluin's paper - self.E = [] if self.automaton_type != 'mealy' else [tuple([a]) for a in alphabet] + self.E = [] # For performance reasons, the T function maps S to a tuple where element at index i is the element of the E # set of index i. Therefore it is important to keep E set ordered and ask membership queries only when needed # and in correct order. 
It would make more sense to implement it as a defaultdict(dict) where you can access @@ -43,8 +58,7 @@ def __init__(self, alphabet: list, sul: SUL, automaton_type, prefixes_in_cell=Fa self.S.append(empty_word) # DFAs and Moore machines use empty word for identification of accepting states/state outputs - if self.automaton_type == 'dfa' or self.automaton_type == 'moore': - self.E.insert(0, empty_word) + self.E.insert(0, empty_word) def get_rows_to_close(self, closing_strategy='longest_first'): """ @@ -147,12 +161,37 @@ def update_obs_table(self, s_set: list = None, e_set: list = None): for e in update_E: if len(self.T[s]) != len(self.E): output = tuple(self.sul.query(s + e)) + # print(f'Output ({s} + {e}): {output}') if self.prefixes_in_cell and len(e) > 1: obs_table_entry = tuple([output[-len(e):]],) else: obs_table_entry = (output[-1],) self.T[s] += obs_table_entry + def get_action_type(self, letter) -> str: + if letter in self.call_set: + return 'push' + elif letter in self.return_set: + return 'pop' + elif letter in self.internal_set: + return '' + else: + assert False + + def get_stack_guard(self, prefix, letter, action): + """ + + TODO: Finish this + + """ + out = self.sul.query(prefix + letter) + # if action == 'push': + # print(f'Push {out}') + # elif action == 'pop': + # out_pre = self.sul.query(prefix) + # print(f'Out Pre: {out_pre} + Out Now: {out}') + return '?' + def gen_hypothesis(self, no_cex_processing_used=False) -> Automaton: """ Generate automaton based on the values found in the observation table. 
@@ -170,7 +209,7 @@ def gen_hypothesis(self, no_cex_processing_used=False) -> Automaton: state_distinguish = dict() states_dict = dict() initial_state = None - automaton_class = {'dfa': Dfa, 'mealy': MealyMachine, 'moore': MooreMachine} + automaton_class = {'vpa': Vpa} s_set = self.S # Added check for the algorithm without counterexample processing @@ -182,13 +221,17 @@ def gen_hypothesis(self, no_cex_processing_used=False) -> Automaton: for prefix in s_set: state_id = f's{stateCounter}' - if self.automaton_type == 'dfa': - states_dict[prefix] = DfaState(state_id) + # if self.automaton_type == 'dfa': + # states_dict[prefix] = DfaState(state_id) + # states_dict[prefix].is_accepting = self.T[prefix][0] + # elif self.automaton_type == 'moore': + # states_dict[prefix] = MooreState(state_id, output=self.T[prefix][0]) + # else: + # states_dict[prefix] = MealyState(state_id) + + if self.automaton_type == 'vpa': + states_dict[prefix] = VpaState(state_id) states_dict[prefix].is_accepting = self.T[prefix][0] - elif self.automaton_type == 'moore': - states_dict[prefix] = MooreState(state_id, output=self.T[prefix][0]) - else: - states_dict[prefix] = MealyState(state_id) states_dict[prefix].prefix = prefix state_distinguish[tuple(self.T[prefix])] = states_dict[prefix] @@ -198,14 +241,31 @@ def gen_hypothesis(self, no_cex_processing_used=False) -> Automaton: stateCounter += 1 # add transitions based on extended S set + # print("--- Creating Transitions for Hypothesis ---") for prefix in s_set: for a in self.A: - state_in_S = state_distinguish[self.T[prefix + a]] - states_dict[prefix].transitions[a[0]] = state_in_S - if self.automaton_type == 'mealy': - states_dict[prefix].output_fun[a[0]] = self.T[prefix][self.E.index(a)] + prev_state = state_distinguish[self.T[prefix]] + target_state = state_distinguish[self.T[prefix + a]] + action = self.get_action_type(a[0]) + stack_guard = self.get_stack_guard(prefix, a, action) + # print(f'Transition : {prefix} + {a[0]} --> 
{target_state.state_id}') + + trans = VpaTransition(start=prev_state, target=target_state, symbol=a[0], action=action, stack_guard=stack_guard) + + # trans = VpaTransition(start=state, target=states[target_state_id], symbol=_input, action=action, + # stack_guard=stack_guard) + # state.transitions[_input].append(trans) + + states_dict[prefix].transitions[a[0]].append(trans) + + # if self.automaton_type == 'mealy': + # states_dict[prefix].output_fun[a[0]] = self.T[prefix][self.E.index(a)] + + if self.automaton_type == 'vpa': + automaton = automaton_class[self.automaton_type](initial_state, list(states_dict.values()), self.call_set, self.return_set, self.internal_set) + else: + automaton = automaton_class[self.automaton_type](initial_state, list(states_dict.values())) - automaton = automaton_class[self.automaton_type](initial_state, list(states_dict.values())) automaton.characterization_set = self.E return automaton diff --git a/aalpy/utils/BenchmarkVpaModels.py b/aalpy/utils/BenchmarkVpaModels.py new file mode 100644 index 00000000..3878b299 --- /dev/null +++ b/aalpy/utils/BenchmarkVpaModels.py @@ -0,0 +1,41 @@ +from aalpy.automata.Vpa import Vpa + +def vpa_for_L1(): + # just a testing language + call_set = {'a'} + return_set = {'b'} + internal_set = {} + + state_setup = { + "q0": (False, {"a": [("q1", 'push', "$")]}), + "q1": (False, {"a": [("q1", 'push', "x")], + "b": [("q1", 'pop', "x"), ("q2", 'pop', "$")], + }), + "q2": (True, {}) + } + vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + return vpa + +def vpa_for_L13(): + # Dyck order 1 + + call_set = {'('} + return_set = {')'} + internal_set = {'a', 'b', 'c'} + + state_setup = { + "q0": (False, {"(": [("q1", 'push', None)], + "a": [("q1", None, None)], + "b": [("q1", None, None)], + "c": [("q1", None, None)], # exclude empty seq + }), + "q1": (True, {"(": [("q1", 'push', None)], + ")": [("q1", 'pop', "(")], + "a": [("q1", None, None)], + "b": [("q1", None, None)], + "c": [("q1", 
None, None)] + }), + } + vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + return vpa + diff --git a/aalpy/utils/FileHandler.py b/aalpy/utils/FileHandler.py index 7b2760b4..c63dc098 100644 --- a/aalpy/utils/FileHandler.py +++ b/aalpy/utils/FileHandler.py @@ -6,11 +6,11 @@ from pydot import Dot, Node, Edge, graph_from_dot_file from aalpy.automata import Dfa, MooreMachine, Mdp, Onfsm, MealyState, DfaState, MooreState, MealyMachine, \ - MdpState, StochasticMealyMachine, StochasticMealyState, OnfsmState, MarkovChain, McState, Pda + MdpState, StochasticMealyMachine, StochasticMealyState, OnfsmState, MarkovChain, McState, Pda, Vpa file_types = ['dot', 'png', 'svg', 'pdf', 'string'] automaton_types = {Dfa: 'dfa', MealyMachine: 'mealy', MooreMachine: 'moore', Mdp: 'mdp', - StochasticMealyMachine: 'smm', Onfsm: 'onfsm', MarkovChain: 'mc', Pda: 'pda'} + StochasticMealyMachine: 'smm', Onfsm: 'onfsm', MarkovChain: 'mc', Pda: 'pda', Vpa: 'vpa'} def _wrap_label(label): @@ -44,6 +44,10 @@ def _get_node(state, automaton_type): if state.is_accepting: return Node(state.state_id, label=_wrap_label(state.state_id), shape='doublecircle') return Node(state.state_id, label=_wrap_label(state.state_id)) + if automaton_type == 'vpa': + if state.is_accepting: + return Node(state.state_id, label=_wrap_label(state.state_id), shape='doublecircle') + return Node(state.state_id, label=_wrap_label(state.state_id)) def _add_transition_to_graph(graph, state, automaton_type, display_same_state_trans, round_floats): @@ -95,12 +99,23 @@ def _add_transition_to_graph(graph, state, automaton_type, display_same_state_tr label=_wrap_label(f'{transition.symbol}'))) if transition.action == 'push': graph.add_edge(Edge(transition.start.state_id, transition.target.state_id, - label=_wrap_label(f'{transition.symbol}/push(\'{transition.symbol}\')'))) + label=_wrap_label(f'{transition.symbol}/push(\'{transition.stack_guard}\')'))) if transition.action == 'pop': 
graph.add_edge(Edge(transition.start.state_id, transition.target.state_id, label=_wrap_label(f'{transition.symbol}/pop(\'{transition.stack_guard}\')'))) - - + if automaton_type == 'vpa': + for i in state.transitions.keys(): + transitions_list = state.transitions[i] + for transition in transitions_list: + if transition.action is None: + graph.add_edge(Edge(transition.start.state_id, transition.target.state_id, + label=_wrap_label(f'{transition.symbol}'))) + if transition.action == 'push': + graph.add_edge(Edge(transition.start.state_id, transition.target.state_id, + label=_wrap_label(f'{transition.symbol} | push({transition.stack_guard})'))) + if transition.action == 'pop': + graph.add_edge(Edge(transition.start.state_id, transition.target.state_id, + label=_wrap_label(f'{transition.symbol} | pop({transition.stack_guard})'))) def visualize_automaton(automaton, path="LearnedModel", file_type="pdf", display_same_state_trans=True): @@ -168,6 +183,8 @@ def save_automaton_to_file(automaton, path="LearnedModel", file_type="dot", for state in automaton.states: if automaton_type == 'pda' and state.state_id == 'ErrorSinkState': continue + elif automaton_type == 'vpa' and state.state_id == 'ErrorSinkState': + continue graph.add_node(_get_node(state, automaton_type)) for state in automaton.states: diff --git a/pda_main_experiments.py b/pda_main_experiments.py index f2b62c39..e6bd731c 100644 --- a/pda_main_experiments.py +++ b/pda_main_experiments.py @@ -1,3 +1,4 @@ + from aalpy.SULs.AutomataSUL import PdaSUL from aalpy.automata.Pda import generate_data_from_pda from aalpy.learning_algs import run_vpda_Lstar @@ -20,4 +21,4 @@ eq_oracle = RandomWMethodEqOracle(alphabet=input_alphabet, sul=sul, walks_per_state=100, walk_len=10) model = run_vpda_Lstar(alphabet=input_alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type="dfa", print_level=3, - max_learning_rounds=1) + max_learning_rounds=1) \ No newline at end of file diff --git a/vpa_main_experiments.py b/vpa_main_experiments.py 
new file mode 100644 index 00000000..e06f6a2e --- /dev/null +++ b/vpa_main_experiments.py @@ -0,0 +1,31 @@ +from aalpy.SULs.AutomataSUL import VpaSUL +from aalpy.automata.Pda import generate_data_from_pda +from aalpy.learning_algs import run_vpda_Lstar +from aalpy.oracles import RandomWMethodEqOracle +from aalpy.utils.BenchmarkPdaModels import * +from aalpy.utils.BenchmarkVpaModels import * + + +vpa = vpa_for_L1() + +# vpa.visualize() + +input_alphabet = vpa.get_input_alphabet() +merged_input_alphabet = vpa.get_input_alphabet_merged() +# print("Call: " + str(input_alphabet[0]) + "\nReturn: " + str(input_alphabet[1]) + "\nInternal: " + str(input_alphabet[2])) + +sul = VpaSUL(vpa, include_top=True, check_balance=True) + +# pda_sequences = generate_data_from_pda(vpa, 10000) +# accepting_seq, rejecting_seq = [x[0] for x in pda_sequences if x[1]], [x[0] for x in pda_sequences if not x[1]] +# accepting_seq.sort(key=len) +# print('Positive') +# for i in range(10): +# print(accepting_seq[i]) + +eq_oracle = RandomWMethodEqOracle(alphabet=merged_input_alphabet, sul=sul, walks_per_state=100, walk_len=10) +model = run_vpda_Lstar(alphabet=input_alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type="vpa", print_level=3, + max_learning_rounds=1) + +model.visualize() + From 13850973ea2cea513614f86927bad5ff299792d8 Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Wed, 4 Oct 2023 15:47:37 +0200 Subject: [PATCH 05/62] Basic Functionality VPA Lstar --- aalpy/automata/Vpa.py | 34 ++++++++++-- .../vpda/VpdaObservationTable.py | 52 ++++++++----------- aalpy/utils/FileHandler.py | 22 ++++---- vpa_main_experiments.py | 1 + 4 files changed, 63 insertions(+), 46 deletions(-) diff --git a/aalpy/automata/Vpa.py b/aalpy/automata/Vpa.py index ad7fdb92..e8ba6f00 100644 --- a/aalpy/automata/Vpa.py +++ b/aalpy/automata/Vpa.py @@ -67,9 +67,20 @@ def possible(self, letter): return True if letter is not None: transitions = self.current_state.transitions[letter] - trans = [t for t in transitions 
if t.stack_guard is None or self.top() == t.stack_guard] - assert len(trans) < 2 - if len(trans) == 0: + possible_trans = [] + for t in transitions: + if t.symbol in self.call_set: + possible_trans.append(t) + elif t.symbol in self.return_set: + if t.stack_guard == self.top(): + possible_trans.append(t) + elif t.symbol in self.internal_set: + possible_trans.append(t) + else: + assert False + # trans = [t for t in transitions if t.stack_guard is None or self.top() == t.stack_guard] + assert len(possible_trans) < 2 + if len(possible_trans) == 0: return False else: return True @@ -83,11 +94,24 @@ def step(self, letter): return False if letter is not None: transitions = self.current_state.transitions[letter] - trans = [t for t in transitions if t.stack_guard is None or self.top() == t.stack_guard][0] + possible_trans = [] + for t in transitions: + if t.symbol in self.call_set: + possible_trans.append(t) + elif t.symbol in self.return_set: + if t.stack_guard == self.top(): + possible_trans.append(t) + elif t.symbol in self.internal_set: + possible_trans.append(t) + else: + assert False + + assert len(possible_trans) < 2 + trans = possible_trans[0] self.current_state = trans.target if trans.action == 'push': assert(letter in self.call_set) # push letters must be in call set - self.stack.append(letter) + self.stack.append(trans.stack_guard) elif trans.action == 'pop': assert(letter in self.return_set) # pop letters must be in return set if len(self.stack) <= 1: # empty stack elem should always be there diff --git a/aalpy/learning_algs/vpda/VpdaObservationTable.py b/aalpy/learning_algs/vpda/VpdaObservationTable.py index ff89b550..e7146eb3 100644 --- a/aalpy/learning_algs/vpda/VpdaObservationTable.py +++ b/aalpy/learning_algs/vpda/VpdaObservationTable.py @@ -161,7 +161,6 @@ def update_obs_table(self, s_set: list = None, e_set: list = None): for e in update_E: if len(self.T[s]) != len(self.E): output = tuple(self.sul.query(s + e)) - # print(f'Output ({s} + {e}): {output}') 
if self.prefixes_in_cell and len(e) > 1: obs_table_entry = tuple([output[-len(e):]],) else: @@ -181,16 +180,26 @@ def get_action_type(self, letter) -> str: def get_stack_guard(self, prefix, letter, action): """ - TODO: Finish this + Gets the stack guard based on the action and word (prefix + letter) """ out = self.sul.query(prefix + letter) - # if action == 'push': - # print(f'Push {out}') - # elif action == 'pop': - # out_pre = self.sul.query(prefix) - # print(f'Out Pre: {out_pre} + Out Now: {out}') - return '?' + out_pre = self.sul.query(prefix) + if action == 'push': + if out_pre[-1][1] == out[-1][1] and out_pre[-1][1] == '_': # stack doesn't change on push action + stack_guard = '?' + else: # stack changed so we know the push action worked + stack_guard = out[-1][1] + elif action == 'pop': + if out_pre[-1][1] == out[-1][1]: # stack doesn't change on pop action + stack_guard = '?' + else: # stack changed so we know the pop operation worked + stack_guard = out_pre[-1][1] + else: + stack_guard = '' + + return stack_guard + def gen_hypothesis(self, no_cex_processing_used=False) -> Automaton: """ @@ -221,17 +230,8 @@ def gen_hypothesis(self, no_cex_processing_used=False) -> Automaton: for prefix in s_set: state_id = f's{stateCounter}' - # if self.automaton_type == 'dfa': - # states_dict[prefix] = DfaState(state_id) - # states_dict[prefix].is_accepting = self.T[prefix][0] - # elif self.automaton_type == 'moore': - # states_dict[prefix] = MooreState(state_id, output=self.T[prefix][0]) - # else: - # states_dict[prefix] = MealyState(state_id) - - if self.automaton_type == 'vpa': - states_dict[prefix] = VpaState(state_id) - states_dict[prefix].is_accepting = self.T[prefix][0] + states_dict[prefix] = VpaState(state_id) + states_dict[prefix].is_accepting = self.T[prefix][0][0] states_dict[prefix].prefix = prefix state_distinguish[tuple(self.T[prefix])] = states_dict[prefix] @@ -240,27 +240,17 @@ def gen_hypothesis(self, no_cex_processing_used=False) -> Automaton: 
initial_state = states_dict[prefix] stateCounter += 1 - # add transitions based on extended S set - # print("--- Creating Transitions for Hypothesis ---") for prefix in s_set: for a in self.A: prev_state = state_distinguish[self.T[prefix]] target_state = state_distinguish[self.T[prefix + a]] action = self.get_action_type(a[0]) stack_guard = self.get_stack_guard(prefix, a, action) - # print(f'Transition : {prefix} + {a[0]} --> {target_state.state_id}') - + if stack_guard == '?': + target_state = Vpa.error_state trans = VpaTransition(start=prev_state, target=target_state, symbol=a[0], action=action, stack_guard=stack_guard) - - # trans = VpaTransition(start=state, target=states[target_state_id], symbol=_input, action=action, - # stack_guard=stack_guard) - # state.transitions[_input].append(trans) - states_dict[prefix].transitions[a[0]].append(trans) - # if self.automaton_type == 'mealy': - # states_dict[prefix].output_fun[a[0]] = self.T[prefix][self.E.index(a)] - if self.automaton_type == 'vpa': automaton = automaton_class[self.automaton_type](initial_state, list(states_dict.values()), self.call_set, self.return_set, self.internal_set) else: diff --git a/aalpy/utils/FileHandler.py b/aalpy/utils/FileHandler.py index c63dc098..85fcdf07 100644 --- a/aalpy/utils/FileHandler.py +++ b/aalpy/utils/FileHandler.py @@ -107,15 +107,17 @@ def _add_transition_to_graph(graph, state, automaton_type, display_same_state_tr for i in state.transitions.keys(): transitions_list = state.transitions[i] for transition in transitions_list: - if transition.action is None: - graph.add_edge(Edge(transition.start.state_id, transition.target.state_id, - label=_wrap_label(f'{transition.symbol}'))) if transition.action == 'push': - graph.add_edge(Edge(transition.start.state_id, transition.target.state_id, - label=_wrap_label(f'{transition.symbol} | push({transition.stack_guard})'))) - if transition.action == 'pop': - graph.add_edge(Edge(transition.start.state_id, transition.target.state_id, - 
label=_wrap_label(f'{transition.symbol} | pop({transition.stack_guard})'))) + edge = Edge(transition.start.state_id, transition.target.state_id, label=_wrap_label(f'{transition.symbol} | push({transition.stack_guard})')) + elif transition.action == 'pop': + edge = Edge(transition.start.state_id, transition.target.state_id, label=_wrap_label(f'{transition.symbol} | pop({transition.stack_guard})')) + else: + edge = Edge(transition.start.state_id, transition.target.state_id, label=_wrap_label(f'{transition.symbol}')) + + if transition.target == Vpa.error_state: + edge.set_style('dashed') + + graph.add_edge(edge) def visualize_automaton(automaton, path="LearnedModel", file_type="pdf", display_same_state_trans=True): @@ -183,8 +185,8 @@ def save_automaton_to_file(automaton, path="LearnedModel", file_type="dot", for state in automaton.states: if automaton_type == 'pda' and state.state_id == 'ErrorSinkState': continue - elif automaton_type == 'vpa' and state.state_id == 'ErrorSinkState': - continue + # elif automaton_type == 'vpa' and state.state_id == 'ErrorSinkState': + # continue graph.add_node(_get_node(state, automaton_type)) for state in automaton.states: diff --git a/vpa_main_experiments.py b/vpa_main_experiments.py index e06f6a2e..8a3e9fa3 100644 --- a/vpa_main_experiments.py +++ b/vpa_main_experiments.py @@ -15,6 +15,7 @@ # print("Call: " + str(input_alphabet[0]) + "\nReturn: " + str(input_alphabet[1]) + "\nInternal: " + str(input_alphabet[2])) sul = VpaSUL(vpa, include_top=True, check_balance=True) +out = sul.query('a') # pda_sequences = generate_data_from_pda(vpa, 10000) # accepting_seq, rejecting_seq = [x[0] for x in pda_sequences if x[1]], [x[0] for x in pda_sequences if not x[1]] From 8b55a83616c957c83e7ff58afa6bb1aadb4c772d Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Sat, 7 Oct 2023 22:48:12 +0200 Subject: [PATCH 06/62] Explanation of theoretical stuff --- VPA.md | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 
insertions(+) create mode 100644 VPA.md diff --git a/VPA.md b/VPA.md new file mode 100644 index 00000000..25bffbfd --- /dev/null +++ b/VPA.md @@ -0,0 +1,54 @@ +# Theory of VPA +A VPA (Visibly Pushdown Automaton) is similar to a regular pushdown automaton, except that its alphabet is split into three parts. + +The alphabet of a VPA is a triple with: +- Σcall: call set (set of all call letters --> letters that are used for push actions on the automaton) +- Σret: return set (set of all return letters --> letters that are used for pop actions on the automaton) +- Σint: internal set (set of all internal letters --> letters that are used for internal transitions that don't alter the stack) + +The union of all three sets is the alphabet. +The three sets are pairwise disjoint (no letter belongs to more than one set). + +### The call/return balance +The call/return balance is a function β. The function maps a word over the alphabet to an integer based on its call and return letters. Here is how it works: +- Call letters add "1" to the balance +- Return letters subtract "1" from the balance +- Internal letters have no impact on the balance + +#### Example: +Imagine an alphabet with: +- Σcall = {a, b} +- Σret = {c, d} +- Σint = {e, f} + +We would have the following balances for the following words: +- aabbccdd = 0 (1+1+1+1-1-1-1-1) +- abab = 4 (1+1+1+1) +- cccc = -4 (-1-1-1-1) +- cdeabef = 0 (-1-1+0+1+1+0+0) + +### Call-matched, return-matched, well-matched +By defining the call/return balance we can introduce the definition of call-matched, return-matched and well-matched words. 
The definition is as follows: +- **Return-matched** words have β >= 0 (they have at least as many call letters as return letters, so the balance is greater than or equal to 0) +The set of return-matched words is called MR(Σ) +- **Call-matched** words have β <= 0 (they have at least as many return letters as call letters, so the balance is less than or equal to 0) +The set of call-matched words is called MC(Σ) +- **Well-matched** words have β = 0 (their call and return letters balance out to zero) +The set of well-matched words is called MW(Σ) + +### Context pairs (CP) +Furthermore, we want to introduce context pairs; the set of context pairs is CP(Σ). Context pairs are well-matched words of the form u*v. +- u has the form of MR(Σ) * Σcall or is the empty word ε +This means that if u is of the form MR(Σ) * Σcall, then β(u) >= 1 +- v is of the form MC(Σ) +- β(u) = -β(v). +- v is the matching word for u, so that u*v ∈ MW(Σ) +#### Example: +If we go back to our previous example with the following split: +- Σcall = {a, b} +- Σret = {c, d} +- Σint = {e, f} + +We could have these types of context pairs: +- u = aa | v = cc +- u = aca | v = d \ No newline at end of file From f2f6db8f0775be45d3a53920cd18b0a5d8003115 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Fri, 13 Oct 2023 14:37:03 +0200 Subject: [PATCH 07/62] push todos --- .../vpda/VpdaClassificationTree.py | 316 ++++++++++++++++++ aalpy/learning_algs/vpda/VpdaKV.py | 145 ++++++++ 2 files changed, 461 insertions(+) create mode 100644 aalpy/learning_algs/vpda/VpdaClassificationTree.py create mode 100644 aalpy/learning_algs/vpda/VpdaKV.py diff --git a/aalpy/learning_algs/vpda/VpdaClassificationTree.py b/aalpy/learning_algs/vpda/VpdaClassificationTree.py new file mode 100644 index 00000000..787977de --- /dev/null +++ b/aalpy/learning_algs/vpda/VpdaClassificationTree.py @@ -0,0 +1,316 @@ +from collections import defaultdict + +from aalpy.automata import DfaState, Dfa, MealyState, MealyMachine, MooreState, MooreMachine 
+from aalpy.base import SUL + + +class CTNode: + __slots__ = ['parent', 'path_to_node'] + + def __init__(self, parent, path_to_node): + self.parent = parent + self.path_to_node = path_to_node + + def is_leaf(self): + pass + + +class CTInternalNode(CTNode): + __slots__ = ['context_pair', 'children'] + + def __init__(self, context_pair: tuple, parent, path_to_node): + super().__init__(parent, path_to_node) + self.context_pair = context_pair + self.children = defaultdict(None) # {True: None, False: None} + + def is_leaf(self): + return False + + +class CTLeafNode(CTNode): + __slots__ = ['access_string'] + + def __init__(self, access_string: tuple, parent, path_to_node): + super().__init__(parent, path_to_node) + self.access_string = access_string + + def __repr__(self): + return f"{self.__class__.__name__} '{self.access_string}'" + + @property + def output(self): + c, p = self, self.parent + while p.parent: + c = p + p = p.parent + for output, child in p.children.items(): + if child == c: + return output + assert False + + def is_leaf(self): + return True + + +class VpdaClassificationTree: + # TODO replace all dist. 
strings with context pairs appropriately + def __init__(self, alphabet: list, sul: SUL, cex: tuple): + self.sul = sul + self.alphabet = alphabet + + self.leaf_nodes = {} + self.query_cache = dict() + + self.sifting_cache = {} + + initial_output = sul.query(())[-1] + cex_output = sul.query(cex)[-1] + + self.query_cache[()] = initial_output + + self.root = CTInternalNode(context_pair=tuple([(), ()]), parent=None, path_to_node=None) + + initial_output_node = CTLeafNode(access_string=tuple(), parent=self.root, path_to_node=initial_output) + cex_output_node = CTLeafNode(access_string=cex, parent=self.root, path_to_node=cex_output) + + self.root.children[initial_output] = initial_output_node + self.root.children[cex_output] = cex_output_node + + self.leaf_nodes[tuple()] = initial_output_node + self.leaf_nodes[cex] = cex_output_node + + def _sift(self, word): + """ + Sifting a word into the classification tree. + Starting at the root, at every inner node (a CTInternalNode), + we branch into the child, depending on the result of the + membership query (word * node.distinguishing_string). Repeated until a leaf + (a CTLeafNode) is reached, which is the result of the sifting. + + Args: + + word: the word to sift into the discrimination tree (a tuple of all letters) + + Returns: + + the CTLeafNode that is reached by the sifting operation. 
+ """ + for letter in word: + assert letter is None or letter in self.alphabet + + if word in self.sifting_cache: + return self.sifting_cache[word] + + node = self.root + while not node.is_leaf(): + + query = node.context_pair[0] + word + node.context_pair[1] + + if query not in self.query_cache.keys(): + mq_result = self.sul.query(query) + + mq_result = mq_result[-1] + self.query_cache[query] = mq_result + else: + mq_result = self.query_cache[query] + + if mq_result not in node.children.keys(): + new_leaf = CTLeafNode(access_string=word, parent=node, path_to_node=mq_result) + self.leaf_nodes[word] = new_leaf + node.children[mq_result] = new_leaf + + node = node.children[mq_result] + + self.sifting_cache[word] = node + assert node.is_leaf() + return node + + def gen_hypothesis(self): + # for each CTLeafNode of this CT, + # create a state in the hypothesis that is labeled by that + # node's access string. The start state is the empty word + + # TODO take a look at kv how it is done + + return None + + def _least_common_ancestor(self, node_1_id, node_2_id): + """ + Find the distinguishing string of the least common ancestor + of the leaf nodes node_1 and node_2. Both nodes have to exist. 
+ Adapted from https://www.geeksforgeeks.org/lowest-common-ancestor-binary-tree-set-1/ + + Args: + + node_1_id: first leaf node's id + node_2_id: second leaf node's id + + Returns: + + the distinguishing string of the lca + + """ + + def ancestor(parent, node): + for child in parent.children.values(): + if child.is_leaf(): + if child.access_string == node: + return True + else: + next_ancestor = ancestor(child, node) + if next_ancestor: + return True + return False + + def findLCA(n1_id, n2_id): + node = self.leaf_nodes[n1_id] + parent = node.parent + while parent: + if ancestor(parent, n2_id): + return parent + if parent.parent: + parent = parent.parent + else: + return parent + return None + + return findLCA(node_1_id, node_2_id).context_pair + + def update(self, cex: tuple, hypothesis): + """ + Updates the classification tree based on a counterexample. + - For each prefix cex[:i] of the counterexample, get + s_i = self.sift(cex[:i]) and + s_star_i = id of the state with the access sequence cex[:i] + in the hypothesis + and let j be the least i such that s_i != s_star_i. + - Replace the CTLeafNode labeled with the access string of the state + that is reached by the sequence cex[:j-1] in the hypothesis + with an CTInternalNode with two CTLeafNodes: one keeps the old + access string, and one gets the new access string cex[:j-1]. + The internal node is labeled with the distinguishing string (cex[j-1],*d), + where d is the distinguishing string of the LCA of s_i and s_star_i. 
+ + Args: + cex: the counterexample used to update the tree + hypothesis: the former (wrong) hypothesis + + """ + j = d = None + for i in range(1, len(cex) + 1): + s_i = self._sift(cex[:i]).access_string + hypothesis.execute_sequence(hypothesis.initial_state, cex[:i]) + s_star_i = hypothesis.current_state.prefix + if s_i != s_star_i: + j = i + d = self._least_common_ancestor(s_i, s_star_i) + break + if j is None and d is None: + j = len(cex) + d = [] + assert j is not None and d is not None + + hypothesis.execute_sequence(hypothesis.initial_state, cex[:j - 1] or tuple()) + + self._insert_new_leaf(discriminator=(cex[j - 1], *d), + old_leaf_access_string=hypothesis.current_state.prefix, + new_leaf_access_string=tuple(cex[:j - 1]) or tuple(), + new_leaf_position=self.sul.query((*cex[:j - 1], *(cex[j - 1], *d)))[-1]) + + def update_rs(self, cex: tuple, hypothesis): + """ + Updates the classification tree based on a counterexample, + using Rivest & Schapire's counterexample processing + - Replace the CTLeafNode labeled with the access string of the state + that is reached by the sequence cex[:j-1] in the hypothesis + with an CTInternalNode with two CTLeafNodes: one keeps the old + access string, and one gets the new access string cex[:j-1]. + The internal node is labeled with the distinguishing string (cex[j-1],*d), + where d is the distinguishing string of the LCA of s_i and s_star_i. 
+ + Args: + cex: the counterexample used to update the tree + hypothesis: the former (wrong) hypothesis + + """ + from aalpy.learning_algs.deterministic.CounterExampleProcessing import rs_cex_processing + v = max(rs_cex_processing(self.sul, cex, hypothesis, suffix_closedness=True), key=len) + a = cex[len(cex) - len(v) - 1] + u = cex[:len(cex) - len(v) - 1] + assert (*u, a, *v) == cex + + hypothesis.execute_sequence(hypothesis.initial_state, u) + u_state = hypothesis.current_state.prefix + hypothesis.step(a) + ua_state = hypothesis.current_state.prefix + + new_leaf_position = not hypothesis.execute_sequence(hypothesis.initial_state, cex)[-1] + + self._insert_new_leaf(discriminator=v, + old_leaf_access_string=ua_state, + new_leaf_access_string=(*u_state, a), + new_leaf_position=new_leaf_position) + + def _insert_new_leaf(self, discriminator, old_leaf_access_string, new_leaf_access_string, new_leaf_position): + """ + Inserts a new leaf in the classification tree by: + - moving the leaf node specified by down one level + - inserting an internal node at the former position of the old node (i.e. as the parent of the old node) + - adding a new leaf node with as child of the new internal node / sibling of the old node + Could also be thought of as 'splitting' the old node into two (one of which keeps the old access string and one + of which gets the new one) with as the distinguishing string between the two. + + where one of the resulting nodes keeps the old + node's access string and the other gets new_leaf_access_string. 
+ Args: + discriminator: The distinguishing string of the new internal node + old_leaf_access_string: The access string specifying the leaf node to be 'split' (or rather moved down) + new_leaf_access_string: The access string of the leaf node that will be created + new_leaf_position: The path from the new internal node to the new leaf node + + Returns: + + """ + other_leaf_position = not new_leaf_position + + old_leaf = self.leaf_nodes[old_leaf_access_string] + + # create an internal node at the same position as the old leaf node + # TODO is this context pair?? + discriminator_node = CTInternalNode(context_pair=discriminator, + parent=old_leaf.parent, path_to_node=old_leaf.path_to_node) + + # create the new leaf node and add it as child of the internal node + new_leaf = CTLeafNode(access_string=new_leaf_access_string, + parent=discriminator_node, + path_to_node=new_leaf_position) + self.leaf_nodes[new_leaf_access_string] = new_leaf + + # redirect the old nodes former parent to the internal node + old_leaf.parent.children[old_leaf.path_to_node] = discriminator_node + + # add the internal node as parent of the old leaf + old_leaf.parent = discriminator_node + old_leaf.path_to_node = other_leaf_position + + # set the two nodes as children of the internal node + discriminator_node.children[new_leaf_position] = new_leaf + discriminator_node.children[other_leaf_position] = old_leaf + + # sifting cache update + sifting_cache_outdated = [] + if old_leaf in self.sifting_cache.values(): + for prefix, node in self.sifting_cache.items(): + if old_leaf == node: + sifting_cache_outdated.append(prefix) + + for to_delete in sifting_cache_outdated: + del self.sifting_cache[to_delete] + + def _query_and_update_cache(self, word): + if word in self.query_cache.keys(): + output = self.query_cache[word] + else: + output = self.sul.query(word)[-1] + self.query_cache[word] = output + return output diff --git a/aalpy/learning_algs/vpda/VpdaKV.py b/aalpy/learning_algs/vpda/VpdaKV.py new 
file mode 100644 index 00000000..bf8c2945 --- /dev/null +++ b/aalpy/learning_algs/vpda/VpdaKV.py @@ -0,0 +1,145 @@ +import time + +from aalpy.automata import Dfa, DfaState, MealyState, MealyMachine, MooreState, MooreMachine +from aalpy.base import Oracle, SUL +from aalpy.utils.HelperFunctions import print_learning_info +from .VpdaClassificationTree import VpdaClassificationTree +from ..deterministic.CounterExampleProcessing import counterexample_successfully_processed +from ...base.SUL import CacheSUL + +print_options = [0, 1, 2, 3] +counterexample_processing_strategy = [None, 'rs'] + + +def run_KV_vpda(alphabet: list, sul: SUL, eq_oracle: Oracle, cex_processing='rs', + max_learning_rounds=None, cache_and_non_det_check=True, return_data=False, print_level=2): + """ + Executes the KV algorithm. + + Args: + + alphabet: input alphabet + + sul: system under learning + + eq_oracle: equivalence oracle + + cex_processing: None for no counterexample processing, or 'rs' for Rivest & Schapire counterexample processing + + max_learning_rounds: number of learning rounds after which learning will terminate (Default value = None) + + cache_and_non_det_check: Use caching and non-determinism checks (Default value = True) + + return_data: if True, a map containing all information(runtime/#queries/#steps) will be returned + (Default value = False) + + print_level: 0 - None, 1 - just results, 2 - current round and hypothesis size, 3 - educational/debug + (Default value = 2) + + + Returns: + + automaton of type automaton_type (dict containing all information about learning if 'return_data' is True) + + """ + + assert print_level in print_options + assert cex_processing in counterexample_processing_strategy + + start_time = time.time() + eq_query_time = 0 + learning_rounds = 0 + + if cache_and_non_det_check: + # Wrap the sul in the CacheSUL, so that all steps/queries are cached + sul = CacheSUL(sul) + eq_oracle.sul = sul + + empty_string_mq = sul.query(tuple())[-1] + + initial_state = 
DfaState(state_id='s0', is_accepting=empty_string_mq) + + initial_state.prefix = tuple() + + # TODO Create 1-SEVPA class + # When creating a hypothesis, infer call transition destinations based on (loc, call) pairs + + # TODO Create initial hypothesis + # Maybe move initialization of classification tree here + # Add a new method to it called generate_initial_hypothesis() + # Either -> one state and then procedure is same like in default KV (add cex later) + # Discover a new state + + hypothesis = None + # Perform an equivalence query on this automaton + eq_query_start = time.time() + cex = eq_oracle.find_cex(hypothesis) + + eq_query_time += time.time() - eq_query_start + if cex is not None: + cex = tuple(cex) + + # initialise the classification tree to have a root + # labeled with the empty word as the distinguishing string + # and two leaves labeled with access strings cex and empty word + classification_tree = VpdaClassificationTree(alphabet=alphabet, sul=sul, cex=cex) + + while True: + learning_rounds += 1 + if max_learning_rounds and learning_rounds - 1 == max_learning_rounds: + break + + hypothesis = classification_tree.gen_hypothesis() + + if print_level == 2: + print(f'\rHypothesis {learning_rounds}: {hypothesis.size} states.', end="") + + if print_level == 3: + # would be nice to have an option to print classification tree + print(f'Hypothesis {learning_rounds}: {hypothesis.size} states.') + + if counterexample_successfully_processed(sul, cex, hypothesis): + # Perform an equivalence query on this automaton + eq_query_start = time.time() + cex = eq_oracle.find_cex(hypothesis) + eq_query_time += time.time() - eq_query_start + + if cex is None: + break + else: + cex = tuple(cex) + + if print_level == 3: + print('Counterexample', cex) + + if cex_processing == 'rs': + classification_tree.update_rs(cex, hypothesis) + else: + classification_tree.update(cex, hypothesis) + + total_time = round(time.time() - start_time, 2) + eq_query_time = round(eq_query_time, 2) + 
learning_time = round(total_time - eq_query_time, 2) + + info = { + 'learning_rounds': learning_rounds, + 'automaton_size': hypothesis.size, + 'queries_learning': sul.num_queries, + 'steps_learning': sul.num_steps, + 'queries_eq_oracle': eq_oracle.num_queries, + 'steps_eq_oracle': eq_oracle.num_steps, + 'learning_time': learning_time, + 'eq_oracle_time': eq_query_time, + 'total_time': total_time, + 'cache_saved': sul.num_cached_queries, + } + + if print_level > 0: + if print_level == 2: + print("") + print_learning_info(info) + + if return_data: + return hypothesis, info + + return hypothesis From de89def6bf3d475e0ac72133c52a34409e26504c Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Mon, 16 Oct 2023 22:31:22 +0200 Subject: [PATCH 08/62] First implementation of VpaKV + Vpa/Pda EqChecks + Theory explanations --- VPA.md | 16 +- aalpy/SULs/AutomataSUL.py | 25 +- aalpy/automata/Pda.py | 4 +- aalpy/automata/Sevpa.py | 282 +++++++++++++ aalpy/automata/Vpa.py | 4 +- aalpy/automata/__init__.py | 1 + aalpy/learning_algs/__init__.py | 3 +- aalpy/learning_algs/deterministic/KV.py | 9 +- .../vpda/VpdaClassificationTree.py | 77 +++- aalpy/learning_algs/vpda/VpdaKV.py | 14 +- aalpy/utils/BenchmarkSULs.py | 8 +- aalpy/utils/BenchmarkSevpaModels.py | 378 ++++++++++++++++++ aalpy/utils/BenchmarkVpaModels.py | 322 ++++++++++++++- aalpy/utils/FileHandler.py | 4 +- aalpy/utils/HelperFunctions.py | 28 ++ aalpy/utils/__init__.py | 2 +- pda_main_experiments.py | 1 + sevpa_main_experiments.py | 49 +++ vpa_eq_checks.py | 60 +++ vpa_main_experiments.py | 2 +- 20 files changed, 1251 insertions(+), 38 deletions(-) create mode 100644 aalpy/automata/Sevpa.py create mode 100644 aalpy/utils/BenchmarkSevpaModels.py create mode 100644 sevpa_main_experiments.py create mode 100644 vpa_eq_checks.py diff --git a/VPA.md b/VPA.md index 25bffbfd..6dfb1518 100644 --- a/VPA.md +++ b/VPA.md @@ -37,7 +37,7 @@ The set of call matched words is called MC(Σ) The set of well matched words is called MW(Σ) ### 
Context pairs (CP) -Furthermore we want to introduce the context pairs, the set of context pairs is CP(Σ). Context pairs are well matched words of the form u*v. +Furthermore we want to introduce context pairs, the set of context pairs is CP(Σ). Context pairs are well matched words having the form of u*v. - u has the form of MR(Σ) * Σcall or is the empty word ε This means if u is of the form MR(Σ) * Σcall it has at least a β >= 1 - v is of the form MC(Σ) @@ -51,4 +51,16 @@ If we get back at our last language with the following split: We have could have this types of context pairs: - u = aa | v = cc -- u = aca | v = d \ No newline at end of file +- u = aca | v = d + +### Output functions +As Malte Isberner described in his paper we want to introduce the definition of output functions as they are used for several relations and congruences. +The intuitive meaning of the output function is if the word is part of the language respectively if the automata accepts or rejects the input. +The output function is of the form: +λ: Σ* --> B +Generally it will be assumed that the output function is a well-matched output function, which is of the form: +**λ: WM(Σ*) --> B** + +### Important Congruences + +#### Nerode-congruence diff --git a/aalpy/SULs/AutomataSUL.py b/aalpy/SULs/AutomataSUL.py index 1cf07e15..86ab2d73 100644 --- a/aalpy/SULs/AutomataSUL.py +++ b/aalpy/SULs/AutomataSUL.py @@ -1,5 +1,5 @@ from aalpy.base import SUL -from aalpy.automata import Dfa, MealyMachine, MooreMachine, Onfsm, Mdp, StochasticMealyMachine, MarkovChain, Pda, Vpa +from aalpy.automata import Dfa, MealyMachine, MooreMachine, Onfsm, Mdp, StochasticMealyMachine, MarkovChain, Pda, Vpa, Sevpa class DfaSUL(SUL): @@ -210,3 +210,26 @@ def step(self, letter): return output, '-' return output, top return output + + +class SevpaSUL(SUL): + def __init__(self, sevpa: Sevpa, include_top=True, check_balance=True): + super().__init__() + self.sevpa = sevpa + self.include_top = include_top + self.check_balance = 
check_balance + + def pre(self): + self.sevpa.reset_to_initial() + + def post(self): + pass + + def step(self, letter): + output = self.sevpa.step(letter) + # top = self.sevpa.top() + if self.include_top: + if self.check_balance and self.sevpa.call_balance < 0: + return output, '-' + return output + return output \ No newline at end of file diff --git a/aalpy/automata/Pda.py b/aalpy/automata/Pda.py index 60b29100..003315d7 100644 --- a/aalpy/automata/Pda.py +++ b/aalpy/automata/Pda.py @@ -24,7 +24,7 @@ def __init__(self, start: PdaState, target: PdaState, symbol, action, stack_guar class Pda(Automaton): - empty = "$" + empty = "_" error_state = PdaState("ErrorSinkState", False) def __init__(self, initial_state: PdaState, states): @@ -75,7 +75,7 @@ def step(self, letter): trans = [t for t in transitions if t.stack_guard is None or self.top() == t.stack_guard][0] self.current_state = trans.target if trans.action == 'push': - self.stack.append(letter) + self.stack.append(trans.stack_guard) elif trans.action == 'pop': if len(self.stack) <= 1: # empty stack elem should always be there self.current_state = Pda.error_state diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py new file mode 100644 index 00000000..41963cb8 --- /dev/null +++ b/aalpy/automata/Sevpa.py @@ -0,0 +1,282 @@ +from collections import defaultdict + +from aalpy.base import Automaton, AutomatonState + + +class SevpaAlphabet: + def __init__(self, internal_alphabet, call_alphabet, return_alphabet): + self.internal_alphabet = internal_alphabet + self.call_alphabet = call_alphabet + self.return_alphabet = return_alphabet + + def __str__(self): + return f'Internal: {self.internal_alphabet} Call: {self.call_alphabet} Return: {self.return_alphabet}' + + def get_merged_alphabet(self) -> list: + alphabet = list() + alphabet.extend(self.internal_alphabet) + alphabet.extend(self.call_alphabet) + alphabet.extend(self.return_alphabet) + return alphabet + + +class SevpaState(AutomatonState): + """ + Single 
state of a deterministic finite automaton. + """ + + def __init__(self, state_id, is_accepting=False): + super().__init__(state_id) + self.transitions = defaultdict(list) + self.is_accepting = is_accepting + + +class SevpaTransition: + def __init__(self, start: SevpaState, target: SevpaState, symbol, action, stack_guard=None): + self.start = start + self.target = target + self.symbol = symbol + self.action = action + self.stack_guard = stack_guard + + def __str__(self): + return f"{self.symbol}: {self.start.state_id} --> {self.target.state_id} | {self.action}: {self.stack_guard}" + + +class Sevpa(Automaton): + empty = "_" + error_state = SevpaState("ErrorSinkState", False) + + def __init__(self, initial_state: SevpaState, states, input_alphabet: SevpaAlphabet): + super().__init__(initial_state, states) + self.initial_state = initial_state + self.states = states + self.input_alphabet = input_alphabet + self.current_state = None + self.call_balance = 0 + self.stack = [] + + def reset_to_initial(self): + super().reset_to_initial() + self.reset() + + def reset(self): + self.current_state = self.initial_state + self.stack = [self.empty] + self.call_balance = 0 + return self.current_state.is_accepting and self.top() == self.empty + + def top(self): + return self.stack[-1] + + def pop(self): + return self.stack.pop() + + def possible(self, letter): + """ + Checks if a certain step on the automaton is possible + + TODO: Adaptation for Stack content ? 
+ """ + if self.current_state == Sevpa.error_state: + return True + if letter is not None: + transitions = self.current_state.transitions[letter] + possible_trans = [] + for t in transitions: + if t.symbol in self.input_alphabet.call_alphabet: + possible_trans.append(t) + elif t.symbol in self.input_alphabet.return_alphabet: + if t.stack_guard == self.top(): + possible_trans.append(t) + elif t.symbol in self.input_alphabet.internal_alphabet: + possible_trans.append(t) + else: + assert False + # trans = [t for t in transitions if t.stack_guard is None or self.top() == t.stack_guard] + assert len(possible_trans) < 2 + if len(possible_trans) == 0: + return False + else: + return True + return False + + def step(self, letter): + if self.current_state == Sevpa.error_state: + return False + if not self.possible(letter): + self.current_state = Sevpa.error_state + return False + if letter is not None: + transitions = self.current_state.transitions[letter] + possible_trans = [] + for t in transitions: + if t.symbol in self.input_alphabet.call_alphabet: + possible_trans.append(t) + elif t.symbol in self.input_alphabet.return_alphabet: + if t.stack_guard == self.top(): + possible_trans.append(t) + elif t.symbol in self.input_alphabet.internal_alphabet: + possible_trans.append(t) + else: + assert False + + assert len(possible_trans) < 2 + trans = possible_trans[0] + self.current_state = trans.target + if trans.action == 'push': + assert(letter in self.input_alphabet.call_alphabet) # push letters must be in call set + self.stack.append(trans.stack_guard) + elif trans.action == 'pop': + assert(letter in self.input_alphabet.return_alphabet) # pop letters must be in return set + if len(self.stack) <= 1: # empty stack elem should always be there + self.current_state = Sevpa.error_state + return False + self.stack.pop() + + return self.current_state.is_accepting and self.top() == self.empty + + def to_state_setup(self): + state_setup_dict = {} + + # ensure prefixes are computed + # 
self.compute_prefixes() + + sorted_states = sorted(self.states, key=lambda x: len(x.prefix)) + for s in sorted_states: + state_setup_dict[s.state_id] = ( + s.is_accepting, {k: (v.target.state_id, v.action) for k, v in s.transitions.items()}) + + return state_setup_dict + + @staticmethod + def from_state_setup(state_setup: dict, init_state_id, input_alphabet: SevpaAlphabet): + """ + First state in the state setup is the initial state. + Example state setup: + state_setup = { + "a": (True, {"x": ("b1",PUSH), "y": ("a", NONE)}), + "b1": (False, {"x": ("b2", PUSH), "y": "a"}), + "b2": (True, {"x": "b3", "y": "a"}), + "b3": (False, {"x": "b4", "y": "a"}), + "b4": (False, {"x": "c", "y": "a"}), + "c": (True, {"x": "a", "y": "a"}), + } + + Args: + + state_setup: map from state_id to tuple(output and transitions_dict) + + Returns: + + PDA + """ + # state_setup should map from state_id to tuple(is_accepting and transitions_dict) + + # build states with state_id and output + states = {key: SevpaState(key, val[0]) for key, val in state_setup.items()} + states[Sevpa.error_state.state_id] = Sevpa.error_state # PdaState(Pda.error_state,False) + # add transitions to states + for state_id, state in states.items(): + if state_id == Sevpa.error_state.state_id: + continue + for _input, trans_spec in state_setup[state_id][1].items(): + for (target_state_id, action, stack_guard) in trans_spec: + if action == 'pop': + assert stack_guard[0] in states + assert stack_guard[1] in input_alphabet.call_alphabet + stack_guard = f'{stack_guard[0]}{stack_guard[1]}' + trans = SevpaTransition(start=state, target=states[target_state_id], symbol=_input, + action=action, stack_guard=stack_guard) + elif action == 'push': # In SEVPA you can only define return transitions and internal transitions + assert False + else: + trans = SevpaTransition(start=state, target=states[target_state_id], symbol=_input, + action=None, stack_guard=None) + + state.transitions[_input].append(trans) + + # add call 
transitions + for call_letter in input_alphabet.call_alphabet: + trans = SevpaTransition(start=state, target=states[init_state_id], symbol=call_letter, action='push', stack_guard=f'{state_id}{call_letter}') + state.transitions[call_letter].append(trans) + + init_state = states[init_state_id] + # states to list + states = [state for state in states.values()] + + sevpa = Sevpa(init_state, states, input_alphabet) + return sevpa + + +def generate_data_from_pda(automaton, num_examples, lens=None, classify_states=False, stack_limit=None, + break_on_impossible=False, possible_prob=0.75): + import random + from itertools import product + + input_al = automaton.get_input_alphabet() + + if lens is None: + lens = list(range(1, 15)) + + sum_lens = sum(lens) + # key is length, value is number of examples for said length + ex_per_len = dict() + + additional_seq = 0 + for l in lens: + ex_per_len[l] = int(num_examples * (l / sum_lens)) + 1 + if ex_per_len[l] > pow(len(input_al), l): + additional_seq += ex_per_len[l] - pow(len(input_al), l) + ex_per_len[l] = 'comb' + + additional_seq = additional_seq // len([i for i in ex_per_len.values() if i != 'comb']) + + training_data = [] + for l in ex_per_len.keys(): + seqs = [] + if ex_per_len[l] == 'comb': + + seqs = list(product(input_al, repeat=l)) + for seq in seqs: + + out = automaton.reset() + nr_steps = 0 + for inp in seq: + if automaton.possible(inp) or not break_on_impossible: + nr_steps += 1 + if stack_limit and len(automaton.stack) > stack_limit: + break + if break_on_impossible and not automaton.possible(inp): + break + out = automaton.step(inp) + seq = seq[:nr_steps] + training_data.append((tuple(seq), out if not classify_states else automaton.current_state.state_id)) + + else: + for _ in range(ex_per_len[l] + additional_seq): + # seq = [random.choice(input_al) for _ in range(l)] + out = automaton.reset() + nr_steps = 0 + seq = [] + for i in range(l): + possible_inp = [inp for inp in input_al if automaton.possible(inp)] + if 
len(possible_inp) == 0: + inp = random.choice(input_al) + else: + if random.random() <= possible_prob: + inp = random.choice(possible_inp) + else: + inp = random.choice(input_al) + seq.append(inp) + if automaton.possible(inp) or not break_on_impossible: + nr_steps += 1 + if stack_limit and len(automaton.stack) > stack_limit: + break + if break_on_impossible and not automaton.possible(inp): + break + out = automaton.step(inp) + seq = seq[:nr_steps] + training_data.append((tuple(seq), out)) + + return training_data diff --git a/aalpy/automata/Vpa.py b/aalpy/automata/Vpa.py index e8ba6f00..d9cfd9aa 100644 --- a/aalpy/automata/Vpa.py +++ b/aalpy/automata/Vpa.py @@ -195,8 +195,8 @@ def from_state_setup(state_setup: dict, init_state_id, call_set, return_set, int # states to list states = [state for state in states.values()] - pda = Vpa(init_state, states, call_set, return_set, internal_set) - return pda + vpa = Vpa(init_state, states, call_set, return_set, internal_set) + return vpa def generate_data_from_pda(automaton, num_examples, lens=None, classify_states=False, stack_limit=None, diff --git a/aalpy/automata/__init__.py b/aalpy/automata/__init__.py index 0471a298..0f7e6799 100644 --- a/aalpy/automata/__init__.py +++ b/aalpy/automata/__init__.py @@ -7,3 +7,4 @@ from .MarkovChain import MarkovChain, McState from .Pda import Pda from .Vpa import Vpa, VpaState +from .Sevpa import Sevpa, SevpaState, SevpaAlphabet, SevpaTransition diff --git a/aalpy/learning_algs/__init__.py b/aalpy/learning_algs/__init__.py index bbc2fe4a..715df848 100644 --- a/aalpy/learning_algs/__init__.py +++ b/aalpy/learning_algs/__init__.py @@ -8,4 +8,5 @@ from .stochastic_passive.ActiveAleriga import run_active_Alergia from .deterministic_passive.RPNI import run_RPNI from .deterministic_passive.active_RPNI import run_active_RPNI -from .vpda.VpdaLStar import run_vpda_Lstar \ No newline at end of file +from .vpda.VpdaLStar import run_vpda_Lstar +from .vpda.VpdaKV import run_KV_vpda diff --git 
a/aalpy/learning_algs/deterministic/KV.py b/aalpy/learning_algs/deterministic/KV.py index 4014b2bd..77a211e5 100644 --- a/aalpy/learning_algs/deterministic/KV.py +++ b/aalpy/learning_algs/deterministic/KV.py @@ -2,7 +2,7 @@ from aalpy.automata import Dfa, DfaState, MealyState, MealyMachine, MooreState, MooreMachine from aalpy.base import Oracle, SUL -from aalpy.utils.HelperFunctions import print_learning_info +from aalpy.utils.HelperFunctions import print_learning_info, visualize_classification_tree from .ClassificationTree import ClassificationTree from .CounterExampleProcessing import counterexample_successfully_processed from ...base.SUL import CacheSUL @@ -117,7 +117,9 @@ def run_KV(alphabet: list, sul: SUL, eq_oracle: Oracle, automaton_type, cex_proc eq_query_time += time.time() - eq_query_start if cex is None: - break + if print_level == 3: + visualize_classification_tree(classification_tree.root) + break else: cex = tuple(cex) @@ -154,5 +156,4 @@ def run_KV(alphabet: list, sul: SUL, eq_oracle: Oracle, automaton_type, cex_proc if return_data: return hypothesis, info - return hypothesis - + return hypothesis \ No newline at end of file diff --git a/aalpy/learning_algs/vpda/VpdaClassificationTree.py b/aalpy/learning_algs/vpda/VpdaClassificationTree.py index 787977de..fff9c04d 100644 --- a/aalpy/learning_algs/vpda/VpdaClassificationTree.py +++ b/aalpy/learning_algs/vpda/VpdaClassificationTree.py @@ -1,6 +1,6 @@ from collections import defaultdict -from aalpy.automata import DfaState, Dfa, MealyState, MealyMachine, MooreState, MooreMachine +from aalpy.automata import SevpaState, SevpaAlphabet, SevpaTransition, Sevpa from aalpy.base import SUL @@ -16,11 +16,11 @@ def is_leaf(self): class CTInternalNode(CTNode): - __slots__ = ['context_pair', 'children'] + __slots__ = ['distinguishing_string', 'children'] - def __init__(self, context_pair: tuple, parent, path_to_node): + def __init__(self, distinguishing_string: tuple, parent, path_to_node): 
super().__init__(parent, path_to_node) - self.context_pair = context_pair + self.distinguishing_string = distinguishing_string self.children = defaultdict(None) # {True: None, False: None} def is_leaf(self): @@ -54,7 +54,7 @@ def is_leaf(self): class VpdaClassificationTree: # TODO replace all dist. strings with context pairs appropriately - def __init__(self, alphabet: list, sul: SUL, cex: tuple): + def __init__(self, alphabet: SevpaAlphabet, sul: SUL, cex: tuple): self.sul = sul self.alphabet = alphabet @@ -68,7 +68,7 @@ def __init__(self, alphabet: list, sul: SUL, cex: tuple): self.query_cache[()] = initial_output - self.root = CTInternalNode(context_pair=tuple([(), ()]), parent=None, path_to_node=None) + self.root = CTInternalNode(distinguishing_string=tuple([(), ()]), parent=None, path_to_node=None) initial_output_node = CTLeafNode(access_string=tuple(), parent=self.root, path_to_node=initial_output) cex_output_node = CTLeafNode(access_string=cex, parent=self.root, path_to_node=cex_output) @@ -79,6 +79,7 @@ def __init__(self, alphabet: list, sul: SUL, cex: tuple): self.leaf_nodes[tuple()] = initial_output_node self.leaf_nodes[cex] = cex_output_node + def _sift(self, word): """ Sifting a word into the classification tree. @@ -96,7 +97,7 @@ def _sift(self, word): the CTLeafNode that is reached by the sifting operation. 
""" for letter in word: - assert letter is None or letter in self.alphabet + assert letter is None or letter in self.alphabet.get_merged_alphabet() if word in self.sifting_cache: return self.sifting_cache[word] @@ -104,7 +105,7 @@ def _sift(self, word): node = self.root while not node.is_leaf(): - query = node.context_pair[0] + word + node.context_pair[1] + query = node.distinguishing_string[0] + word + node.distinguishing_string[1] if query not in self.query_cache.keys(): mq_result = self.sul.query(query) @@ -132,7 +133,65 @@ def gen_hypothesis(self): # TODO take a look at kv how it is done - return None + states = dict() + initial_state = None + state_counter = 0 + for node in self.leaf_nodes.values(): + + new_state = SevpaState(state_id=f'q{state_counter}', is_accepting=node.output) + + new_state.prefix = node.access_string + if new_state.prefix == (): + initial_state = new_state + states[new_state.prefix] = new_state + state_counter += 1 + assert initial_state is not None + + # for each state + # open + # internals = > state.acc + internal + # open + # returns + # for all call + # for all other_state + # for all return + # open -> other_state.acc + call + state.acc + + # return + # if other_state != state + # open -> state.acc + call + other_state.acc + ret + + states_for_transitions = list(states.values()) + for state in states_for_transitions: + # Check internal transitions + for internal_letter in self.alphabet.internal_alphabet: + transition_target_node = self._sift(state.prefix + (internal_letter, )) + transition_target_access_string = transition_target_node.access_string + + assert transition_target_access_string in states # TODO: trigger this + trans = SevpaTransition(start=state, target=states[transition_target_access_string], symbol=internal_letter, action=None) + state.transitions[internal_letter].append(trans) + + # Add call transitions + for call_letter in self.alphabet.call_alphabet: + trans = SevpaTransition(start=state, target=initial_state, 
symbol=call_letter, action='push', stack_guard=f'{state.state_id}{call_letter}') + state.transitions[call_letter].append(trans) + for other_state in states_for_transitions: + # Add return transitions + for return_letter in self.alphabet.return_alphabet: + transition_target_node = self._sift(other_state.prefix + (call_letter, ) + state.prefix + (return_letter, )) + transition_target_access_string = transition_target_node.access_string + # call_letter_node = self._sift((call_letter,)) + # call_letter_access_string = call_letter_node.access_string + stack_guard = f'{other_state.state_id}{call_letter}' + trans = SevpaTransition(start=state, target=states[transition_target_access_string], symbol=return_letter, + action='pop', stack_guard=stack_guard) + state.transitions[return_letter].append(trans) + + + + states = [state for state in states.values()] + + return Sevpa(initial_state=initial_state, states=states, input_alphabet=self.alphabet) def _least_common_ancestor(self, node_1_id, node_2_id): """ diff --git a/aalpy/learning_algs/vpda/VpdaKV.py b/aalpy/learning_algs/vpda/VpdaKV.py index bf8c2945..4b6fd35a 100644 --- a/aalpy/learning_algs/vpda/VpdaKV.py +++ b/aalpy/learning_algs/vpda/VpdaKV.py @@ -1,8 +1,8 @@ import time -from aalpy.automata import Dfa, DfaState, MealyState, MealyMachine, MooreState, MooreMachine +from aalpy.automata import Sevpa, SevpaState from aalpy.base import Oracle, SUL -from aalpy.utils.HelperFunctions import print_learning_info +from aalpy.utils.HelperFunctions import print_learning_info, visualize_classification_tree from .VpdaClassificationTree import VpdaClassificationTree from ..deterministic.CounterExampleProcessing import counterexample_successfully_processed from ...base.SUL import CacheSUL @@ -57,7 +57,7 @@ def run_KV_vpda(alphabet: list, sul: SUL, eq_oracle: Oracle, cex_processing='rs' empty_string_mq = sul.query(tuple())[-1] - initial_state = DfaState(state_id='s0', is_accepting=empty_string_mq) + initial_state = 
SevpaState(state_id='s0', is_accepting=empty_string_mq) initial_state.prefix = tuple() @@ -70,11 +70,13 @@ def run_KV_vpda(alphabet: list, sul: SUL, eq_oracle: Oracle, cex_processing='rs' # Either -> one state and then procedure is same like in default KV (add cex later) # Discover a new state - hypothesis = None + hypothesis = Sevpa(initial_state=initial_state, states=[], input_alphabet=alphabet) # Perform an equivalence query on this automaton eq_query_start = time.time() cex = eq_oracle.find_cex(hypothesis) + print(f'Counterexample: {cex}') + eq_query_time += time.time() - eq_query_start if cex is not None: cex = tuple(cex) @@ -83,6 +85,7 @@ def run_KV_vpda(alphabet: list, sul: SUL, eq_oracle: Oracle, cex_processing='rs' # labeled with the empty word as the distinguishing string # and two leaves labeled with access strings cex and empty word classification_tree = VpdaClassificationTree(alphabet=alphabet, sul=sul, cex=cex) + visualize_classification_tree(classification_tree.root) while True: learning_rounds += 1 @@ -90,6 +93,7 @@ def run_KV_vpda(alphabet: list, sul: SUL, eq_oracle: Oracle, cex_processing='rs' break hypothesis = classification_tree.gen_hypothesis() + return hypothesis if print_level == 2: print(f'\rHypothesis {learning_rounds}: {hypothesis.size} states.', end="") @@ -105,6 +109,8 @@ def run_KV_vpda(alphabet: list, sul: SUL, eq_oracle: Oracle, cex_processing='rs' eq_query_time += time.time() - eq_query_start if cex is None: + if print_level == 3: + visualize_classification_tree(classification_tree.root) break else: cex = tuple(cex) diff --git a/aalpy/utils/BenchmarkSULs.py b/aalpy/utils/BenchmarkSULs.py index 50f918b0..b53b0eff 100644 --- a/aalpy/utils/BenchmarkSULs.py +++ b/aalpy/utils/BenchmarkSULs.py @@ -1,15 +1,15 @@ def get_Angluin_dfa(): - from aalpy.utils import dfa_from_state_setup + from aalpy.automata import Dfa - anguin_dfa = { + angluin_dfa = { 'q0': (True, {'a': 'q1', 'b': 'q2'}), 'q1': (False, {'a': 'q0', 'b': 'q3'}), 'q2': (False, 
{'a': 'q3', 'b': 'q0'}), 'q3': (False, {'a': 'q2', 'b': 'q1'}) } + dfa = Dfa.from_state_setup(angluin_dfa) - return dfa_from_state_setup(anguin_dfa) - + return dfa def get_benchmark_ONFSM(): """ diff --git a/aalpy/utils/BenchmarkSevpaModels.py b/aalpy/utils/BenchmarkSevpaModels.py new file mode 100644 index 00000000..4307bbed --- /dev/null +++ b/aalpy/utils/BenchmarkSevpaModels.py @@ -0,0 +1,378 @@ +from aalpy.automata.Sevpa import Sevpa, SevpaAlphabet + + +# def sevpa_for_L1(): +# # we always ensure that n >= 1 +# +# call_set = {'a'} +# return_set = {'b'} +# internal_set = {} +# +# state_setup = { +# "q0": (False, {"a": [("q1", 'push', None)], "b": [(Sevpa.error_state.state_id, None, None)]}), +# "q1": (False, {"a": [("q1", 'push', None)], "b": [("q2", 'pop', "a")]}), +# "q2": (True, {"a": [(Sevpa.error_state.state_id, None, None)], "b": [("q2", 'pop', "a")]}), +# } +# sevpa = Sevpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) +# return sevpa + + +# def vpa_for_L2(): +# +# call_set = {'a', 'b'} +# return_set = {'c', 'd'} +# internal_set = {} +# +# state_setup = { +# "q0": (False, {"a": [("q1", 'push', None)], "b": [("q1", 'push', None)], +# "c": [(Vpa.error_state.state_id, None, None)], +# "d": [(Vpa.error_state.state_id, None, None)]}), +# "q1": (False, {"a": [("q1", 'push', None)], "b": [("q1", 'push', None)], +# "c": [("q2", 'pop', "a"), ("q2", 'pop', "b")], +# "d": [("q2", 'pop', "a"), ("q2", 'pop', "b")]}), +# "q2": (True, {"a": [(Vpa.error_state.state_id, None, None)], +# "b": [(Vpa.error_state.state_id, None, None)], +# "c": [("q2", 'pop', "a"), ("q2", 'pop', "b")], +# "d": [("q2", 'pop', "a"), ("q2", 'pop', "b")]}), +# } +# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) +# return vpa +# +# +# def vpa_for_L3(): +# +# call_set = {'a', 'c', 'b', 'd'} +# return_set = {'e', 'g', 'f', 'h'} +# internal_set = {} +# +# state_setup = { +# "q0": (False, {"a": [("q0a", 'push', None)], +# "c": [("q0c", 
'push', None)], +# }), +# "q0a": (False, {"b": [("q1", 'push', None)]}), +# "q0c": (False, {"d": [("q1", 'push', None)]}), +# "q1": (False, {"a": [("q1a", 'push', None)], +# "c": [("q1c", 'push', None)], +# "e": [("q2e", 'pop', "b"), ("q2e", 'pop', "d")], +# "g": [("q2g", 'pop', "b"), ("q2g", 'pop', "d")], # stack should actually be redundant +# }), +# "q1a": (False, {"b": [("q1", 'push', None)]}), +# "q1c": (False, {"d": [("q1", 'push', None)]}), +# "q2e": (False, {"f": [("q2", 'pop', "a"), ("q2", 'pop', "c")]}), +# "q2g": (False, {"h": [("q2", 'pop', "a"), ("q2", 'pop', "c")]}), +# "q2": (True, {"e": [("q2e", 'pop', "b"), ("q2e", 'pop', "d")], +# "g": [("q2g", 'pop', "b"), ("q2g", 'pop', "d")]}) +# } +# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) +# return vpa +# +# +# def vpa_for_L4(): +# +# call_set = {'a', 'b'} +# return_set = {'c', 'd'} +# internal_set = {} +# +# state_setup = { +# "q0": (False, {"a": [("q01", 'push', None)], "b": [(Vpa.error_state.state_id, None, None)]}), +# "q01": (False, {"b": [("q1", 'push', None)], "a": [(Vpa.error_state.state_id, None, None)]}), +# +# "q1": (False, {"a": [("q11", 'push', None)], "b": [(Vpa.error_state.state_id, None, None)], +# "c": [("q21", 'pop', "b")]}), +# "q11": (False, {"b": [("q1", 'push', None)], "a": [(Vpa.error_state.state_id, None, None)]}), +# "q21": (False, {"d": [("q2", 'pop', "a")]}), +# "q2": (True, {"c": [("q21", 'pop', "b")]}), +# } +# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) +# return vpa +# +# +# def vpa_for_L5(): +# +# call_set = {'a', 'b', 'c'} +# return_set = {'d', 'e', 'f'} +# internal_set = {} +# +# state_setup = { +# "q0": (False, {"a": [("q01", 'push', None)]}), +# "q01": (False, {"b": [("q02", 'push', None)]}), +# "q02": (False, {"c": [("q1", 'push', None)]}), +# "q1": (False, {"a": [("q11", 'push', None)], +# "d": [("q21", 'pop', "c")]}), +# "q11": (False, {"b": [("q12", 'push', None)]}), +# "q12": (False, {"c": 
[("q1", 'push', None)]}), +# "q21": (False, {"e": [("q22", 'pop', "b")]}), +# "q22": (False, {"f": [("q2", 'pop', "a")]}), +# "q2": (True, {"d": [("q21", 'pop', "c")]}), +# } +# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) +# return vpa +# +# +# def vpa_for_L7(): +# # Dyck order 2 +# +# call_set = {'(', '['} +# return_set = {')', ']'} +# internal_set = {} +# +# state_setup = { +# "q0": (False, {"(": [("q1", 'push', None)], +# "[": [("q1", 'push', None)], # exclude empty seq +# }), +# "q1": (True, {"(": [("q1", 'push', None)], +# "[": [("q1", 'push', None)], +# ")": [("q1", 'pop', "(")], +# "]": [("q1", 'pop', "[")] +# }), +# } +# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) +# return vpa +# +# +# def vpa_for_L8(): +# # Dyck order 3 +# +# call_set = {'(', '[', '{'} +# return_set = {')', ']', '}'} +# internal_set = {} +# +# state_setup = { +# "q0": (False, {"(": [("q1", 'push', None)], +# "[": [("q1", 'push', None)], +# "{": [("q1", 'push', None)], +# }), +# "q1": (True, {"(": [("q1", 'push', None)], +# "[": [("q1", 'push', None)], +# "{": [("q1", 'push', None)], +# ")": [("q1", 'pop', "(")], +# "]": [("q1", 'pop', "[")], +# "}": [("q1", 'pop', "{")], +# }), +# } +# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) +# return vpa +# +# +# def vpa_for_L9(): +# # Dyck order 4 +# +# call_set = {'(', '[', '{', '<'} +# return_set = {')', ']', '}', '>'} +# internal_set = {} +# +# state_setup = { +# "q0": (False, {"(": [("q1", 'push', None)], +# "[": [("q1", 'push', None)], +# "{": [("q1", 'push', None)], +# "<": [("q1", 'push', None)], +# }), +# "q1": (True, {"(": [("q1", 'push', None)], +# "[": [("q1", 'push', None)], +# "{": [("q1", 'push', None)], +# "<": [("q1", 'push', None)], +# ")": [("q1", 'pop', "(")], +# "]": [("q1", 'pop', "[")], +# "}": [("q1", 'pop', "{")], +# ">": [("q1", 'pop', "{")], +# }), +# } +# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, 
return_set, internal_set) +# return vpa +# +# +# def vpa_for_L10(): +# # RE Dyck order 1 +# +# call_set = {'a'} +# return_set = {'v'} +# internal_set = {'b', 'c', 'd', ' e', 'w', 'x', 'y', 'z'} +# +# state_setup = { +# "q0": (False, {"a": [("qa", 'push', None)], +# }), +# "qa": (False, {"b": [("qb", None, None)], +# }), +# "qb": (False, {"c": [("qc", None, None)], +# }), +# "qc": (False, {"d": [("qd", None, None)], +# }), +# "qd": (False, {"e": [("q1", None, None)], +# }), +# "q1": (True, {"a": [("qa", 'push', None)], +# "v": [("qv", 'pop', "a")]}), +# "qv": (False, {"w": [("qw", None, None)]}), +# "qw": (False, {"x": [("qx", None, None)]}), +# "qx": (False, {"y": [("qy", None, None)]}), +# "qy": (False, {"z": [("q1", None, None)]}) +# } +# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) +# return vpa +# +# +# def vpa_for_L11(): +# # RE Dyck order 1 +# +# call_set = {'a', 'c'} +# return_set = {'d', 'f'} +# internal_set = {'b', 'e'} +# +# state_setup = { +# "q0": (False, {"a": [("qa", 'push', None)], +# "c": [("q1", 'push', None)], +# }), +# "qa": (False, {"b": [("q1", None, None)], +# }), +# "q1": (True, {"a": [("qa", 'push', None)], +# "c": [("q1", 'push', None)], +# "d": [("qd", 'pop', "a"), ("qd", 'pop', "c")], +# "f": [("q1", 'pop', "a"), ("q1", 'pop', "c")]}), +# "qd": (False, {"e": [("q1", None, None)]}) +# } +# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) +# return vpa +# +# +# def vpa_for_L12(): +# # Dyck order 2 (single-nested) +# +# call_set = {'(', '['} +# return_set = {')', ']'} +# internal_set = {} +# +# state_setup = { +# "q0": (False, {"(": [("q1", 'push', None)], +# "[": [("q1", 'push', None)], # exclude empty seq +# }), +# "q1": (False, {"(": [("q1", 'push', None)], +# "[": [("q1", 'push', None)], +# ")": [("q2", 'pop', "(")], +# "]": [("q2", 'pop', "[")]}), +# "q2": (True, { +# ")": [("q2", 'pop', "(")], +# "]": [("q2", 'pop', "[")] +# }), +# } +# vpa = 
Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) +# return vpa +# +# +# def vpa_for_L13(): +# # Dyck order 1 +# +# call_set = {'('} +# return_set = {')'} +# internal_set = {'a', 'b', 'c'} +# +# state_setup = { +# "q0": (False, {"(": [("q1", 'push', None)], +# "a": [("q1", None, None)], +# "b": [("q1", None, None)], +# "c": [("q1", None, None)], # exclude empty seq +# }), +# "q1": (True, {"(": [("q1", 'push', None)], +# ")": [("q1", 'pop', "(")], +# "a": [("q1", None, None)], +# "b": [("q1", None, None)], +# "c": [("q1", None, None)] +# }), +# } +# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) +# return vpa +# +# +# def vpa_for_L14(): +# # Dyck order 2 +# +# call_set = {'(', '['} +# return_set = {')', ']'} +# internal_set = {'a', 'b', 'c'} +# +# state_setup = { +# "q0": (False, {"(": [("q1", 'push', None)], +# "[": [("q1", 'push', None)], +# "a": [("q1", None, None)], +# "b": [("q1", None, None)], +# "c": [("q1", None, None)], # exclude empty seq +# }), +# "q1": (True, {"(": [("q1", 'push', None)], +# "[": [("q1", 'push', None)], +# ")": [("q1", 'pop', "(")], +# "]": [("q1", 'pop', "[")], +# "a": [("q1", None, None)], +# "b": [("q1", None, None)], +# "c": [("q1", None, None)] +# }), +# } +# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) +# return vpa +# +# +# def vpa_for_L15(): +# # Dyck order 1 +# +# call_set = {'('} +# return_set = {')'} +# internal_set = {'a', 'b', 'c', 'd'} +# +# state_setup = { +# "q0": (False, {"(": [("q1", 'push', None)], +# "a": [("qa", None, None)], +# "d": [("q1", None, None)], # exclude empty seq +# }), +# "q1": (True, {"(": [("q1", 'push', None)], +# ")": [("q1", 'pop', "(")], +# "a": [("qa", None, None)], +# "d": [("q1", None, None)], +# }), +# "qa": (False, {"b": [("qb", None, None)], +# }), +# "qb": (False, {"c": [("q1", None, None)], +# }) +# } +# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) +# return 
vpa +# +# +# def vpa_for_L16(): +# # just a testing language +# call_set = {'a'} +# return_set = {'b'} +# internal_set = {} +# +# state_setup = { +# "q0": (False, {"a": [("q1", 'push', "$")]}), +# "q1": (False, {"a": [("q1", 'push', "x")], +# "b": [("q1", 'pop', "x"), ("q2", 'pop', "$")], +# }), +# "q2": (True, {}) +# } +# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) +# return vpa + +def sevpa_for_L12_refined(): + # Like L12 Language (Balanced parathesis) but the state setup is different + + call_set = {'(', '['} + return_set = {')', ']'} + internal_set = {'x'} + + input_alphabet = SevpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) + + state_setup = { + "q0": (False, {")": [("q1", 'pop', ("q0", "("))], + "]": [("q1", 'pop', ("q0", "["))], + "x": [("q1", None, None)] + }), + "q1": (True, {")": [("q1", 'pop', ("q0", "("))], + "]": [("q1", 'pop', ("q0", "["))], + "x": [("q0", None, None)] + }), + } + sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + return sevpa + + + + + diff --git a/aalpy/utils/BenchmarkVpaModels.py b/aalpy/utils/BenchmarkVpaModels.py index 3878b299..1b8f2291 100644 --- a/aalpy/utils/BenchmarkVpaModels.py +++ b/aalpy/utils/BenchmarkVpaModels.py @@ -1,21 +1,261 @@ from aalpy.automata.Vpa import Vpa def vpa_for_L1(): - # just a testing language + # we always ensure that n >= 1 + call_set = {'a'} return_set = {'b'} internal_set = {} state_setup = { - "q0": (False, {"a": [("q1", 'push', "$")]}), - "q1": (False, {"a": [("q1", 'push', "x")], - "b": [("q1", 'pop', "x"), ("q2", 'pop', "$")], + "q0": (False, {"a": [("q1", 'push', None)], "b": [(Vpa.error_state.state_id, None, None)]}), + "q1": (False, {"a": [("q1", 'push', None)], "b": [("q2", 'pop', "a")]}), + "q2": (True, {"a": [(Vpa.error_state.state_id, None, None)], "b": [("q2", 'pop', "a")]}), + } + vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + return vpa + + +def 
vpa_for_L2(): + + call_set = {'a', 'b'} + return_set = {'c', 'd'} + internal_set = {} + + state_setup = { + "q0": (False, {"a": [("q1", 'push', None)], "b": [("q1", 'push', None)], + "c": [(Vpa.error_state.state_id, None, None)], + "d": [(Vpa.error_state.state_id, None, None)]}), + "q1": (False, {"a": [("q1", 'push', None)], "b": [("q1", 'push', None)], + "c": [("q2", 'pop', "a"), ("q2", 'pop', "b")], + "d": [("q2", 'pop', "a"), ("q2", 'pop', "b")]}), + "q2": (True, {"a": [(Vpa.error_state.state_id, None, None)], + "b": [(Vpa.error_state.state_id, None, None)], + "c": [("q2", 'pop', "a"), ("q2", 'pop', "b")], + "d": [("q2", 'pop', "a"), ("q2", 'pop', "b")]}), + } + vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + return vpa + + +def vpa_for_L3(): + + call_set = {'a', 'c', 'b', 'd'} + return_set = {'e', 'g', 'f', 'h'} + internal_set = {} + + state_setup = { + "q0": (False, {"a": [("q0a", 'push', None)], + "c": [("q0c", 'push', None)], }), - "q2": (True, {}) + "q0a": (False, {"b": [("q1", 'push', None)]}), + "q0c": (False, {"d": [("q1", 'push', None)]}), + "q1": (False, {"a": [("q1a", 'push', None)], + "c": [("q1c", 'push', None)], + "e": [("q2e", 'pop', "b"), ("q2e", 'pop', "d")], + "g": [("q2g", 'pop', "b"), ("q2g", 'pop', "d")], # stack should actually be redundant + }), + "q1a": (False, {"b": [("q1", 'push', None)]}), + "q1c": (False, {"d": [("q1", 'push', None)]}), + "q2e": (False, {"f": [("q2", 'pop', "a"), ("q2", 'pop', "c")]}), + "q2g": (False, {"h": [("q2", 'pop', "a"), ("q2", 'pop', "c")]}), + "q2": (True, {"e": [("q2e", 'pop', "b"), ("q2e", 'pop', "d")], + "g": [("q2g", 'pop', "b"), ("q2g", 'pop', "d")]}) + } + vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + return vpa + + +def vpa_for_L4(): + + call_set = {'a', 'b'} + return_set = {'c', 'd'} + internal_set = {} + + state_setup = { + "q0": (False, {"a": [("q01", 'push', None)], "b": [(Vpa.error_state.state_id, None, None)]}), + "q01": 
(False, {"b": [("q1", 'push', None)], "a": [(Vpa.error_state.state_id, None, None)]}), + + "q1": (False, {"a": [("q11", 'push', None)], "b": [(Vpa.error_state.state_id, None, None)], + "c": [("q21", 'pop', "b")]}), + "q11": (False, {"b": [("q1", 'push', None)], "a": [(Vpa.error_state.state_id, None, None)]}), + "q21": (False, {"d": [("q2", 'pop', "a")]}), + "q2": (True, {"c": [("q21", 'pop', "b")]}), } vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) return vpa + +def vpa_for_L5(): + + call_set = {'a', 'b', 'c'} + return_set = {'d', 'e', 'f'} + internal_set = {} + + state_setup = { + "q0": (False, {"a": [("q01", 'push', None)]}), + "q01": (False, {"b": [("q02", 'push', None)]}), + "q02": (False, {"c": [("q1", 'push', None)]}), + "q1": (False, {"a": [("q11", 'push', None)], + "d": [("q21", 'pop', "c")]}), + "q11": (False, {"b": [("q12", 'push', None)]}), + "q12": (False, {"c": [("q1", 'push', None)]}), + "q21": (False, {"e": [("q22", 'pop', "b")]}), + "q22": (False, {"f": [("q2", 'pop', "a")]}), + "q2": (True, {"d": [("q21", 'pop', "c")]}), + } + vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + return vpa + + +def vpa_for_L7(): + # Dyck order 2 + + call_set = {'(', '['} + return_set = {')', ']'} + internal_set = {} + + state_setup = { + "q0": (False, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], # exclude empty seq + }), + "q1": (True, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], + ")": [("q1", 'pop', "(")], + "]": [("q1", 'pop', "[")] + }), + } + vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + return vpa + + +def vpa_for_L8(): + # Dyck order 3 + + call_set = {'(', '[', '{'} + return_set = {')', ']', '}'} + internal_set = {} + + state_setup = { + "q0": (False, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], + "{": [("q1", 'push', None)], + }), + "q1": (True, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], + 
"{": [("q1", 'push', None)], + ")": [("q1", 'pop', "(")], + "]": [("q1", 'pop', "[")], + "}": [("q1", 'pop', "{")], + }), + } + vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + return vpa + + +def vpa_for_L9(): + # Dyck order 4 + + call_set = {'(', '[', '{', '<'} + return_set = {')', ']', '}', '>'} + internal_set = {} + + state_setup = { + "q0": (False, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], + "{": [("q1", 'push', None)], + "<": [("q1", 'push', None)], + }), + "q1": (True, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], + "{": [("q1", 'push', None)], + "<": [("q1", 'push', None)], + ")": [("q1", 'pop', "(")], + "]": [("q1", 'pop', "[")], + "}": [("q1", 'pop', "{")], + ">": [("q1", 'pop', "{")], + }), + } + vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + return vpa + + +def vpa_for_L10(): + # RE Dyck order 1 + + call_set = {'a'} + return_set = {'v'} + internal_set = {'b', 'c', 'd', ' e', 'w', 'x', 'y', 'z'} + + state_setup = { + "q0": (False, {"a": [("qa", 'push', None)], + }), + "qa": (False, {"b": [("qb", None, None)], + }), + "qb": (False, {"c": [("qc", None, None)], + }), + "qc": (False, {"d": [("qd", None, None)], + }), + "qd": (False, {"e": [("q1", None, None)], + }), + "q1": (True, {"a": [("qa", 'push', None)], + "v": [("qv", 'pop', "a")]}), + "qv": (False, {"w": [("qw", None, None)]}), + "qw": (False, {"x": [("qx", None, None)]}), + "qx": (False, {"y": [("qy", None, None)]}), + "qy": (False, {"z": [("q1", None, None)]}) + } + vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + return vpa + + +def vpa_for_L11(): + # RE Dyck order 1 + + call_set = {'a', 'c'} + return_set = {'d', 'f'} + internal_set = {'b', 'e'} + + state_setup = { + "q0": (False, {"a": [("qa", 'push', None)], + "c": [("q1", 'push', None)], + }), + "qa": (False, {"b": [("q1", None, None)], + }), + "q1": (True, {"a": [("qa", 'push', None)], + "c": [("q1", 
'push', None)], + "d": [("qd", 'pop', "a"), ("qd", 'pop', "c")], + "f": [("q1", 'pop', "a"), ("q1", 'pop', "c")]}), + "qd": (False, {"e": [("q1", None, None)]}) + } + vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + return vpa + + +def vpa_for_L12(): + # Dyck order 2 (single-nested) + + call_set = {'(', '['} + return_set = {')', ']'} + internal_set = {} + + state_setup = { + "q0": (False, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], # exclude empty seq + }), + "q1": (False, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], + ")": [("q2", 'pop', "(")], + "]": [("q2", 'pop', "[")]}), + "q2": (True, { + ")": [("q2", 'pop', "(")], + "]": [("q2", 'pop', "[")] + }), + } + vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + return vpa + + def vpa_for_L13(): # Dyck order 1 @@ -39,3 +279,75 @@ def vpa_for_L13(): vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) return vpa + +def vpa_for_L14(): + # Dyck order 2 + + call_set = {'(', '['} + return_set = {')', ']'} + internal_set = {'a', 'b', 'c'} + + state_setup = { + "q0": (False, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], + "a": [("q1", None, None)], + "b": [("q1", None, None)], + "c": [("q1", None, None)], # exclude empty seq + }), + "q1": (True, {"(": [("q1", 'push', None)], + "[": [("q1", 'push', None)], + ")": [("q1", 'pop', "(")], + "]": [("q1", 'pop', "[")], + "a": [("q1", None, None)], + "b": [("q1", None, None)], + "c": [("q1", None, None)] + }), + } + vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + return vpa + + +def vpa_for_L15(): + # Dyck order 1 + + call_set = {'('} + return_set = {')'} + internal_set = {'a', 'b', 'c', 'd'} + + state_setup = { + "q0": (False, {"(": [("q1", 'push', None)], + "a": [("qa", None, None)], + "d": [("q1", None, None)], # exclude empty seq + }), + "q1": (True, {"(": [("q1", 'push', None)], + ")": [("q1", 
'pop', "(")], + "a": [("qa", None, None)], + "d": [("q1", None, None)], + }), + "qa": (False, {"b": [("qb", None, None)], + }), + "qb": (False, {"c": [("q1", None, None)], + }) + } + vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + return vpa + + +def vpa_for_L16(): + # just a testing language + call_set = {'a'} + return_set = {'b'} + internal_set = {} + + state_setup = { + "q0": (False, {"a": [("q1", 'push', "$")]}), + "q1": (False, {"a": [("q1", 'push', "x")], + "b": [("q1", 'pop', "x"), ("q2", 'pop', "$")], + }), + "q2": (True, {}) + } + vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + return vpa + + + diff --git a/aalpy/utils/FileHandler.py b/aalpy/utils/FileHandler.py index 85fcdf07..84c89eda 100644 --- a/aalpy/utils/FileHandler.py +++ b/aalpy/utils/FileHandler.py @@ -6,11 +6,11 @@ from pydot import Dot, Node, Edge, graph_from_dot_file from aalpy.automata import Dfa, MooreMachine, Mdp, Onfsm, MealyState, DfaState, MooreState, MealyMachine, \ - MdpState, StochasticMealyMachine, StochasticMealyState, OnfsmState, MarkovChain, McState, Pda, Vpa + MdpState, StochasticMealyMachine, StochasticMealyState, OnfsmState, MarkovChain, McState, Pda, Vpa, Sevpa file_types = ['dot', 'png', 'svg', 'pdf', 'string'] automaton_types = {Dfa: 'dfa', MealyMachine: 'mealy', MooreMachine: 'moore', Mdp: 'mdp', - StochasticMealyMachine: 'smm', Onfsm: 'onfsm', MarkovChain: 'mc', Pda: 'pda', Vpa: 'vpa'} + StochasticMealyMachine: 'smm', Onfsm: 'onfsm', MarkovChain: 'mc', Pda: 'pda', Vpa: 'vpa', Sevpa: 'vpa'} def _wrap_label(label): diff --git a/aalpy/utils/HelperFunctions.py b/aalpy/utils/HelperFunctions.py index ef65330e..d4aba028 100644 --- a/aalpy/utils/HelperFunctions.py +++ b/aalpy/utils/HelperFunctions.py @@ -296,3 +296,31 @@ def convert_i_o_traces_for_RPNI(sequences): rpni_sequences.add((inputs, s[i][1])) return list(rpni_sequences) + + +def visualize_classification_tree(root_node): + from pydot import Dot, Node, 
Edge + + graph = Dot('classification_tree', graph_type='digraph') + root_node_dot = Node(id(root_node), shape='box', + label=f'Distinguishing String:\n{root_node.distinguishing_string}') + graph.add_node(root_node_dot) + + queue = [(root_node, root_node_dot)] + + while queue: + origin_node, origin_node_dot = queue.pop(0) + + for key, child in origin_node.children.items(): + if child.is_leaf(): + destination_dot = Node(id(child), label=f'Access String:\n{child.access_string}') + else: + destination_dot = Node(id(child), shape='box', + label=f'Distinguishing String:\n{child.distinguishing_string}') + queue.append((child, destination_dot)) + graph.add_node(destination_dot) + graph.add_edge(Edge(origin_node_dot, destination_dot, label=key)) + + # print(graph.to_string()) + graph.write(path='classification_tree.pdf', format='pdf') + diff --git a/aalpy/utils/__init__.py b/aalpy/utils/__init__.py index 53cb7b67..25802f01 100644 --- a/aalpy/utils/__init__.py +++ b/aalpy/utils/__init__.py @@ -8,4 +8,4 @@ from .ModelChecking import model_check_experiment, mdp_2_prism_format, model_check_properties, get_properties_file, \ get_correct_prop_values, compare_automata, generate_test_cases, statistical_model_checking, \ bisimilar -from .HelperFunctions import make_input_complete, convert_i_o_traces_for_RPNI +from .HelperFunctions import make_input_complete, convert_i_o_traces_for_RPNI \ No newline at end of file diff --git a/pda_main_experiments.py b/pda_main_experiments.py index e6bd731c..f52b5f6f 100644 --- a/pda_main_experiments.py +++ b/pda_main_experiments.py @@ -7,6 +7,7 @@ pda = pda_for_L12() +pda.visualize() input_alphabet = pda.get_input_alphabet() sul = PdaSUL(pda, include_top=True, check_balance=True) diff --git a/sevpa_main_experiments.py b/sevpa_main_experiments.py new file mode 100644 index 00000000..98959dd6 --- /dev/null +++ b/sevpa_main_experiments.py @@ -0,0 +1,49 @@ +from aalpy.SULs.AutomataSUL import SevpaSUL, DfaSUL +from aalpy.automata.Pda import 
generate_data_from_pda +from aalpy.learning_algs import run_KV_vpda, run_KV +from aalpy.oracles import RandomWordEqOracle, RandomWalkEqOracle +from aalpy.utils import visualize_automaton, get_Angluin_dfa +from aalpy.utils.BenchmarkPdaModels import * +from aalpy.utils.BenchmarkVpaModels import * +from aalpy.utils.BenchmarkSevpaModels import * + +# Example for normal KV + +# dfa = get_Angluin_dfa() +# +# visualize_automaton(dfa, path="InitialModel") +# +# alphabet = dfa.get_input_alphabet() +# +# sul = DfaSUL(dfa) +# eq_oracle = RandomWalkEqOracle(alphabet, sul, 500) +# +# learned_dfa = run_KV(alphabet, sul, eq_oracle, automaton_type='dfa', cache_and_non_det_check=True, cex_processing=None, print_level=3) +# +# learned_dfa.visualize() + +######################################## + +sevpa = sevpa_for_L12_refined() + +# visualize_automaton(sevpa, path="InitialModel") + +print(sevpa.input_alphabet) +merged_input_alphabet = sevpa.input_alphabet.get_merged_alphabet() + +sul = SevpaSUL(sevpa, include_top=True, check_balance=True) +print(sul.query(('(', ')'))) +print(sul.query(('[', ')'))) +print(sul.query(('[', '(', ')', ']'))) + + +eq_oracle = RandomWordEqOracle(alphabet=merged_input_alphabet, sul=sul) +model = run_KV_vpda(alphabet=sevpa.input_alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3, max_learning_rounds=1) + +model_sul = SevpaSUL(model, include_top=True, check_balance=True) +print(model_sul.query(('(', ')'))) +print(model_sul.query(('[', ')'))) +print(model_sul.query(('[', '(', ')', ']'))) + +# model.visualize() + diff --git a/vpa_eq_checks.py b/vpa_eq_checks.py new file mode 100644 index 00000000..2bbd33b2 --- /dev/null +++ b/vpa_eq_checks.py @@ -0,0 +1,60 @@ +from aalpy.SULs.AutomataSUL import VpaSUL, PdaSUL +import random + +amount_languages = 15 + +missing_languages = {6} + +pda_suls = [] +vpa_suls = [] +alphabets = [] + +for l in range(1, amount_languages+1): + if l in missing_languages: + pda_suls.append(None) + vpa_suls.append(None) + 
alphabets.append(None) + continue + language_pda = f'pda_for_L{l}' + language_vpa = f'vpa_for_L{l}' + + # Get PDAs + pda = globals()[language_pda]() + pda_input_alphabet = pda.get_input_alphabet() + pda_sul = PdaSUL(pda, include_top=True, check_balance=True) + pda_suls.append(pda_sul) + alphabets.append(pda_input_alphabet) + + # Get VPA + vpa = globals()[language_vpa]() + vpa_input_alphabet = vpa.get_input_alphabet() + merged_input_alphabet = vpa.get_input_alphabet_merged() + vpa_sul = VpaSUL(vpa, include_top=True, check_balance=True) + vpa_suls.append(vpa_sul) + +for l in range(0, amount_languages): + print(f'Checking Language L{l+1}') + if l in missing_languages: + continue + for i in range(0, 50000): + word_length = random.randint(5, 100) + word = [] + for j in range(0, word_length): + word.append(random.choice(alphabets[l])) + + pda_out = pda_suls[l].query(word) + vpa_out = vpa_suls[l].query(word) + + if pda_out == vpa_out: + continue + else: + print(f'Input: {word}') + print(f'Pda out: {pda_out} \nVpa out: {vpa_out}') + break + + print(f'Language L{l+1} passed') + + + + + diff --git a/vpa_main_experiments.py b/vpa_main_experiments.py index 8a3e9fa3..92bae9d4 100644 --- a/vpa_main_experiments.py +++ b/vpa_main_experiments.py @@ -8,7 +8,7 @@ vpa = vpa_for_L1() -# vpa.visualize() +vpa.visualize() input_alphabet = vpa.get_input_alphabet() merged_input_alphabet = vpa.get_input_alphabet_merged() From bc012dfb4ad1fb5f4fa4fa4d4135bb1e9bd434fa Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Wed, 18 Oct 2023 14:41:02 +0200 Subject: [PATCH 09/62] push failing test case --- test_edi.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 test_edi.py diff --git a/test_edi.py b/test_edi.py new file mode 100644 index 00000000..5c562d43 --- /dev/null +++ b/test_edi.py @@ -0,0 +1,33 @@ +from aalpy.SULs.AutomataSUL import SevpaSUL, VpaSUL +from aalpy.automata.Pda import generate_data_from_pda +from aalpy.learning_algs import run_KV_vpda, 
run_KV +from aalpy.oracles import RandomWordEqOracle, RandomWalkEqOracle +from aalpy.utils import visualize_automaton, get_Angluin_dfa +from aalpy.utils.BenchmarkPdaModels import * +from aalpy.utils.BenchmarkVpaModels import * +from aalpy.utils.BenchmarkSevpaModels import * + +# +# sevpa = sevpa_for_L12_refined() +# +# # visualize_automaton(sevpa, path="InitialModel") +# +# print(sevpa.input_alphabet) + +model_under_learning = vpa_for_L12() + +merged_input_alphabet = SevpaAlphabet(list(model_under_learning.internal_set), + list(model_under_learning.call_set), + list(model_under_learning.return_set)) + +sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) + +assert sul.query(('(', ')'))[-1] == True + +eq_oracle = RandomWordEqOracle(alphabet=merged_input_alphabet.get_merged_alphabet(), sul=sul, num_walks=1000) +model = run_KV_vpda(alphabet=merged_input_alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3, max_learning_rounds=3) + +model_sul = SevpaSUL(model, include_top=True, check_balance=True) +print(model_sul.query(('(', ')'))) +print(model_sul.query(('[', ')'))) +print(model_sul.query(('[', '(', ')', ']'))) From 42e9fdd481722a5700dc716f4619de1ad16d1633 Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Wed, 18 Oct 2023 18:40:05 +0200 Subject: [PATCH 10/62] Adaption of All VPABenchmark Languages to new Format --- aalpy/automata/Pda.py | 2 +- aalpy/automata/Vpa.py | 2 +- aalpy/utils/BenchmarkPdaModels.py | 2 +- aalpy/utils/BenchmarkVpaModels.py | 118 +++++++++++++++--------------- pda_main_experiments.py | 22 +++--- vpa_eq_checks.py | 35 +++++++-- vpa_main_experiments.py | 18 +++-- 7 files changed, 111 insertions(+), 88 deletions(-) diff --git a/aalpy/automata/Pda.py b/aalpy/automata/Pda.py index 003315d7..1fc32fc4 100644 --- a/aalpy/automata/Pda.py +++ b/aalpy/automata/Pda.py @@ -75,7 +75,7 @@ def step(self, letter): trans = [t for t in transitions if t.stack_guard is None or self.top() == t.stack_guard][0] self.current_state = 
trans.target if trans.action == 'push': - self.stack.append(trans.stack_guard) + self.stack.append(letter) elif trans.action == 'pop': if len(self.stack) <= 1: # empty stack elem should always be there self.current_state = Pda.error_state diff --git a/aalpy/automata/Vpa.py b/aalpy/automata/Vpa.py index d9cfd9aa..67ef48af 100644 --- a/aalpy/automata/Vpa.py +++ b/aalpy/automata/Vpa.py @@ -77,7 +77,7 @@ def possible(self, letter): elif t.symbol in self.internal_set: possible_trans.append(t) else: - assert False + assert False and print(f'Letter {letter} is not part of any alphabet') # trans = [t for t in transitions if t.stack_guard is None or self.top() == t.stack_guard] assert len(possible_trans) < 2 if len(possible_trans) == 0: diff --git a/aalpy/utils/BenchmarkPdaModels.py b/aalpy/utils/BenchmarkPdaModels.py index 8ed8bb6b..901a971e 100644 --- a/aalpy/utils/BenchmarkPdaModels.py +++ b/aalpy/utils/BenchmarkPdaModels.py @@ -134,7 +134,7 @@ def pda_for_L9(): ")": [("q1", 'pop', "(")], "]": [("q1", 'pop', "[")], "}": [("q1", 'pop', "{")], - ">": [("q1", 'pop', "{")], + ">": [("q1", 'pop', ">")], }), } pda = Pda.from_state_setup(state_setup, "q0") diff --git a/aalpy/utils/BenchmarkVpaModels.py b/aalpy/utils/BenchmarkVpaModels.py index 1b8f2291..1f08288f 100644 --- a/aalpy/utils/BenchmarkVpaModels.py +++ b/aalpy/utils/BenchmarkVpaModels.py @@ -8,8 +8,8 @@ def vpa_for_L1(): internal_set = {} state_setup = { - "q0": (False, {"a": [("q1", 'push', None)], "b": [(Vpa.error_state.state_id, None, None)]}), - "q1": (False, {"a": [("q1", 'push', None)], "b": [("q2", 'pop', "a")]}), + "q0": (False, {"a": [("q1", 'push', "a")], "b": [(Vpa.error_state.state_id, None, None)]}), + "q1": (False, {"a": [("q1", 'push', "a")], "b": [("q2", 'pop', "a")]}), "q2": (True, {"a": [(Vpa.error_state.state_id, None, None)], "b": [("q2", 'pop', "a")]}), } vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) @@ -23,10 +23,10 @@ def vpa_for_L2(): internal_set = {} 
state_setup = { - "q0": (False, {"a": [("q1", 'push', None)], "b": [("q1", 'push', None)], + "q0": (False, {"a": [("q1", 'push', "a")], "b": [("q1", 'push', "b")], "c": [(Vpa.error_state.state_id, None, None)], "d": [(Vpa.error_state.state_id, None, None)]}), - "q1": (False, {"a": [("q1", 'push', None)], "b": [("q1", 'push', None)], + "q1": (False, {"a": [("q1", 'push', "a")], "b": [("q1", 'push', "b")], "c": [("q2", 'pop', "a"), ("q2", 'pop', "b")], "d": [("q2", 'pop', "a"), ("q2", 'pop', "b")]}), "q2": (True, {"a": [(Vpa.error_state.state_id, None, None)], @@ -45,18 +45,18 @@ def vpa_for_L3(): internal_set = {} state_setup = { - "q0": (False, {"a": [("q0a", 'push', None)], - "c": [("q0c", 'push', None)], + "q0": (False, {"a": [("q0a", 'push', "a")], + "c": [("q0c", 'push', "c")], }), - "q0a": (False, {"b": [("q1", 'push', None)]}), - "q0c": (False, {"d": [("q1", 'push', None)]}), - "q1": (False, {"a": [("q1a", 'push', None)], - "c": [("q1c", 'push', None)], + "q0a": (False, {"b": [("q1", 'push', "b")]}), + "q0c": (False, {"d": [("q1", 'push', "d")]}), + "q1": (False, {"a": [("q1a", 'push', "a")], + "c": [("q1c", 'push', "c")], "e": [("q2e", 'pop', "b"), ("q2e", 'pop', "d")], "g": [("q2g", 'pop', "b"), ("q2g", 'pop', "d")], # stack should actually be redundant }), - "q1a": (False, {"b": [("q1", 'push', None)]}), - "q1c": (False, {"d": [("q1", 'push', None)]}), + "q1a": (False, {"b": [("q1", 'push', "b")]}), + "q1c": (False, {"d": [("q1", 'push', "d")]}), "q2e": (False, {"f": [("q2", 'pop', "a"), ("q2", 'pop', "c")]}), "q2g": (False, {"h": [("q2", 'pop', "a"), ("q2", 'pop', "c")]}), "q2": (True, {"e": [("q2e", 'pop', "b"), ("q2e", 'pop', "d")], @@ -73,12 +73,12 @@ def vpa_for_L4(): internal_set = {} state_setup = { - "q0": (False, {"a": [("q01", 'push', None)], "b": [(Vpa.error_state.state_id, None, None)]}), - "q01": (False, {"b": [("q1", 'push', None)], "a": [(Vpa.error_state.state_id, None, None)]}), + "q0": (False, {"a": [("q01", 'push', "a")], "b": 
[(Vpa.error_state.state_id, None, None)]}), + "q01": (False, {"b": [("q1", 'push', "b")], "a": [(Vpa.error_state.state_id, None, None)]}), - "q1": (False, {"a": [("q11", 'push', None)], "b": [(Vpa.error_state.state_id, None, None)], + "q1": (False, {"a": [("q11", 'push', "a")], "b": [(Vpa.error_state.state_id, None, None)], "c": [("q21", 'pop', "b")]}), - "q11": (False, {"b": [("q1", 'push', None)], "a": [(Vpa.error_state.state_id, None, None)]}), + "q11": (False, {"b": [("q1", 'push', "b")], "a": [(Vpa.error_state.state_id, None, None)]}), "q21": (False, {"d": [("q2", 'pop', "a")]}), "q2": (True, {"c": [("q21", 'pop', "b")]}), } @@ -93,13 +93,13 @@ def vpa_for_L5(): internal_set = {} state_setup = { - "q0": (False, {"a": [("q01", 'push', None)]}), - "q01": (False, {"b": [("q02", 'push', None)]}), - "q02": (False, {"c": [("q1", 'push', None)]}), - "q1": (False, {"a": [("q11", 'push', None)], + "q0": (False, {"a": [("q01", 'push', "a")]}), + "q01": (False, {"b": [("q02", 'push', "b")]}), + "q02": (False, {"c": [("q1", 'push', "c")]}), + "q1": (False, {"a": [("q11", 'push', "a")], "d": [("q21", 'pop', "c")]}), - "q11": (False, {"b": [("q12", 'push', None)]}), - "q12": (False, {"c": [("q1", 'push', None)]}), + "q11": (False, {"b": [("q12", 'push', "b")]}), + "q12": (False, {"c": [("q1", 'push', "c")]}), "q21": (False, {"e": [("q22", 'pop', "b")]}), "q22": (False, {"f": [("q2", 'pop', "a")]}), "q2": (True, {"d": [("q21", 'pop', "c")]}), @@ -116,11 +116,11 @@ def vpa_for_L7(): internal_set = {} state_setup = { - "q0": (False, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], # exclude empty seq + "q0": (False, {"(": [("q1", 'push', '(')], + "[": [("q1", 'push', '[')], # exclude empty seq }), - "q1": (True, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], + "q1": (True, {"(": [("q1", 'push', '(')], + "[": [("q1", 'push', '[')], ")": [("q1", 'pop', "(")], "]": [("q1", 'pop', "[")] }), @@ -137,13 +137,13 @@ def vpa_for_L8(): internal_set = {} 
state_setup = { - "q0": (False, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], - "{": [("q1", 'push', None)], + "q0": (False, {"(": [("q1", 'push', '(')], + "[": [("q1", 'push', '[')], + "{": [("q1", 'push', '{')], }), - "q1": (True, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], - "{": [("q1", 'push', None)], + "q1": (True, {"(": [("q1", 'push', '(')], + "[": [("q1", 'push', '[')], + "{": [("q1", 'push', '{')], ")": [("q1", 'pop', "(")], "]": [("q1", 'pop', "[")], "}": [("q1", 'pop', "{")], @@ -161,19 +161,19 @@ def vpa_for_L9(): internal_set = {} state_setup = { - "q0": (False, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], - "{": [("q1", 'push', None)], - "<": [("q1", 'push', None)], + "q0": (False, {"(": [("q1", 'push', '(')], + "[": [("q1", 'push', '[')], + "{": [("q1", 'push', '{')], + "<": [("q1", 'push', '<')], }), - "q1": (True, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], - "{": [("q1", 'push', None)], - "<": [("q1", 'push', None)], + "q1": (True, {"(": [("q1", 'push', '(')], + "[": [("q1", 'push', '[')], + "{": [("q1", 'push', '{')], + "<": [("q1", 'push', '<')], ")": [("q1", 'pop', "(")], "]": [("q1", 'pop', "[")], "}": [("q1", 'pop', "{")], - ">": [("q1", 'pop', "{")], + ">": [("q1", 'pop', ">")], }), } vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) @@ -188,7 +188,7 @@ def vpa_for_L10(): internal_set = {'b', 'c', 'd', ' e', 'w', 'x', 'y', 'z'} state_setup = { - "q0": (False, {"a": [("qa", 'push', None)], + "q0": (False, {"a": [("qa", 'push', "a")], }), "qa": (False, {"b": [("qb", None, None)], }), @@ -198,7 +198,7 @@ def vpa_for_L10(): }), "qd": (False, {"e": [("q1", None, None)], }), - "q1": (True, {"a": [("qa", 'push', None)], + "q1": (True, {"a": [("qa", 'push', "a")], "v": [("qv", 'pop', "a")]}), "qv": (False, {"w": [("qw", None, None)]}), "qw": (False, {"x": [("qx", None, None)]}), @@ -217,13 +217,13 @@ def vpa_for_L11(): internal_set = {'b', 'e'} 
state_setup = { - "q0": (False, {"a": [("qa", 'push', None)], - "c": [("q1", 'push', None)], + "q0": (False, {"a": [("qa", 'push', "a")], + "c": [("q1", 'push', "c")], }), "qa": (False, {"b": [("q1", None, None)], }), - "q1": (True, {"a": [("qa", 'push', None)], - "c": [("q1", 'push', None)], + "q1": (True, {"a": [("qa", 'push', "a")], + "c": [("q1", 'push', "c")], "d": [("qd", 'pop', "a"), ("qd", 'pop', "c")], "f": [("q1", 'pop', "a"), ("q1", 'pop', "c")]}), "qd": (False, {"e": [("q1", None, None)]}) @@ -240,11 +240,11 @@ def vpa_for_L12(): internal_set = {} state_setup = { - "q0": (False, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], # exclude empty seq + "q0": (False, {"(": [("q1", 'push', "(")], + "[": [("q1", 'push', "[")], # exclude empty seq }), - "q1": (False, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], + "q1": (False, {"(": [("q1", 'push', "(")], + "[": [("q1", 'push', "[")], ")": [("q2", 'pop', "(")], "]": [("q2", 'pop', "[")]}), "q2": (True, { @@ -264,12 +264,12 @@ def vpa_for_L13(): internal_set = {'a', 'b', 'c'} state_setup = { - "q0": (False, {"(": [("q1", 'push', None)], + "q0": (False, {"(": [("q1", 'push', "(")], "a": [("q1", None, None)], "b": [("q1", None, None)], "c": [("q1", None, None)], # exclude empty seq }), - "q1": (True, {"(": [("q1", 'push', None)], + "q1": (True, {"(": [("q1", 'push', "(")], ")": [("q1", 'pop', "(")], "a": [("q1", None, None)], "b": [("q1", None, None)], @@ -288,14 +288,14 @@ def vpa_for_L14(): internal_set = {'a', 'b', 'c'} state_setup = { - "q0": (False, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], + "q0": (False, {"(": [("q1", 'push', "(")], + "[": [("q1", 'push', "[")], "a": [("q1", None, None)], "b": [("q1", None, None)], "c": [("q1", None, None)], # exclude empty seq }), - "q1": (True, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], + "q1": (True, {"(": [("q1", 'push', "(")], + "[": [("q1", 'push', "[")], ")": [("q1", 'pop', "(")], "]": [("q1", 'pop', 
"[")], "a": [("q1", None, None)], @@ -315,11 +315,11 @@ def vpa_for_L15(): internal_set = {'a', 'b', 'c', 'd'} state_setup = { - "q0": (False, {"(": [("q1", 'push', None)], + "q0": (False, {"(": [("q1", 'push', "(")], "a": [("qa", None, None)], "d": [("q1", None, None)], # exclude empty seq }), - "q1": (True, {"(": [("q1", 'push', None)], + "q1": (True, {"(": [("q1", 'push', "(")], ")": [("q1", 'pop', "(")], "a": [("qa", None, None)], "d": [("q1", None, None)], diff --git a/pda_main_experiments.py b/pda_main_experiments.py index f52b5f6f..c8dbe4cd 100644 --- a/pda_main_experiments.py +++ b/pda_main_experiments.py @@ -11,15 +11,17 @@ input_alphabet = pda.get_input_alphabet() sul = PdaSUL(pda, include_top=True, check_balance=True) +print(sul.query(('(',')'))) -pda_sequances = generate_data_from_pda(pda, 10000) -accepting_seq, rejecting_seq = [x[0] for x in pda_sequances if x[1]], [x[0] for x in pda_sequances if not x[1]] -accepting_seq.sort(key=len) -print('Positive') -for i in range(10): - print(accepting_seq[i]) -exit() -eq_oracle = RandomWMethodEqOracle(alphabet=input_alphabet, sul=sul, walks_per_state=100, walk_len=10) -model = run_vpda_Lstar(alphabet=input_alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type="dfa", print_level=3, - max_learning_rounds=1) \ No newline at end of file +# pda_sequances = generate_data_from_pda(pda, 10000) +# accepting_seq, rejecting_seq = [x[0] for x in pda_sequances if x[1]], [x[0] for x in pda_sequances if not x[1]] +# accepting_seq.sort(key=len) +# print('Positive') +# for i in range(10): +# print(accepting_seq[i]) +# exit() + +# eq_oracle = RandomWMethodEqOracle(alphabet=input_alphabet, sul=sul, walks_per_state=100, walk_len=10) +# model = run_vpda_Lstar(alphabet=input_alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type="dfa", print_level=3, +# max_learning_rounds=1) \ No newline at end of file diff --git a/vpa_eq_checks.py b/vpa_eq_checks.py index 2bbd33b2..b032ea04 100644 --- a/vpa_eq_checks.py +++ b/vpa_eq_checks.py @@ 
-1,5 +1,7 @@ from aalpy.SULs.AutomataSUL import VpaSUL, PdaSUL import random +import aalpy.utils.BenchmarkPdaModels as PDAs +import aalpy.utils.BenchmarkVpaModels as VPAs amount_languages = 15 @@ -19,40 +21,57 @@ language_vpa = f'vpa_for_L{l}' # Get PDAs - pda = globals()[language_pda]() + if hasattr(PDAs, language_pda): + pda = getattr(PDAs, language_pda)() + else: + print(f"Function {language_pda} not found") + continue pda_input_alphabet = pda.get_input_alphabet() pda_sul = PdaSUL(pda, include_top=True, check_balance=True) pda_suls.append(pda_sul) alphabets.append(pda_input_alphabet) # Get VPA - vpa = globals()[language_vpa]() + if hasattr(VPAs, language_vpa): + vpa = getattr(VPAs, language_vpa)() + else: + print(f"Function {language_vpa} not found") + continue vpa_input_alphabet = vpa.get_input_alphabet() merged_input_alphabet = vpa.get_input_alphabet_merged() vpa_sul = VpaSUL(vpa, include_top=True, check_balance=True) vpa_suls.append(vpa_sul) for l in range(0, amount_languages): - print(f'Checking Language L{l+1}') - if l in missing_languages: + language_index = l+1 + print(f'Checking Language L{language_index}') + if language_index in missing_languages: + print(f'Skipping L{language_index}') continue + tests_passed = True for i in range(0, 50000): - word_length = random.randint(5, 100) + word_length = random.randint(1, 100) word = [] for j in range(0, word_length): word.append(random.choice(alphabets[l])) - pda_out = pda_suls[l].query(word) - vpa_out = vpa_suls[l].query(word) + pda_out = pda_suls[l].query(tuple(word)) + vpa_out = vpa_suls[l].query(tuple(word)) if pda_out == vpa_out: continue else: + print(f'Language L{language_index} failed on following test:') print(f'Input: {word}') print(f'Pda out: {pda_out} \nVpa out: {vpa_out}') + tests_passed = False break - print(f'Language L{l+1} passed') + if tests_passed: + print(f'Language L{language_index} passed') + else: + print(f'Language L{language_index} failed') + diff --git a/vpa_main_experiments.py 
b/vpa_main_experiments.py index 92bae9d4..d9090e5a 100644 --- a/vpa_main_experiments.py +++ b/vpa_main_experiments.py @@ -6,7 +6,7 @@ from aalpy.utils.BenchmarkVpaModels import * -vpa = vpa_for_L1() +vpa = vpa_for_L12() vpa.visualize() @@ -15,7 +15,9 @@ # print("Call: " + str(input_alphabet[0]) + "\nReturn: " + str(input_alphabet[1]) + "\nInternal: " + str(input_alphabet[2])) sul = VpaSUL(vpa, include_top=True, check_balance=True) -out = sul.query('a') +print(sul.query(('(',')'))) + +assert sul.query(('(', ')'))[-1][0] == True # pda_sequences = generate_data_from_pda(vpa, 10000) # accepting_seq, rejecting_seq = [x[0] for x in pda_sequences if x[1]], [x[0] for x in pda_sequences if not x[1]] @@ -23,10 +25,10 @@ # print('Positive') # for i in range(10): # print(accepting_seq[i]) - -eq_oracle = RandomWMethodEqOracle(alphabet=merged_input_alphabet, sul=sul, walks_per_state=100, walk_len=10) -model = run_vpda_Lstar(alphabet=input_alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type="vpa", print_level=3, - max_learning_rounds=1) - -model.visualize() +# +# eq_oracle = RandomWMethodEqOracle(alphabet=merged_input_alphabet, sul=sul, walks_per_state=100, walk_len=10) +# model = run_vpda_Lstar(alphabet=input_alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type="vpa", print_level=3, +# max_learning_rounds=1) +# +# model.visualize() From 2a91e6e56dcb799ad673ae6353d35d17920374b6 Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Thu, 19 Oct 2023 01:05:12 +0200 Subject: [PATCH 11/62] implementation of transform_access_sequence --- aalpy/automata/Sevpa.py | 15 +- .../vpda/VpdaClassificationTree.py | 6 +- .../vpda/VpdaCounterExampleProcessing.py | 133 ++++++++++++++++++ aalpy/learning_algs/vpda/VpdaKV.py | 4 +- aalpy/utils/BenchmarkSevpaModels.py | 24 ++++ sevpa_main_experiments.py | 4 +- 6 files changed, 177 insertions(+), 9 deletions(-) create mode 100644 aalpy/learning_algs/vpda/VpdaCounterExampleProcessing.py diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py 
index 41963cb8..3bc64cae 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -47,7 +47,7 @@ class Sevpa(Automaton): empty = "_" error_state = SevpaState("ErrorSinkState", False) - def __init__(self, initial_state: SevpaState, states, input_alphabet: SevpaAlphabet): + def __init__(self, initial_state: SevpaState, states: list[SevpaState], input_alphabet: SevpaAlphabet): super().__init__(initial_state, states) self.initial_state = initial_state self.states = states @@ -136,6 +136,13 @@ def step(self, letter): return self.current_state.is_accepting and self.top() == self.empty + def get_state_by_id(self, state_id) -> SevpaState: + for state in self.states: + if state.state_id == state_id: + return state + else: + return None + def to_state_setup(self): state_setup_dict = {} @@ -209,6 +216,12 @@ def from_state_setup(state_setup: dict, init_state_id, input_alphabet: SevpaAlph return sevpa +def generate_random_sevpa(alphabet: SevpaAlphabet, amount_states, acceptance_prob, ): + + return None + + + def generate_data_from_pda(automaton, num_examples, lens=None, classify_states=False, stack_limit=None, break_on_impossible=False, possible_prob=0.75): import random diff --git a/aalpy/learning_algs/vpda/VpdaClassificationTree.py b/aalpy/learning_algs/vpda/VpdaClassificationTree.py index fff9c04d..ee016bf9 100644 --- a/aalpy/learning_algs/vpda/VpdaClassificationTree.py +++ b/aalpy/learning_algs/vpda/VpdaClassificationTree.py @@ -187,8 +187,6 @@ def gen_hypothesis(self): action='pop', stack_guard=stack_guard) state.transitions[return_letter].append(trans) - - states = [state for state in states.values()] return Sevpa(initial_state=initial_state, states=states, input_alphabet=self.alphabet) @@ -292,7 +290,7 @@ def update_rs(self, cex: tuple, hypothesis): hypothesis: the former (wrong) hypothesis """ - from aalpy.learning_algs.deterministic.CounterExampleProcessing import rs_cex_processing + from aalpy.learning_algs.vpda.VpdaCounterExampleProcessing import 
rs_cex_processing v = max(rs_cex_processing(self.sul, cex, hypothesis, suffix_closedness=True), key=len) a = cex[len(cex) - len(v) - 1] u = cex[:len(cex) - len(v) - 1] @@ -336,7 +334,7 @@ def _insert_new_leaf(self, discriminator, old_leaf_access_string, new_leaf_acces # create an internal node at the same position as the old leaf node # TODO is this context pair?? - discriminator_node = CTInternalNode(context_pair=discriminator, + discriminator_node = CTInternalNode(distinguishing_string=discriminator, parent=old_leaf.parent, path_to_node=old_leaf.path_to_node) # create the new leaf node and add it as child of the internal node diff --git a/aalpy/learning_algs/vpda/VpdaCounterExampleProcessing.py b/aalpy/learning_algs/vpda/VpdaCounterExampleProcessing.py new file mode 100644 index 00000000..7e024772 --- /dev/null +++ b/aalpy/learning_algs/vpda/VpdaCounterExampleProcessing.py @@ -0,0 +1,133 @@ +import re + +from aalpy.SULs.AutomataSUL import SevpaSUL +from aalpy.base import SUL +from aalpy.utils.HelperFunctions import all_suffixes, all_prefixes +from aalpy.automata import Sevpa, SevpaState + + +def counterexample_successfully_processed(sul, cex, hypothesis): + cex_outputs = sul.query(cex) + hyp_outputs = hypothesis.execute_sequence(hypothesis.initial_state, cex) + return cex_outputs[-1] == hyp_outputs[-1] + + +def longest_prefix_cex_processing(s_union_s_dot_a: list, cex: tuple, closedness='suffix'): + """ + Suffix processing strategy found in Shahbaz-Groz paper 'Inferring Mealy Machines'. + It splits the counterexample into prefix and suffix. The prefix is the longest element of the S union S.A that + matches the beginning of the counterexample. By removing such prefixes from counterexample, no consistency check + is needed. + + Args: + + s_union_s_dot_a: list of all prefixes found in observation table sorted from shortest to longest + cex: counterexample + closedness: either 'suffix' or 'prefix'. 
(Default value = 'suffix') + s_union_s_dot_a: list: + cex: tuple: counterexample + + Returns: + + suffixes to add to the E set + + """ + prefixes = s_union_s_dot_a + prefixes.reverse() + trimmed_suffix = None + + for p in prefixes: + if p == cex[:len(p)]: + trimmed_suffix = cex[len(p):] + break + + trimmed_suffix = trimmed_suffix if trimmed_suffix else cex + suffixes = all_suffixes(trimmed_suffix) if closedness == 'suffix' else all_prefixes(trimmed_suffix) + suffixes.reverse() + return suffixes + + +def rs_cex_processing(sul: SUL, cex: tuple, hypothesis: Sevpa, suffix_closedness=True, closedness='suffix'): + """Riverst-Schapire counter example processing. + + Args: + + sul: system under learning + cex: found counterexample + hypothesis: hypothesis on which counterexample was found + suffix_closedness: If true all suffixes will be added, else just one (Default value = True) + closedness: either 'suffix' or 'prefix'. (Default value = 'suffix') + sul: SUL: system under learning + cex: tuple: counterexample + + Returns: + + suffixes to be added to the E set + + """ + cex_out = sul.query(cex) + cex_input = list(cex) + + lower = 1 + upper = len(cex_input) - 2 + + while True: + hypothesis.reset_to_initial() + mid = (lower + upper) // 2 + + # arr[:n] -> first n values + # arr[n:] -> last n values + + for s_p in cex_input[:mid]: + hypothesis.step(s_p) + s_bracket = hypothesis.current_state.prefix + + d = tuple(cex_input[mid:]) + mq = sul.query(s_bracket + d) + + if mq[-1] == cex_out[-1]: # only check if the last element is the same as the cex + lower = mid + 1 + if upper < lower: + suffix = d[1:] + break + else: + upper = mid - 1 + if upper < lower: + suffix = d + break + + hyp_sul = SevpaSUL(hypothesis) + hyp_sul.query(('(',)) + word = transform_access_seq(hypothesis, hyp_sul.sevpa.stack) + print(word) + + if suffix_closedness: + suffixes = all_suffixes(suffix) if closedness == 'suffix' else all_prefixes(suffix) + suffixes.reverse() + suffix_to_query = suffixes + else: + 
suffix_to_query = [suffix] + return suffix_to_query + + +def transform_access_seq(hypothesis: Sevpa, stack: []) -> list[str]: + word = [] + pattern = r"(q\d+)(.*)" + + for i in range(1, len(stack)): # skip the first element because it's the start of the stack '_ + stack_elem = stack[i] + match = re.search(pattern, stack_elem) + if match: + from_state_id = match.group(1) # the corresponding state where the stack element got pushed from + call_letter = match.group(2) # the call letter that was pushed on the stack + print("From state:", from_state_id) + print("Call letter:", call_letter) + from_state = hypothesis.get_state_by_id(from_state_id) + word.append(from_state.prefix) # .prefix is the access sequence of the node in the classificationTree + word.append(call_letter) + else: + assert False and print("Stack content does not follow convention") + + word.append(hypothesis.initial_state.prefix) + return word + diff --git a/aalpy/learning_algs/vpda/VpdaKV.py b/aalpy/learning_algs/vpda/VpdaKV.py index 4b6fd35a..2d53c3f1 100644 --- a/aalpy/learning_algs/vpda/VpdaKV.py +++ b/aalpy/learning_algs/vpda/VpdaKV.py @@ -4,7 +4,7 @@ from aalpy.base import Oracle, SUL from aalpy.utils.HelperFunctions import print_learning_info, visualize_classification_tree from .VpdaClassificationTree import VpdaClassificationTree -from ..deterministic.CounterExampleProcessing import counterexample_successfully_processed +from aalpy.learning_algs.vpda.VpdaCounterExampleProcessing import counterexample_successfully_processed from ...base.SUL import CacheSUL print_options = [0, 1, 2, 3] @@ -93,7 +93,7 @@ def run_KV_vpda(alphabet: list, sul: SUL, eq_oracle: Oracle, cex_processing='rs' break hypothesis = classification_tree.gen_hypothesis() - return hypothesis + hypothesis.reset_to_initial() if print_level == 2: print(f'\rHypothesis {learning_rounds}: {hypothesis.size} states.', end="") diff --git a/aalpy/utils/BenchmarkSevpaModels.py b/aalpy/utils/BenchmarkSevpaModels.py index 4307bbed..6ddda2c0 
100644 --- a/aalpy/utils/BenchmarkSevpaModels.py +++ b/aalpy/utils/BenchmarkSevpaModels.py @@ -372,6 +372,30 @@ def sevpa_for_L12_refined(): sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) return sevpa +def sevpa_congruence_for_vpa_paper(): + # This is a 1-SEVPA which accepts the language L = c1L1r + c2L2r + # L1 is a regular language which has an even number of a's + # L2 is a regular language which has an even number of b's + + call_set = {'(', '['} + return_set = {')', ']'} + internal_set = {'x'} + + input_alphabet = SevpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) + + state_setup = { + "q0": (False, {")": [("q1", 'pop', ("q0", "("))], + "]": [("q1", 'pop', ("q0", "["))], + "x": [("q1", None, None)] + }), + "q1": (True, {")": [("q1", 'pop', ("q0", "("))], + "]": [("q1", 'pop', ("q0", "["))], + "x": [("q0", None, None)] + }), + } + sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + return sevpa + diff --git a/sevpa_main_experiments.py b/sevpa_main_experiments.py index 98959dd6..4fb890f1 100644 --- a/sevpa_main_experiments.py +++ b/sevpa_main_experiments.py @@ -38,12 +38,12 @@ eq_oracle = RandomWordEqOracle(alphabet=merged_input_alphabet, sul=sul) -model = run_KV_vpda(alphabet=sevpa.input_alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3, max_learning_rounds=1) +model = run_KV_vpda(alphabet=sevpa.input_alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3, max_learning_rounds=10) model_sul = SevpaSUL(model, include_top=True, check_balance=True) print(model_sul.query(('(', ')'))) print(model_sul.query(('[', ')'))) print(model_sul.query(('[', '(', ')', ']'))) -# model.visualize() +model.visualize() From 4009548c7afbbb5fe9a5252d81c0f95a47508f4b Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Thu, 19 Oct 2023 09:33:58 +0200 Subject: [PATCH 12/62] seeems to work at least on 2-3 examples --- .../vpda/VpdaClassificationTree.py | 32 +++++++++++++++++-- test_edi.py | 6 ++-- 2 files 
changed, 31 insertions(+), 7 deletions(-) diff --git a/aalpy/learning_algs/vpda/VpdaClassificationTree.py b/aalpy/learning_algs/vpda/VpdaClassificationTree.py index ee016bf9..1fefc8b8 100644 --- a/aalpy/learning_algs/vpda/VpdaClassificationTree.py +++ b/aalpy/learning_algs/vpda/VpdaClassificationTree.py @@ -1,7 +1,9 @@ +import re from collections import defaultdict from aalpy.automata import SevpaState, SevpaAlphabet, SevpaTransition, Sevpa from aalpy.base import SUL +from aalpy.learning_algs.deterministic.CounterExampleProcessing import rs_cex_processing class CTNode: @@ -290,7 +292,7 @@ def update_rs(self, cex: tuple, hypothesis): hypothesis: the former (wrong) hypothesis """ - from aalpy.learning_algs.vpda.VpdaCounterExampleProcessing import rs_cex_processing + v = max(rs_cex_processing(self.sul, cex, hypothesis, suffix_closedness=True), key=len) a = cex[len(cex) - len(v) - 1] u = cex[:len(cex) - len(v) - 1] @@ -303,7 +305,9 @@ def update_rs(self, cex: tuple, hypothesis): new_leaf_position = not hypothesis.execute_sequence(hypothesis.initial_state, cex)[-1] - self._insert_new_leaf(discriminator=v, + discriminator = (tuple(transform_access_seq(hypothesis, hypothesis.stack)), tuple(v)) + + self._insert_new_leaf(discriminator=discriminator, old_leaf_access_string=ua_state, new_leaf_access_string=(*u_state, a), new_leaf_position=new_leaf_position) @@ -333,7 +337,7 @@ def _insert_new_leaf(self, discriminator, old_leaf_access_string, new_leaf_acces old_leaf = self.leaf_nodes[old_leaf_access_string] # create an internal node at the same position as the old leaf node - # TODO is this context pair?? 
+ discriminator_node = CTInternalNode(distinguishing_string=discriminator, parent=old_leaf.parent, path_to_node=old_leaf.path_to_node) @@ -371,3 +375,25 @@ def _query_and_update_cache(self, word): output = self.sul.query(word)[-1] self.query_cache[word] = output return output + +def transform_access_seq(hypothesis: Sevpa, stack: []) -> list[str]: + + word = [] + pattern = r"(q\d+)(.*)" + + for i in range(1, len(stack)): # skip the first element because it's the start of the stack '_ + stack_elem = stack[i] + match = re.search(pattern, stack_elem) + if match: + from_state_id = match.group(1) # the corresponding state where the stack element got pushed from + call_letter = match.group(2) # the call letter that was pushed on the stack + print("From state:", from_state_id) + print("Call letter:", call_letter) + from_state = hypothesis.get_state_by_id(from_state_id) + word.append(from_state.prefix) # .prefix is the access sequence of the node in the classificationTree + word.append(call_letter) + else: + assert False and print("Stack content does not follow convention") + + word.append(hypothesis.initial_state.prefix) + return word diff --git a/test_edi.py b/test_edi.py index 5c562d43..41d2fb90 100644 --- a/test_edi.py +++ b/test_edi.py @@ -14,7 +14,7 @@ # # print(sevpa.input_alphabet) -model_under_learning = vpa_for_L12() +model_under_learning = vpa_for_L15() merged_input_alphabet = SevpaAlphabet(list(model_under_learning.internal_set), list(model_under_learning.call_set), @@ -22,9 +22,7 @@ sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) -assert sul.query(('(', ')'))[-1] == True - -eq_oracle = RandomWordEqOracle(alphabet=merged_input_alphabet.get_merged_alphabet(), sul=sul, num_walks=1000) +eq_oracle = RandomWordEqOracle(alphabet=merged_input_alphabet.get_merged_alphabet(), sul=sul, num_walks=10000) model = run_KV_vpda(alphabet=merged_input_alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3, max_learning_rounds=3) model_sul = 
SevpaSUL(model, include_top=True, check_balance=True) From a65cbe7f38988a0fb639a33df245f955eeb35889 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Thu, 19 Oct 2023 10:34:11 +0200 Subject: [PATCH 13/62] seeems to work at least on 2-3 examples --- aalpy/automata/Sevpa.py | 75 +---------------------------------------- test_edi.py | 21 ++++++------ 2 files changed, 11 insertions(+), 85 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 3bc64cae..778946e3 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -140,8 +140,7 @@ def get_state_by_id(self, state_id) -> SevpaState: for state in self.states: if state.state_id == state_id: return state - else: - return None + return None def to_state_setup(self): state_setup_dict = {} @@ -221,75 +220,3 @@ def generate_random_sevpa(alphabet: SevpaAlphabet, amount_states, acceptance_pro return None - -def generate_data_from_pda(automaton, num_examples, lens=None, classify_states=False, stack_limit=None, - break_on_impossible=False, possible_prob=0.75): - import random - from itertools import product - - input_al = automaton.get_input_alphabet() - - if lens is None: - lens = list(range(1, 15)) - - sum_lens = sum(lens) - # key is length, value is number of examples for said length - ex_per_len = dict() - - additional_seq = 0 - for l in lens: - ex_per_len[l] = int(num_examples * (l / sum_lens)) + 1 - if ex_per_len[l] > pow(len(input_al), l): - additional_seq += ex_per_len[l] - pow(len(input_al), l) - ex_per_len[l] = 'comb' - - additional_seq = additional_seq // len([i for i in ex_per_len.values() if i != 'comb']) - - training_data = [] - for l in ex_per_len.keys(): - seqs = [] - if ex_per_len[l] == 'comb': - - seqs = list(product(input_al, repeat=l)) - for seq in seqs: - - out = automaton.reset() - nr_steps = 0 - for inp in seq: - if automaton.possible(inp) or not break_on_impossible: - nr_steps += 1 - if stack_limit and len(automaton.stack) > stack_limit: - break - if 
break_on_impossible and not automaton.possible(inp): - break - out = automaton.step(inp) - seq = seq[:nr_steps] - training_data.append((tuple(seq), out if not classify_states else automaton.current_state.state_id)) - - else: - for _ in range(ex_per_len[l] + additional_seq): - # seq = [random.choice(input_al) for _ in range(l)] - out = automaton.reset() - nr_steps = 0 - seq = [] - for i in range(l): - possible_inp = [inp for inp in input_al if automaton.possible(inp)] - if len(possible_inp) == 0: - inp = random.choice(input_al) - else: - if random.random() <= possible_prob: - inp = random.choice(possible_inp) - else: - inp = random.choice(input_al) - seq.append(inp) - if automaton.possible(inp) or not break_on_impossible: - nr_steps += 1 - if stack_limit and len(automaton.stack) > stack_limit: - break - if break_on_impossible and not automaton.possible(inp): - break - out = automaton.step(inp) - seq = seq[:nr_steps] - training_data.append((tuple(seq), out)) - - return training_data diff --git a/test_edi.py b/test_edi.py index 41d2fb90..b47b1745 100644 --- a/test_edi.py +++ b/test_edi.py @@ -14,18 +14,17 @@ # # print(sevpa.input_alphabet) -model_under_learning = vpa_for_L15() +for i, vpa in enumerate([vpa_for_L1(), vpa_for_L2(), vpa_for_L3(), vpa_for_L4(), vpa_for_L5(), vpa_for_L7(), vpa_for_L8(), + vpa_for_L9(), vpa_for_L10(), vpa_for_L11(), vpa_for_L12(),vpa_for_L13(), vpa_for_L14(), vpa_for_L15()]): -merged_input_alphabet = SevpaAlphabet(list(model_under_learning.internal_set), - list(model_under_learning.call_set), - list(model_under_learning.return_set)) + print(f'VPA {i + 1 if i < 6 else i + 2}') + model_under_learning = vpa -sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) + merged_input_alphabet = SevpaAlphabet(list(model_under_learning.internal_set), + list(model_under_learning.call_set), + list(model_under_learning.return_set)) -eq_oracle = RandomWordEqOracle(alphabet=merged_input_alphabet.get_merged_alphabet(), sul=sul, 
num_walks=10000) -model = run_KV_vpda(alphabet=merged_input_alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3, max_learning_rounds=3) + sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) -model_sul = SevpaSUL(model, include_top=True, check_balance=True) -print(model_sul.query(('(', ')'))) -print(model_sul.query(('[', ')'))) -print(model_sul.query(('[', '(', ')', ']'))) + eq_oracle = RandomWordEqOracle(alphabet=merged_input_alphabet.get_merged_alphabet(), sul=sul, num_walks=10000) + model = run_KV_vpda(alphabet=merged_input_alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3, max_learning_rounds=3) From 028af10de80148a635052d9974189ca8837a80ab Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Thu, 19 Oct 2023 14:31:07 +0200 Subject: [PATCH 14/62] updarte --- aalpy/automata/Sevpa.py | 32 +++- .../deterministic/ClassificationTree.py | 145 +++++++++++++----- aalpy/learning_algs/deterministic/KV.py | 36 +++-- aalpy/utils/BenchmarkVpaModels.py | 6 +- test_edi.py | 31 ++-- 5 files changed, 183 insertions(+), 67 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 778946e3..178d36b6 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -1,4 +1,5 @@ from collections import defaultdict +import re from aalpy.base import Automaton, AutomatonState @@ -142,6 +143,13 @@ def get_state_by_id(self, state_id) -> SevpaState: return state return None + def execute_sequence(self, origin_state, seq): + if origin_state.prefix != self.initial_state.prefix: + assert False, 'execute_sequance for Sevpa only is only supported from the initial state.' 
+ self.reset_to_initial() + self.current_state = origin_state + return [self.step(s) for s in seq] + def to_state_setup(self): state_setup_dict = {} @@ -191,7 +199,7 @@ def from_state_setup(state_setup: dict, init_state_id, input_alphabet: SevpaAlph if action == 'pop': assert stack_guard[0] in states assert stack_guard[1] in input_alphabet.call_alphabet - stack_guard = f'{stack_guard[0]}{stack_guard[1]}' + stack_guard = (stack_guard[0], stack_guard[1]) trans = SevpaTransition(start=state, target=states[target_state_id], symbol=_input, action=action, stack_guard=stack_guard) elif action == 'push': # In SEVPA you can only define return transitions and internal transitions @@ -214,9 +222,29 @@ def from_state_setup(state_setup: dict, init_state_id, input_alphabet: SevpaAlph sevpa = Sevpa(init_state, states, input_alphabet) return sevpa + def transform_access_sequance(self, stack: []) -> list[str]: + + word = [] + + for i in range(1, len(stack)): # skip the first element because it's the start of the stack '_ + stack_elem = stack[i] + from_state_id = stack_elem[0] # the corresponding state where the stack element got pushed from + call_letter = stack_elem[1] # the call letter that was pushed on the stack + print("From state:", from_state_id) + print("Call letter:", call_letter) + from_state = self.get_state_by_id(from_state_id) + if from_state.prefix != (): + word.append(from_state.prefix) + word.append(call_letter) + + # word.append(self.initial_state.prefix) + return word + + + + def generate_random_sevpa(alphabet: SevpaAlphabet, amount_states, acceptance_prob, ): return None - diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index e5fecac5..773375ef 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -1,7 +1,10 @@ from collections import defaultdict +from typing import Union -from aalpy.automata import 
DfaState, Dfa, MealyState, MealyMachine, MooreState, MooreMachine +from aalpy.automata import DfaState, Dfa, MealyState, MealyMachine, MooreState, MooreMachine, SevpaAlphabet, SevpaState, \ + SevpaTransition, Sevpa from aalpy.base import SUL +from aalpy.learning_algs.deterministic.CounterExampleProcessing import rs_cex_processing automaton_class = {'dfa': Dfa, 'mealy': MealyMachine, 'moore': MooreMachine} @@ -55,7 +58,7 @@ def is_leaf(self): class ClassificationTree: - def __init__(self, alphabet: list, sul: SUL, automaton_type: str, cex: tuple): + def __init__(self, alphabet: Union[list, SevpaAlphabet], sul: SUL, automaton_type: str, cex: tuple): self.sul = sul self.alphabet = alphabet self.automaton_type = automaton_type @@ -65,13 +68,15 @@ def __init__(self, alphabet: list, sul: SUL, automaton_type: str, cex: tuple): self.sifting_cache = {} - if self.automaton_type == "dfa" or self.automaton_type == 'moore': + if self.automaton_type != 'mealy': initial_output = sul.query(())[-1] cex_output = sul.query(cex)[-1] self.query_cache[()] = initial_output - self.root = CTInternalNode(distinguishing_string=tuple(), parent=None, path_to_node=None) + root_distinguishing_string = () if automaton_type != 'vpa' else ([(), ()]) + + self.root = CTInternalNode(distinguishing_string=root_distinguishing_string, parent=None, path_to_node=None) initial_output_node = CTLeafNode(access_string=tuple(), parent=self.root, path_to_node=initial_output) cex_output_node = CTLeafNode(access_string=cex, parent=self.root, path_to_node=cex_output) @@ -114,14 +119,18 @@ def _sift(self, word): the CTLeafNode that is reached by the sifting operation. 
""" for letter in word: - assert letter is None or letter in self.alphabet + alphabet = self.alphabet if self.automaton_type != 'vpa' else self.alphabet.get_merged_alphabet() + assert letter is None or letter in alphabet if word in self.sifting_cache: return self.sifting_cache[word] node = self.root while not node.is_leaf(): - query = word + node.distinguishing_string + if self.automaton_type != 'vpa': + query = word + node.distinguishing_string + else: + query = node.distinguishing_string[0] + word + node.distinguishing_string[1] if query not in self.query_cache.keys(): mq_result = self.sul.query(query) @@ -155,12 +164,12 @@ def gen_hypothesis(self): state_counter = 0 for node in self.leaf_nodes.values(): - if self.automaton_type != "mealy": - # output = self._query_and_update_cache(node.access_string) - if self.automaton_type == 'dfa': - new_state = DfaState(state_id=f's{state_counter}', is_accepting=node.output) - else: - new_state = MooreState(state_id=f's{state_counter}', output=node.output) + if self.automaton_type == 'dfa': + new_state = DfaState(state_id=f's{state_counter}', is_accepting=node.output) + elif self.automaton_type == 'moore': + new_state = MooreState(state_id=f's{state_counter}', output=node.output) + elif self.automaton_type == 'vpa': + new_state = SevpaState(state_id=f'q{state_counter}', is_accepting=node.output) else: new_state = MealyState(state_id=f's{state_counter}') @@ -175,26 +184,62 @@ def gen_hypothesis(self): # alphabet, compute the b-transition out of state s by sifting s.state_id*b states_for_transitions = list(states.values()) for state in states_for_transitions: - for letter in self.alphabet: - transition_target_node = self._sift(state.prefix + (letter,)) - transition_target_access_string = transition_target_node.access_string - - if self.automaton_type != "dfa" and transition_target_access_string not in states: - if self.automaton_type == 'mealy': - new_state = MealyState(state_id=f's{state_counter}') - else: - output = 
self._query_and_update_cache(transition_target_access_string) - new_state = MooreState(state_id=f's{state_counter}', output=output) - - new_state.prefix = transition_target_access_string - states_for_transitions.append(new_state) - states[new_state.prefix] = new_state - state_counter += 1 - - state.transitions[letter] = states[transition_target_access_string] - - if self.automaton_type == "mealy": - state.output_fun[letter] = self._query_and_update_cache(state.prefix + (letter,)) + if self.automaton_type != 'vpa': + for letter in self.alphabet: + transition_target_node = self._sift(state.prefix + (letter,)) + transition_target_access_string = transition_target_node.access_string + + if self.automaton_type != "dfa" and transition_target_access_string not in states: + if self.automaton_type == 'mealy': + new_state = MealyState(state_id=f's{state_counter}') + else: + output = self._query_and_update_cache(transition_target_access_string) + new_state = MooreState(state_id=f's{state_counter}', output=output) + + new_state.prefix = transition_target_access_string + states_for_transitions.append(new_state) + states[new_state.prefix] = new_state + state_counter += 1 + + state.transitions[letter] = states[transition_target_access_string] + + if self.automaton_type == "mealy": + state.output_fun[letter] = self._query_and_update_cache(state.prefix + (letter,)) + else: + # internal transitions + for internal_letter in self.alphabet.internal_alphabet: + transition_target_node = self._sift(state.prefix + (internal_letter,)) + transition_target_access_string = transition_target_node.access_string + + assert transition_target_access_string in states + trans = SevpaTransition(start=state, target=states[transition_target_access_string], + symbol=internal_letter, action=None) + state.transitions[internal_letter].append(trans) + + # Add call transitions + for call_letter in self.alphabet.call_alphabet: + # TODO This should not be here, but without it it breaks the algorithm as SEVPA 
incorrectly + # deals with call transitions I think. Important! + trans = SevpaTransition(start=state, target=initial_state, symbol=call_letter, action='push', + stack_guard=(state.state_id, call_letter)) + state.transitions[call_letter].append(trans) + + for other_state in states_for_transitions: + # Add return transitions + for return_letter in self.alphabet.return_alphabet: + transition_target_node = self._sift( + other_state.prefix + (call_letter,) + state.prefix + (return_letter,)) + transition_target_access_string = transition_target_node.access_string + + trans = SevpaTransition(start=state, target=states[transition_target_access_string], + symbol=return_letter, + action='pop', stack_guard=(other_state.state_id, call_letter)) + + state.transitions[return_letter].append(trans) + + # TODO this should be removed, when input alphabet is removed from a constructor + if self.automaton_type == 'vpa': + return Sevpa(initial_state=initial_state, states=list(states.values()), input_alphabet=self.alphabet) return automaton_class[self.automaton_type](initial_state=initial_state, states=list(states.values())) @@ -276,7 +321,12 @@ def update(self, cex: tuple, hypothesis): hypothesis.execute_sequence(hypothesis.initial_state, cex[:j - 1] or tuple()) - self._insert_new_leaf(discriminator=(cex[j - 1], *d), + if self.automaton_type == 'vpa': + discriminator = (tuple(hypothesis.transform_access_sequance(hypothesis.stack)), (cex[j - 1], *d)) + else: + discriminator = (cex[j - 1], *d) + + self._insert_new_leaf(discriminator=discriminator, old_leaf_access_string=hypothesis.current_state.prefix, new_leaf_access_string=tuple(cex[:j - 1]) or tuple(), new_leaf_position=self.sul.query((*cex[:j - 1], *(cex[j - 1], *d)))[-1]) @@ -297,25 +347,40 @@ def update_rs(self, cex: tuple, hypothesis): hypothesis: the former (wrong) hypothesis """ - from aalpy.learning_algs.deterministic.CounterExampleProcessing import rs_cex_processing v = max(rs_cex_processing(self.sul, cex, hypothesis, 
suffix_closedness=True), key=len) a = cex[len(cex) - len(v) - 1] u = cex[:len(cex) - len(v) - 1] assert (*u, a, *v) == cex hypothesis.execute_sequence(hypothesis.initial_state, u) - u_state = hypothesis.current_state.prefix + u_state = hypothesis.current_state + + top_of_stack = hypothesis.stack[-1] + hypothesis.step(a) - ua_state = hypothesis.current_state.prefix + ua_state = hypothesis.current_state + + if self.automaton_type == 'vpa': + v = (tuple(hypothesis.transform_access_sequance(hypothesis.stack)), tuple(v)) + + if a in self.alphabet.internal_alphabet: + new_access_string = (*u_state.prefix, a) + else: + # TODO ????? + assert a in self.alphabet.return_alphabet + l_prime, call = hypothesis.get_state_by_id(top_of_stack[0]), top_of_stack[1] + new_access_string = l_prime.prefix + (call,) + u_state.prefix + (a,) + else: + new_access_string = (*u_state.prefix, a) - if self.automaton_type == 'dfa': + if self.automaton_type == 'dfa' or self.automaton_type == 'vpa': new_leaf_position = not hypothesis.execute_sequence(hypothesis.initial_state, cex)[-1] else: new_leaf_position = self.sul.query(cex)[-1] self._insert_new_leaf(discriminator=v, - old_leaf_access_string=ua_state, - new_leaf_access_string=(*u_state, a), + old_leaf_access_string=ua_state.prefix, + new_leaf_access_string=new_access_string, # TODO WRONG new_leaf_position=new_leaf_position) def _insert_new_leaf(self, discriminator, old_leaf_access_string, new_leaf_access_string, new_leaf_position): @@ -338,7 +403,7 @@ def _insert_new_leaf(self, discriminator, old_leaf_access_string, new_leaf_acces Returns: """ - if self.automaton_type == "dfa": + if self.automaton_type == "dfa" or self.automaton_type == 'vpa': other_leaf_position = not new_leaf_position else: # check if this query is in the node cache diff --git a/aalpy/learning_algs/deterministic/KV.py b/aalpy/learning_algs/deterministic/KV.py index 77a211e5..5dcbd5e4 100644 --- a/aalpy/learning_algs/deterministic/KV.py +++ 
b/aalpy/learning_algs/deterministic/KV.py @@ -1,6 +1,8 @@ import time +from typing import Union -from aalpy.automata import Dfa, DfaState, MealyState, MealyMachine, MooreState, MooreMachine +from aalpy.automata import Dfa, DfaState, MealyState, MealyMachine, MooreState, MooreMachine, \ + Sevpa, SevpaState, SevpaAlphabet from aalpy.base import Oracle, SUL from aalpy.utils.HelperFunctions import print_learning_info, visualize_classification_tree from .ClassificationTree import ClassificationTree @@ -9,10 +11,10 @@ print_options = [0, 1, 2, 3] counterexample_processing_strategy = [None, 'rs'] -automaton_class = {'dfa': Dfa, 'mealy': MealyMachine, 'moore': MooreMachine} +automaton_class = {'dfa': Dfa, 'mealy': MealyMachine, 'moore': MooreMachine, 'vpa': Sevpa} -def run_KV(alphabet: list, sul: SUL, eq_oracle: Oracle, automaton_type, cex_processing='rs', +def run_KV(alphabet: Union[list, SevpaAlphabet], sul: SUL, eq_oracle: Oracle, automaton_type, cex_processing='rs', max_learning_rounds=None, cache_and_non_det_check=True, return_data=False, print_level=2): """ Executes the KV algorithm. @@ -49,6 +51,7 @@ def run_KV(alphabet: list, sul: SUL, eq_oracle: Oracle, automaton_type, cex_proc assert print_level in print_options assert cex_processing in counterexample_processing_strategy assert automaton_type in [*automaton_class] + assert automaton_type != 'vpa' and isinstance(alphabet, list) or isinstance(alphabet, SevpaAlphabet) start_time = time.time() eq_query_time = 0 @@ -69,19 +72,30 @@ def run_KV(alphabet: list, sul: SUL, eq_oracle: Oracle, automaton_type, cex_proc # all transitions. 
if automaton_type == 'dfa': initial_state = DfaState(state_id='s0', is_accepting=empty_string_mq) - else: + elif automaton_type == 'moore': initial_state = MooreState(state_id='s0', output=empty_string_mq) + else: + initial_state = SevpaState(state_id='s0', is_accepting=empty_string_mq) else: initial_state = MealyState(state_id='s0') initial_state.prefix = tuple() - for a in alphabet: - initial_state.transitions[a] = initial_state - if automaton_type == 'mealy': - initial_state.output_fun[a] = sul.query((a,))[-1] - - hypothesis = automaton_class[automaton_type](initial_state, [initial_state]) + # TODO there should be static function in SEVPA class that creates a daisy hypothesis, + # where all transitions are self loops, and all return transitions are self loops with initial state being stack guard + # then we just call SEVPA.create_daisy_hypothesis(empty_string_mq) + if automaton_type != 'vpa': + for a in alphabet: + initial_state.transitions[a] = initial_state + if automaton_type == 'mealy': + initial_state.output_fun[a] = sul.query((a,))[-1] + + # TODO this is quite ugly... do we need input alphbabet in the constructur of SEVPA? 
+ # Input alphbaet for SVEPA/VPA should not be in a constructor, but you can get it with get_input_alphabet() + if automaton_type != 'vpa': + hypothesis = automaton_class[automaton_type](initial_state, [initial_state]) + else: + hypothesis = Sevpa(initial_state, [initial_state], alphabet) # Perform an equivalence query on this automaton eq_query_start = time.time() @@ -102,6 +116,8 @@ def run_KV(alphabet: list, sul: SUL, eq_oracle: Oracle, automaton_type, cex_proc break hypothesis = classification_tree.gen_hypothesis() + # TODO this is needed for SEVPA for stack, leave for now, but ugly + # hypothesis.reset_to_initial() if print_level == 2: print(f'\rHypothesis {learning_rounds}: {hypothesis.size} states.', end="") diff --git a/aalpy/utils/BenchmarkVpaModels.py b/aalpy/utils/BenchmarkVpaModels.py index 1f08288f..06f51854 100644 --- a/aalpy/utils/BenchmarkVpaModels.py +++ b/aalpy/utils/BenchmarkVpaModels.py @@ -235,9 +235,9 @@ def vpa_for_L11(): def vpa_for_L12(): # Dyck order 2 (single-nested) - call_set = {'(', '['} - return_set = {')', ']'} - internal_set = {} + call_set = ['(', '['] + return_set = [')', ']'] + internal_set = [] state_setup = { "q0": (False, {"(": [("q1", 'push', "(")], diff --git a/test_edi.py b/test_edi.py index b47b1745..40682ade 100644 --- a/test_edi.py +++ b/test_edi.py @@ -7,24 +7,31 @@ from aalpy.utils.BenchmarkVpaModels import * from aalpy.utils.BenchmarkSevpaModels import * -# -# sevpa = sevpa_for_L12_refined() -# -# # visualize_automaton(sevpa, path="InitialModel") -# -# print(sevpa.input_alphabet) +# TODOs with priority ranking +# refactor SEVPA and VPA classes, and allign with Edi if they are nice +# When creating form state setup or whatever, all alphabets should be lists, not sets!!! 
important for reproducability +# # Check TODOs in KV and Classification tree file +# random generation of SEVPA as done in learnlib +# test test test + +from random import seed +seed(12) for i, vpa in enumerate([vpa_for_L1(), vpa_for_L2(), vpa_for_L3(), vpa_for_L4(), vpa_for_L5(), vpa_for_L7(), vpa_for_L8(), vpa_for_L9(), vpa_for_L10(), vpa_for_L11(), vpa_for_L12(),vpa_for_L13(), vpa_for_L14(), vpa_for_L15()]): print(f'VPA {i + 1 if i < 6 else i + 2}') - model_under_learning = vpa + model_under_learning = vpa_for_L12() - merged_input_alphabet = SevpaAlphabet(list(model_under_learning.internal_set), - list(model_under_learning.call_set), - list(model_under_learning.return_set)) + alphabet = SevpaAlphabet(list(model_under_learning.internal_set), + list(model_under_learning.call_set), + list(model_under_learning.return_set)) sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) - eq_oracle = RandomWordEqOracle(alphabet=merged_input_alphabet.get_merged_alphabet(), sul=sul, num_walks=10000) - model = run_KV_vpda(alphabet=merged_input_alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3, max_learning_rounds=3) + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000) + # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) + model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', + print_level=3,) + + exit() \ No newline at end of file From 451c7a41114bed6ff68c6f216d018f23e47d3c9e Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Thu, 19 Oct 2023 15:53:43 +0200 Subject: [PATCH 15/62] updarte --- aalpy/automata/Sevpa.py | 13 ++++++------ .../deterministic/ClassificationTree.py | 12 +++++++---- .../deterministic/CounterExampleProcessing.py | 10 +++++++-- aalpy/utils/BenchmarkVpaModels.py | 18 ++++++++-------- test_edi.py | 21 +++++++++++-------- 5 files changed, 43 insertions(+), 31 deletions(-) diff --git a/aalpy/automata/Sevpa.py 
b/aalpy/automata/Sevpa.py index 178d36b6..15ec00f0 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -222,22 +222,21 @@ def from_state_setup(state_setup: dict, init_state_id, input_alphabet: SevpaAlph sevpa = Sevpa(init_state, states, input_alphabet) return sevpa - def transform_access_sequance(self, stack: []) -> list[str]: + def transform_access_sequance(self) -> list[str]: word = [] + calling_state = self.current_state - for i in range(1, len(stack)): # skip the first element because it's the start of the stack '_ - stack_elem = stack[i] + for i in range(1, len(self.stack)): # skip the first element because it's the start of the stack '_ + stack_elem = self.stack[i] from_state_id = stack_elem[0] # the corresponding state where the stack element got pushed from call_letter = stack_elem[1] # the call letter that was pushed on the stack - print("From state:", from_state_id) - print("Call letter:", call_letter) from_state = self.get_state_by_id(from_state_id) if from_state.prefix != (): - word.append(from_state.prefix) + word.extend(from_state.prefix) word.append(call_letter) - # word.append(self.initial_state.prefix) + word.extend(calling_state.prefix) return word diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index 773375ef..8a7782b6 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -131,7 +131,6 @@ def _sift(self, word): query = word + node.distinguishing_string else: query = node.distinguishing_string[0] + word + node.distinguishing_string[1] - if query not in self.query_cache.keys(): mq_result = self.sul.query(query) # keep track of transitions (this might miss some due to other caching, but rest can be obtained from @@ -322,7 +321,7 @@ def update(self, cex: tuple, hypothesis): hypothesis.execute_sequence(hypothesis.initial_state, cex[:j - 1] or tuple()) if self.automaton_type == 
'vpa': - discriminator = (tuple(hypothesis.transform_access_sequance(hypothesis.stack)), (cex[j - 1], *d)) + discriminator = (tuple(hypothesis.transform_access_sequance()), (cex[j - 1], *d)) else: discriminator = (cex[j - 1], *d) @@ -347,7 +346,7 @@ def update_rs(self, cex: tuple, hypothesis): hypothesis: the former (wrong) hypothesis """ - v = max(rs_cex_processing(self.sul, cex, hypothesis, suffix_closedness=True), key=len) + v = max(rs_cex_processing(self.sul, cex, hypothesis, is_vpa=self.automaton_type == 'vpa'), key=len) a = cex[len(cex) - len(v) - 1] u = cex[:len(cex) - len(v) - 1] assert (*u, a, *v) == cex @@ -361,7 +360,7 @@ def update_rs(self, cex: tuple, hypothesis): ua_state = hypothesis.current_state if self.automaton_type == 'vpa': - v = (tuple(hypothesis.transform_access_sequance(hypothesis.stack)), tuple(v)) + v = (tuple(hypothesis.transform_access_sequance()), tuple(v)) if a in self.alphabet.internal_alphabet: new_access_string = (*u_state.prefix, a) @@ -370,6 +369,7 @@ def update_rs(self, cex: tuple, hypothesis): assert a in self.alphabet.return_alphabet l_prime, call = hypothesis.get_state_by_id(top_of_stack[0]), top_of_stack[1] new_access_string = l_prime.prefix + (call,) + u_state.prefix + (a,) + print('Ret acc', new_access_string) else: new_access_string = (*u_state.prefix, a) @@ -432,6 +432,10 @@ def _insert_new_leaf(self, discriminator, old_leaf_access_string, new_leaf_acces discriminator_node.children[new_leaf_position] = new_leaf discriminator_node.children[other_leaf_position] = old_leaf + # from aalpy.utils.HelperFunctions import visualize_classification_tree + # visualize_classification_tree(self.root) + # input('inp') + # sifting cache update sifting_cache_outdated = [] if old_leaf in self.sifting_cache.values(): diff --git a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py index 4a63c3e1..18691a44 100644 --- 
a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py +++ b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py @@ -43,7 +43,8 @@ def longest_prefix_cex_processing(s_union_s_dot_a: list, cex: tuple, closedness= return suffixes -def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, closedness='suffix'): +def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, closedness='suffix', + is_vpa=False): """Riverst-Schapire counter example processing. Args: @@ -55,6 +56,7 @@ def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, closedness: either 'suffix' or 'prefix'. (Default value = 'suffix') sul: SUL: system under learning cex: tuple: counterexample + is_vpa: system under learning behaves as a context free language Returns: @@ -76,7 +78,11 @@ def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, for s_p in cex_input[:mid]: hypothesis.step(s_p) - s_bracket = hypothesis.current_state.prefix + + if not is_vpa: + s_bracket = hypothesis.current_state.prefix + else: + s_bracket = tuple(hypothesis.transform_access_sequance()) d = tuple(cex_input[mid:]) mq = sul.query(s_bracket + d) diff --git a/aalpy/utils/BenchmarkVpaModels.py b/aalpy/utils/BenchmarkVpaModels.py index 06f51854..31d9ec45 100644 --- a/aalpy/utils/BenchmarkVpaModels.py +++ b/aalpy/utils/BenchmarkVpaModels.py @@ -212,9 +212,9 @@ def vpa_for_L10(): def vpa_for_L11(): # RE Dyck order 1 - call_set = {'a', 'c'} - return_set = {'d', 'f'} - internal_set = {'b', 'e'} + call_set = ['a', 'c'] + return_set = ['d', 'f'] + internal_set = ['b', 'e'] state_setup = { "q0": (False, {"a": [("qa", 'push', "a")], @@ -283,9 +283,9 @@ def vpa_for_L13(): def vpa_for_L14(): # Dyck order 2 - call_set = {'(', '['} - return_set = {')', ']'} - internal_set = {'a', 'b', 'c'} + call_set = ['(', '['] + return_set = [')', ']'] + internal_set = ['a', 'b', 'c'] state_setup = { "q0": (False, {"(": [("q1", 'push', "(")], @@ 
-310,9 +310,9 @@ def vpa_for_L14(): def vpa_for_L15(): # Dyck order 1 - call_set = {'('} - return_set = {')'} - internal_set = {'a', 'b', 'c', 'd'} + call_set = ['('] + return_set = [')'] + internal_set = ['a', 'b', 'c', 'd'] state_setup = { "q0": (False, {"(": [("q1", 'push', "(")], diff --git a/test_edi.py b/test_edi.py index 40682ade..c2d5542a 100644 --- a/test_edi.py +++ b/test_edi.py @@ -21,17 +21,20 @@ vpa_for_L9(), vpa_for_L10(), vpa_for_L11(), vpa_for_L12(),vpa_for_L13(), vpa_for_L14(), vpa_for_L15()]): print(f'VPA {i + 1 if i < 6 else i + 2}') - model_under_learning = vpa_for_L12() + for i in range(1000): + seed(i) + print(i) + model_under_learning = vpa_for_L15() - alphabet = SevpaAlphabet(list(model_under_learning.internal_set), - list(model_under_learning.call_set), - list(model_under_learning.return_set)) + alphabet = SevpaAlphabet(list(model_under_learning.internal_set), + list(model_under_learning.call_set), + list(model_under_learning.return_set)) - sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) + sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) - eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000) - # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) - model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', - print_level=3,) + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=1000) + # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) + model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', + print_level=3, cex_processing='rs') exit() \ No newline at end of file From 6ad3881a2a41e6d02caa94c9c238af61e301dc23 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Thu, 19 Oct 2023 16:45:03 +0200 Subject: [PATCH 16/62] internal transitions wrong - or cex processing --- 
aalpy/automata/Sevpa.py | 13 +++++++++++++ .../deterministic/ClassificationTree.py | 8 ++++++-- test_edi.py | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 15ec00f0..17a2952e 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -239,6 +239,19 @@ def transform_access_sequance(self) -> list[str]: word.extend(calling_state.prefix) return word + # TODO move + def is_balanced(self, x): + call_counter = 0 + for i in x: + if i in self.input_alphabet.call_alphabet: + call_counter += 1 + if i in self.input_alphabet.return_alphabet: + call_counter -= 1 + if call_counter < 0: + return False + + return call_counter == 0 + diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index 8a7782b6..172e71cb 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -361,9 +361,13 @@ def update_rs(self, cex: tuple, hypothesis): if self.automaton_type == 'vpa': v = (tuple(hypothesis.transform_access_sequance()), tuple(v)) + assert hypothesis.is_balanced(v) if a in self.alphabet.internal_alphabet: - new_access_string = (*u_state.prefix, a) + new_access_string = (*u_state.prefix, a,) + print('Int acc', new_access_string) + if new_access_string in self.leaf_nodes.keys(): + assert False else: # TODO ????? 
assert a in self.alphabet.return_alphabet @@ -434,9 +438,9 @@ def _insert_new_leaf(self, discriminator, old_leaf_access_string, new_leaf_acces # from aalpy.utils.HelperFunctions import visualize_classification_tree # visualize_classification_tree(self.root) - # input('inp') # sifting cache update + sifting_cache_outdated = [] if old_leaf in self.sifting_cache.values(): for prefix, node in self.sifting_cache.items(): diff --git a/test_edi.py b/test_edi.py index c2d5542a..70ee2dba 100644 --- a/test_edi.py +++ b/test_edi.py @@ -32,7 +32,7 @@ sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) - eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=1000) + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=5000) # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', print_level=3, cex_processing='rs') From e588b6d680f7816205f467659b07314de9bb0880 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Thu, 19 Oct 2023 19:41:05 +0200 Subject: [PATCH 17/62] works for all but L11 it seems --- aalpy/automata/Sevpa.py | 35 ++++++++++++++----- .../deterministic/ClassificationTree.py | 23 ++++++------ .../deterministic/CounterExampleProcessing.py | 3 +- aalpy/learning_algs/deterministic/KV.py | 6 +--- aalpy/utils/BenchmarkVpaModels.py | 22 ++++++------ test_edi.py | 7 ++-- 6 files changed, 56 insertions(+), 40 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 15ec00f0..a2a7ba85 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -81,6 +81,9 @@ def possible(self, letter): """ if self.current_state == Sevpa.error_state: return True + # push is always possible + if letter in self.input_alphabet.call_alphabet: + return True if letter is not None: transitions = self.current_state.transitions[letter] possible_trans = [] @@ 
-122,13 +125,16 @@ def step(self, letter): else: assert False + if letter in self.input_alphabet.call_alphabet: + assert(letter in self.input_alphabet.call_alphabet) # push letters must be in call set + self.stack.append((self.current_state.state_id, letter)) + return self.current_state.is_accepting and self.top() == self.empty + assert len(possible_trans) < 2 trans = possible_trans[0] self.current_state = trans.target - if trans.action == 'push': - assert(letter in self.input_alphabet.call_alphabet) # push letters must be in call set - self.stack.append(trans.stack_guard) - elif trans.action == 'pop': + + if trans.action == 'pop': assert(letter in self.input_alphabet.return_alphabet) # pop letters must be in return set if len(self.stack) <= 1: # empty stack elem should always be there self.current_state = Sevpa.error_state @@ -222,23 +228,34 @@ def from_state_setup(state_setup: dict, init_state_id, input_alphabet: SevpaAlph sevpa = Sevpa(init_state, states, input_alphabet) return sevpa - def transform_access_sequance(self) -> list[str]: + def transform_access_sequance(self, state=None, stack_content=None) -> list[str]: word = [] - calling_state = self.current_state + calling_state = self.initial_state if not state else state + stack = self.stack if not stack_content else stack_content - for i in range(1, len(self.stack)): # skip the first element because it's the start of the stack '_ - stack_elem = self.stack[i] + for i in range(1, len(stack)): # skip the first element because it's the start of the stack '_ + stack_elem = stack[i] from_state_id = stack_elem[0] # the corresponding state where the stack element got pushed from call_letter = stack_elem[1] # the call letter that was pushed on the stack from_state = self.get_state_by_id(from_state_id) if from_state.prefix != (): word.extend(from_state.prefix) word.append(call_letter) - word.extend(calling_state.prefix) + print('TRANS', word) return word + def is_balanced(self, x): + counter = 0 + for i in x: + if i in 
self.input_alphabet.call_alphabet: + counter += 1 + if i in self.input_alphabet.return_alphabet: + counter -= 1 + if counter < 0: + return False + return counter == 0 diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index 8a7782b6..010680ad 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -5,6 +5,7 @@ SevpaTransition, Sevpa from aalpy.base import SUL from aalpy.learning_algs.deterministic.CounterExampleProcessing import rs_cex_processing +from aalpy.utils.HelperFunctions import visualize_classification_tree automaton_class = {'dfa': Dfa, 'mealy': MealyMachine, 'moore': MooreMachine} @@ -217,11 +218,11 @@ def gen_hypothesis(self): # Add call transitions for call_letter in self.alphabet.call_alphabet: - # TODO This should not be here, but without it it breaks the algorithm as SEVPA incorrectly - # deals with call transitions I think. Important! - trans = SevpaTransition(start=state, target=initial_state, symbol=call_letter, action='push', - stack_guard=(state.state_id, call_letter)) - state.transitions[call_letter].append(trans) + # # TODO This should not be here, but without it it breaks the algorithm as SEVPA incorrectly + # # deals with call transitions I think. Important! 
+ # trans = SevpaTransition(start=state, target=initial_state, symbol=call_letter, action='push', + # stack_guard=(state.state_id, call_letter)) + # state.transitions[call_letter].append(trans) for other_state in states_for_transitions: # Add return transitions @@ -309,6 +310,7 @@ def update(self, cex: tuple, hypothesis): s_i = self._sift(cex[:i]).access_string hypothesis.execute_sequence(hypothesis.initial_state, cex[:i]) s_star_i = hypothesis.current_state.prefix + if s_i != s_star_i: j = i d = self._least_common_ancestor(s_i, s_star_i) @@ -360,17 +362,18 @@ def update_rs(self, cex: tuple, hypothesis): ua_state = hypothesis.current_state if self.automaton_type == 'vpa': - v = (tuple(hypothesis.transform_access_sequance()), tuple(v)) + discriminator = (tuple(hypothesis.transform_access_sequance()), tuple(v)) if a in self.alphabet.internal_alphabet: new_access_string = (*u_state.prefix, a) else: - # TODO ????? + if a not in self.alphabet.return_alphabet: + i = 123312321 assert a in self.alphabet.return_alphabet l_prime, call = hypothesis.get_state_by_id(top_of_stack[0]), top_of_stack[1] new_access_string = l_prime.prefix + (call,) + u_state.prefix + (a,) - print('Ret acc', new_access_string) else: + discriminator = v new_access_string = (*u_state.prefix, a) if self.automaton_type == 'dfa' or self.automaton_type == 'vpa': @@ -378,9 +381,9 @@ def update_rs(self, cex: tuple, hypothesis): else: new_leaf_position = self.sul.query(cex)[-1] - self._insert_new_leaf(discriminator=v, + self._insert_new_leaf(discriminator=discriminator, old_leaf_access_string=ua_state.prefix, - new_leaf_access_string=new_access_string, # TODO WRONG + new_leaf_access_string=new_access_string, new_leaf_position=new_leaf_position) def _insert_new_leaf(self, discriminator, old_leaf_access_string, new_leaf_access_string, new_leaf_position): diff --git a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py index 
18691a44..853d4f82 100644 --- a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py +++ b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py @@ -82,7 +82,8 @@ def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, if not is_vpa: s_bracket = hypothesis.current_state.prefix else: - s_bracket = tuple(hypothesis.transform_access_sequance()) + print('cex', cex_input[:mid]) + s_bracket = tuple(hypothesis.transform_access_sequance(hypothesis.current_state)) d = tuple(cex_input[mid:]) mq = sul.query(s_bracket + d) diff --git a/aalpy/learning_algs/deterministic/KV.py b/aalpy/learning_algs/deterministic/KV.py index 5dcbd5e4..690deafc 100644 --- a/aalpy/learning_algs/deterministic/KV.py +++ b/aalpy/learning_algs/deterministic/KV.py @@ -116,8 +116,6 @@ def run_KV(alphabet: Union[list, SevpaAlphabet], sul: SUL, eq_oracle: Oracle, au break hypothesis = classification_tree.gen_hypothesis() - # TODO this is needed for SEVPA for stack, leave for now, but ugly - # hypothesis.reset_to_initial() if print_level == 2: print(f'\rHypothesis {learning_rounds}: {hypothesis.size} states.', end="") @@ -133,9 +131,7 @@ def run_KV(alphabet: Union[list, SevpaAlphabet], sul: SUL, eq_oracle: Oracle, au eq_query_time += time.time() - eq_query_start if cex is None: - if print_level == 3: - visualize_classification_tree(classification_tree.root) - break + break else: cex = tuple(cex) diff --git a/aalpy/utils/BenchmarkVpaModels.py b/aalpy/utils/BenchmarkVpaModels.py index 31d9ec45..02289094 100644 --- a/aalpy/utils/BenchmarkVpaModels.py +++ b/aalpy/utils/BenchmarkVpaModels.py @@ -212,21 +212,21 @@ def vpa_for_L10(): def vpa_for_L11(): # RE Dyck order 1 - call_set = ['a', 'c'] - return_set = ['d', 'f'] - internal_set = ['b', 'e'] + call_set = ['c1', 'c2'] + return_set = ['r1', 'r2'] + internal_set = ['i1', 'i2'] state_setup = { - "q0": (False, {"a": [("qa", 'push', "a")], - "c": [("q1", 'push', "c")], + "q0": (False, {"c1": [("qa", 'push', "c1")], + "c2": 
[("q1", 'push', "c2")], }), - "qa": (False, {"b": [("q1", None, None)], + "qa": (False, {"i1": [("q1", None, None)], }), - "q1": (True, {"a": [("qa", 'push', "a")], - "c": [("q1", 'push', "c")], - "d": [("qd", 'pop', "a"), ("qd", 'pop', "c")], - "f": [("q1", 'pop', "a"), ("q1", 'pop', "c")]}), - "qd": (False, {"e": [("q1", None, None)]}) + "q1": (True, {"c1": [("qa", 'push', "c1")], + "c2": [("q1", 'push', "c2")], + "r1": [("qd", 'pop', "c1"), ("qd", 'pop', "c2")], + "r2": [("q1", 'pop', "c1"), ("q1", 'pop', "c2")]}), + "qd": (False, {"i2": [("q1", None, None)]}) } vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) return vpa diff --git a/test_edi.py b/test_edi.py index c2d5542a..c44732fc 100644 --- a/test_edi.py +++ b/test_edi.py @@ -21,10 +21,10 @@ vpa_for_L9(), vpa_for_L10(), vpa_for_L11(), vpa_for_L12(),vpa_for_L13(), vpa_for_L14(), vpa_for_L15()]): print(f'VPA {i + 1 if i < 6 else i + 2}') - for i in range(1000): - seed(i) + for i in range(10): + seed(4) print(i) - model_under_learning = vpa_for_L15() + model_under_learning = vpa_for_L11() alphabet = SevpaAlphabet(list(model_under_learning.internal_set), list(model_under_learning.call_set), @@ -37,4 +37,3 @@ model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', print_level=3, cex_processing='rs') - exit() \ No newline at end of file From b32d5d0f8ffcdad2fe155d481376aa8e06940c08 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Thu, 19 Oct 2023 20:23:38 +0200 Subject: [PATCH 18/62] works for all but L11 it seems --- aalpy/automata/Sevpa.py | 8 +++++--- aalpy/learning_algs/deterministic/ClassificationTree.py | 3 ++- .../deterministic/CounterExampleProcessing.py | 4 +--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index a2a7ba85..e7870b85 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -234,8 +234,10 @@ def transform_access_sequance(self, state=None, 
stack_content=None) -> list[str] calling_state = self.initial_state if not state else state stack = self.stack if not stack_content else stack_content - for i in range(1, len(stack)): # skip the first element because it's the start of the stack '_ - stack_elem = stack[i] + for index, stack_elem in enumerate(stack): + # skip the first element because it's the start of the stack '_ + if index == 0: + continue from_state_id = stack_elem[0] # the corresponding state where the stack element got pushed from call_letter = stack_elem[1] # the call letter that was pushed on the stack from_state = self.get_state_by_id(from_state_id) @@ -243,7 +245,7 @@ def transform_access_sequance(self, state=None, stack_content=None) -> list[str] word.extend(from_state.prefix) word.append(call_letter) word.extend(calling_state.prefix) - print('TRANS', word) + print('TRANS', word, calling_state.prefix) return word def is_balanced(self, x): diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index 010680ad..ab183849 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -368,7 +368,8 @@ def update_rs(self, cex: tuple, hypothesis): new_access_string = (*u_state.prefix, a) else: if a not in self.alphabet.return_alphabet: - i = 123312321 + v = max(rs_cex_processing(self.sul, cex, hypothesis, is_vpa=self.automaton_type == 'vpa'), key=len) + exit() assert a in self.alphabet.return_alphabet l_prime, call = hypothesis.get_state_by_id(top_of_stack[0]), top_of_stack[1] new_access_string = l_prime.prefix + (call,) + u_state.prefix + (a,) diff --git a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py index 853d4f82..16903a25 100644 --- a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py +++ b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py @@ -43,8 
+43,7 @@ def longest_prefix_cex_processing(s_union_s_dot_a: list, cex: tuple, closedness= return suffixes -def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, closedness='suffix', - is_vpa=False): +def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, closedness='suffix', is_vpa=False): """Riverst-Schapire counter example processing. Args: @@ -82,7 +81,6 @@ def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, if not is_vpa: s_bracket = hypothesis.current_state.prefix else: - print('cex', cex_input[:mid]) s_bracket = tuple(hypothesis.transform_access_sequance(hypothesis.current_state)) d = tuple(cex_input[mid:]) From 13619e600a168478d9fa6f13bde32717e303771e Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Thu, 19 Oct 2023 22:41:11 +0200 Subject: [PATCH 19/62] generate random sevpa --- aalpy/automata/Sevpa.py | 95 ++++++++++++++++++++++++++++++++++------- 1 file changed, 79 insertions(+), 16 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 15ec00f0..f555853b 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -1,11 +1,10 @@ +import random from collections import defaultdict -import re - from aalpy.base import Automaton, AutomatonState class SevpaAlphabet: - def __init__(self, internal_alphabet, call_alphabet, return_alphabet): + def __init__(self, internal_alphabet: list, call_alphabet: list, return_alphabet: list): self.internal_alphabet = internal_alphabet self.call_alphabet = call_alphabet self.return_alphabet = return_alphabet @@ -28,7 +27,7 @@ class SevpaState(AutomatonState): def __init__(self, state_id, is_accepting=False): super().__init__(state_id) - self.transitions = defaultdict(list) + self.transitions = defaultdict(list[SevpaTransition]) self.is_accepting = is_accepting @@ -126,10 +125,10 @@ def step(self, letter): trans = possible_trans[0] self.current_state = trans.target if trans.action == 'push': - assert(letter in 
self.input_alphabet.call_alphabet) # push letters must be in call set + assert (letter in self.input_alphabet.call_alphabet) # push letters must be in call set self.stack.append(trans.stack_guard) elif trans.action == 'pop': - assert(letter in self.input_alphabet.return_alphabet) # pop letters must be in return set + assert (letter in self.input_alphabet.return_alphabet) # pop letters must be in return set if len(self.stack) <= 1: # empty stack elem should always be there self.current_state = Sevpa.error_state return False @@ -212,7 +211,8 @@ def from_state_setup(state_setup: dict, init_state_id, input_alphabet: SevpaAlph # add call transitions for call_letter in input_alphabet.call_alphabet: - trans = SevpaTransition(start=state, target=states[init_state_id], symbol=call_letter, action='push', stack_guard=f'{state_id}{call_letter}') + trans = SevpaTransition(start=state, target=states[init_state_id], symbol=call_letter, action='push', + stack_guard=f'{state_id}{call_letter}') state.transitions[call_letter].append(trans) init_state = states[init_state_id] @@ -223,11 +223,10 @@ def from_state_setup(state_setup: dict, init_state_id, input_alphabet: SevpaAlph return sevpa def transform_access_sequance(self) -> list[str]: - word = [] calling_state = self.current_state - for i in range(1, len(self.stack)): # skip the first element because it's the start of the stack '_ + for i in range(1, len(self.stack)): # skip the first element because it's the start of the stack '_' stack_elem = self.stack[i] from_state_id = stack_elem[0] # the corresponding state where the stack element got pushed from call_letter = stack_elem[1] # the call letter that was pushed on the stack @@ -240,10 +239,74 @@ def transform_access_sequance(self) -> list[str]: return word - - - -def generate_random_sevpa(alphabet: SevpaAlphabet, amount_states, acceptance_prob, ): - - return None - +def has_transition(state: SevpaState, transition_letter, stack_guard) -> bool: + transitions = 
state.transitions[transition_letter] + if transitions is not None: + if stack_guard is None: # internal transition + for transition in transitions: + if transition.symbol == transition_letter: + return True + else: # return transition + for transition in transitions: + if transition.stack_guard == stack_guard and transition.symbol == transition_letter: + return True + + return False + + +def generate_random_sevpa(alphabet: SevpaAlphabet, amount_states, acceptance_prob, return_transition_prob): + # TODO: for some reason the alphabet attributes get + # treated as sets which are don't have accessible elements via index + internal_alphabet = list(alphabet.internal_alphabet) + return_alphabet = list(alphabet.return_alphabet) + call_alphabet = list(alphabet.call_alphabet) + + state_list = [SevpaState('q0', random.uniform(0.0, 1.0) < acceptance_prob)] + for i in range(1, amount_states): # add a return transition + if internal_alphabet == 0 or random.uniform(0.0, 1.0) < return_transition_prob: + while True: + from_state = state_list[random.randint(0, len(state_list)-1)] + return_letter = return_alphabet[random.randint(0, len(return_alphabet)-1)] + stack_state = state_list[random.randint(0, len(state_list)-1)] + call_letter = call_alphabet[random.randint(0, len(call_alphabet)-1)] + stack_guard = f'{stack_state}{call_letter}' + if not has_transition(from_state, return_letter, stack_guard): + break + target_state = SevpaState(f'q{i}', random.uniform(0.0, 1.0) < acceptance_prob) + state_list.append(target_state) + from_state.transitions[return_letter].append(SevpaTransition(from_state, target_state, return_letter, 'pop', stack_guard)) + else: # add an internal transition + while True: + from_state = state_list[random.randint(0, len(state_list)-1)] + internal_letter = internal_alphabet[random.randint(0, len(internal_alphabet)-1)] + if not has_transition(from_state, internal_letter, None): + break + target_state = SevpaState(f'q{i}', random.uniform(0.0, 1.0) < acceptance_prob) + 
state_list.append(target_state) + from_state.transitions[internal_letter].append(SevpaTransition(from_state, target_state, internal_letter, None, None)) + + assert len(state_list) == amount_states + initial_state_id = random.randint(0, amount_states) + initial_state = state_list[initial_state_id] + + for state in state_list: + for internal_letter in internal_alphabet: + if state.transitions[internal_letter] is None: + target_state = state_list[random.randint(0, len(state_list)-1)] + state.transitions[internal_letter].append(SevpaTransition(state, target_state, internal_letter, None, None)) + + for call_letter in call_alphabet: + for stack_state in state_list: + stack_guard = f'{stack_state.state_id}{call_letter}' + for return_letter in return_alphabet: + if not has_transition(state, return_letter, stack_guard): + target_state = state_list[random.randint(0, len(state_list)-1)] + state.transitions[return_letter].append(SevpaTransition(state, target_state, return_letter, 'pop', stack_guard)) + + # add call transitions + for call_letter in call_alphabet: + trans = SevpaTransition(start=state, target=initial_state, symbol=call_letter, action='push', + stack_guard=f'{state.state_id}{call_letter}') + state.transitions[call_letter].append(trans) + + return Sevpa(initial_state, state_list, alphabet) From e2f1204a2d9e904fdf5ab287b71f26a20a00ed55 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Fri, 20 Oct 2023 11:51:45 +0200 Subject: [PATCH 20/62] works, but creation of initial hypothesis is wrong is initial state is accepting --- Examples.py | 16 ++ aalpy/automata/Sevpa.py | 2 +- .../deterministic/ClassificationTree.py | 39 +--- aalpy/utils/BenchmarkSULs.py | 206 ++++-------------- test_edi.py | 22 +- 5 files changed, 88 insertions(+), 197 deletions(-) diff --git a/Examples.py b/Examples.py index a88387ab..66629697 100644 --- a/Examples.py +++ b/Examples.py @@ -938,3 +938,19 @@ def compare_stochastic_and_non_deterministic_learning(example='first_grid'): print(model_type) 
print('Error for each property:', [round(d * 100, 2) for d in diff.values()]) + + +def learning_context_free_grammar_example(): + from aalpy.automata import SevpaAlphabet + from aalpy.learning_algs import run_KV + from aalpy.oracles import RandomWordEqOracle + from aalpy.utils.BenchmarkSULs import get_balanced_string_sul + + call_return_map = {'(': ')', '[': ']'} + balanced_string_sul = get_balanced_string_sul(call_return_map) + + sevpa_alphabet = SevpaAlphabet([], list(call_return_map.keys()), list(call_return_map.values())) + eq_oracle = RandomWordEqOracle(sevpa_alphabet.get_merged_alphabet(), balanced_string_sul, num_walks=1000) + + learned_model = run_KV(sevpa_alphabet, balanced_string_sul, eq_oracle, automaton_type='vpa') + learned_model.visualize() diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index c9d591eb..00b123a7 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -127,6 +127,7 @@ def step(self, letter): if letter in self.input_alphabet.call_alphabet: assert(letter in self.input_alphabet.call_alphabet) # push letters must be in call set self.stack.append((self.current_state.state_id, letter)) + self.current_state = self.initial_state return self.current_state.is_accepting and self.top() == self.empty assert len(possible_trans) < 2 @@ -245,7 +246,6 @@ def transform_access_sequance(self, state=None, stack_content=None) -> list[str] word.extend(from_state.prefix) word.append(call_letter) word.extend(calling_state.prefix) - print('TRANS', word, calling_state.prefix) return word def is_balanced(self, x): diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index ab183849..8d1863ae 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -5,7 +5,6 @@ SevpaTransition, Sevpa from aalpy.base import SUL from aalpy.learning_algs.deterministic.CounterExampleProcessing import 
rs_cex_processing -from aalpy.utils.HelperFunctions import visualize_classification_tree automaton_class = {'dfa': Dfa, 'mealy': MealyMachine, 'moore': MooreMachine} @@ -128,10 +127,12 @@ def _sift(self, word): node = self.root while not node.is_leaf(): + if self.automaton_type != 'vpa': query = word + node.distinguishing_string else: query = node.distinguishing_string[0] + word + node.distinguishing_string[1] + if query not in self.query_cache.keys(): mq_result = self.sul.query(query) # keep track of transitions (this might miss some due to other caching, but rest can be obtained from @@ -218,12 +219,6 @@ def gen_hypothesis(self): # Add call transitions for call_letter in self.alphabet.call_alphabet: - # # TODO This should not be here, but without it it breaks the algorithm as SEVPA incorrectly - # # deals with call transitions I think. Important! - # trans = SevpaTransition(start=state, target=initial_state, symbol=call_letter, action='push', - # stack_guard=(state.state_id, call_letter)) - # state.transitions[call_letter].append(trans) - for other_state in states_for_transitions: # Add return transitions for return_letter in self.alphabet.return_alphabet: @@ -234,10 +229,8 @@ def gen_hypothesis(self): trans = SevpaTransition(start=state, target=states[transition_target_access_string], symbol=return_letter, action='pop', stack_guard=(other_state.state_id, call_letter)) - state.transitions[return_letter].append(trans) - # TODO this should be removed, when input alphabet is removed from a constructor if self.automaton_type == 'vpa': return Sevpa(initial_state=initial_state, states=list(states.values()), input_alphabet=self.alphabet) @@ -320,14 +313,7 @@ def update(self, cex: tuple, hypothesis): d = [] assert j is not None and d is not None - hypothesis.execute_sequence(hypothesis.initial_state, cex[:j - 1] or tuple()) - - if self.automaton_type == 'vpa': - discriminator = (tuple(hypothesis.transform_access_sequance()), (cex[j - 1], *d)) - else: - discriminator = 
(cex[j - 1], *d) - - self._insert_new_leaf(discriminator=discriminator, + self._insert_new_leaf(discriminator=(cex[j - 1], *d), old_leaf_access_string=hypothesis.current_state.prefix, new_leaf_access_string=tuple(cex[:j - 1]) or tuple(), new_leaf_position=self.sul.query((*cex[:j - 1], *(cex[j - 1], *d)))[-1]) @@ -356,26 +342,25 @@ def update_rs(self, cex: tuple, hypothesis): hypothesis.execute_sequence(hypothesis.initial_state, u) u_state = hypothesis.current_state - top_of_stack = hypothesis.stack[-1] + top_of_stack = hypothesis.stack[-1] if self.automaton_type == 'vpa' else None + # get state reached after executing last action => old leaf hypothesis.step(a) ua_state = hypothesis.current_state + # get discriminator and new_leaf_access_string if self.automaton_type == 'vpa': discriminator = (tuple(hypothesis.transform_access_sequance()), tuple(v)) if a in self.alphabet.internal_alphabet: - new_access_string = (*u_state.prefix, a) + new_leaf_access_string = (*u_state.prefix, a) else: - if a not in self.alphabet.return_alphabet: - v = max(rs_cex_processing(self.sul, cex, hypothesis, is_vpa=self.automaton_type == 'vpa'), key=len) - exit() assert a in self.alphabet.return_alphabet l_prime, call = hypothesis.get_state_by_id(top_of_stack[0]), top_of_stack[1] - new_access_string = l_prime.prefix + (call,) + u_state.prefix + (a,) + new_leaf_access_string = l_prime.prefix + (call,) + u_state.prefix + (a,) else: discriminator = v - new_access_string = (*u_state.prefix, a) + new_leaf_access_string = (*u_state.prefix, a) if self.automaton_type == 'dfa' or self.automaton_type == 'vpa': new_leaf_position = not hypothesis.execute_sequence(hypothesis.initial_state, cex)[-1] @@ -384,7 +369,7 @@ def update_rs(self, cex: tuple, hypothesis): self._insert_new_leaf(discriminator=discriminator, old_leaf_access_string=ua_state.prefix, - new_leaf_access_string=new_access_string, + new_leaf_access_string=new_leaf_access_string, new_leaf_position=new_leaf_position) def 
_insert_new_leaf(self, discriminator, old_leaf_access_string, new_leaf_access_string, new_leaf_position): @@ -436,10 +421,6 @@ def _insert_new_leaf(self, discriminator, old_leaf_access_string, new_leaf_acces discriminator_node.children[new_leaf_position] = new_leaf discriminator_node.children[other_leaf_position] = old_leaf - # from aalpy.utils.HelperFunctions import visualize_classification_tree - # visualize_classification_tree(self.root) - # input('inp') - # sifting cache update sifting_cache_outdated = [] if old_leaf in self.sifting_cache.values(): diff --git a/aalpy/utils/BenchmarkSULs.py b/aalpy/utils/BenchmarkSULs.py index b53b0eff..4af23a8b 100644 --- a/aalpy/utils/BenchmarkSULs.py +++ b/aalpy/utils/BenchmarkSULs.py @@ -11,6 +11,7 @@ def get_Angluin_dfa(): return dfa + def get_benchmark_ONFSM(): """ Returns ONFSM presented in 'Learning Finite State Models of Observable Nondeterministic Systems in a Testing @@ -386,164 +387,47 @@ def get_small_pomdp(): return Mdp(q0, [q0, q1, q2, q3, q4]) -# -# class CarAlarmSystem: -# -# def __init__(self, max_wait_time=700): -# self.timer = 0 -# self.max_wait_time = max_wait_time -# self.bonnet_open = False -# self.trunk_open = False -# self.doors_opened = [False, False, False, False] -# # Is alarm active -# self.alarm_active = False -# # Is alarm triggered -# self.alarm_triggered = False -# # Is car locked -# self.is_locked = False -# # Time at which car was locked and alarm activated -# self.locked_time = None -# self.alarm_activation_time = None -# -# def __get_alarm_status(self): -# if self.is_locked and self.alarm_active: -# self.alarm_triggered = True -# if not self.alarm_triggered: -# return None -# alarm_note = '' -# if abs(self.timer - self.alarm_activation_time) <= 30: -# alarm_note += '_ALARM_SOUND' -# if abs(self.timer - self.alarm_activation_time) <= 500: -# alarm_note += '_ALARM_LIGHTS' -# return alarm_note -# -# def open_bonnet(self): -# if self.bonnet_open: -# return 'Bonnet already opened' -# 
self.bonnet_open = True -# alarm_note = self.__get_alarm_status() -# return_msg = 'Bonnet opened' -# if alarm_note: -# return_msg += alarm_note -# return return_msg -# -# def close_bonnet(self): -# if not self.bonnet_open: -# return 'Bonnet already closed' -# self.bonnet_open = False -# alarm_note = self.__get_alarm_status() -# return_msg = 'Bonnet closed' -# if alarm_note: -# return_msg += alarm_note -# return return_msg -# -# def open_trunk(self): -# if self.trunk_open: -# return 'Trunk already opened' -# self.trunk_open = True -# alarm_note = self.__get_alarm_status() -# return_msg = 'Trunk opened' -# if alarm_note: -# return_msg += alarm_note -# return return_msg -# -# def close_trunk(self): -# if not self.trunk_open: -# return 'Trunk already closed' -# self.trunk_open = False -# alarm_note = self.__get_alarm_status() -# return_msg = 'Trunk closed' -# if alarm_note: -# return_msg += alarm_note -# return return_msg -# -# def open_door(self, door_id): -# door_id = door_id - 1 -# if self.doors_opened[door_id]: -# return 'Door already opened' -# self.doors_opened[door_id] = True -# alarm_note = self.__get_alarm_status() -# return_msg = 'Doors opened' -# if alarm_note: -# return_msg += alarm_note -# return return_msg -# -# def close_door(self, door_id): -# door_id = door_id - 1 -# if not self.doors_opened[door_id]: -# return 'Door already closed' -# self.doors_opened[door_id] = False -# alarm_note = self.__get_alarm_status() -# return_msg = 'Doors closed' -# if alarm_note: -# return_msg += alarm_note -# return return_msg -# -# def lock_vehicle(self): -# if self.is_locked: -# return 'Locked' -# if not self.bonnet_open and not self.trunk_open and len(list(set(self.doors_opened))) == 1 and self.doors_opened[0] is False: -# self.is_locked = True -# self.locked_time = self.timer -# return 'Locked' -# return 'Cannot lock' -# -# def unlock_vehicle(self): -# self.is_locked = False -# self.alarm_active = False -# self.locked_time = None -# return 'Unlocked' -# -# def 
wait(self, time_to_wait): -# self.timer += time_to_wait -# self.timer = min(self.timer, self.max_wait_time) -# if self.locked_time and abs(self.timer - self.locked_time) >= 20: -# self.alarm_active = True -# self.alarm_activation_time = self.timer -# if self.alarm_active: -# return 'Alarm Activated' -# return 'Waiting' -# -# def reset(self): -# self.timer = 0 -# self.bonnet_open = False -# self.doors_opened = [False, False, False, False] -# # Is alarm active -# self.alarm_active = False -# # Is alarm triggered -# self.alarm_triggered = False -# # Is car locked -# self.is_locked = False -# # Time at which car was locked and alarm activated -# self.locked_time = None -# self.alarm_activation_time = None -# -# -# if __name__ == '__main__': -# from aalpy.learning_algs import run_Lstar -# from aalpy.oracles import RandomWMethodEqOracle -# from aalpy.SULs import FunctionDecorator, PyClassSUL -# # class under learning (do not instantiate it) -# car_alarm_class = CarAlarmSystem -# -# # methods weapped in the function decorators -# input_al = [FunctionDecorator(car_alarm_class.open_trunk), FunctionDecorator(car_alarm_class.close_trunk), -# FunctionDecorator(car_alarm_class.open_bonnet), FunctionDecorator(car_alarm_class.close_bonnet), -# FunctionDecorator(car_alarm_class.lock_vehicle), FunctionDecorator(car_alarm_class.unlock_vehicle), -# -# FunctionDecorator(car_alarm_class.open_door, 1), FunctionDecorator(car_alarm_class.close_door, 1), -# FunctionDecorator(car_alarm_class.open_door, 2), FunctionDecorator(car_alarm_class.close_door, 2), -# FunctionDecorator(car_alarm_class.open_door, 3), FunctionDecorator(car_alarm_class.close_door, 3), -# FunctionDecorator(car_alarm_class.open_door, 4), FunctionDecorator(car_alarm_class.close_door, 4), -# -# FunctionDecorator(car_alarm_class.wait, 10), -# FunctionDecorator(car_alarm_class.wait, 20), -# FunctionDecorator(car_alarm_class.wait, 200), -# ] -# -# sul = PyClassSUL(car_alarm_class) -# -# eq_oracle = 
RandomWMethodEqOracle(input_al, sul, walks_per_state=100, walk_len=10) -# -# learned_model = run_Lstar(input_al, sul, eq_oracle=eq_oracle, automaton_type='mealy', cache_and_non_det_check=True) -# print(learned_model) + +def is_balanced(test_string, call_return_map): + stack = [] + # Create a set of open and close characters for faster lookup + open_chars = set(call_return_map.keys()) + close_chars = set(call_return_map.values()) + + for char in test_string: + if char in open_chars: + stack.append(char) + elif char in close_chars: + # Stack should exist + if not stack: + return False + last_open = stack.pop() + # Mismatched open and close character + if call_return_map[last_open] != char: + return False + + return not stack and len(test_string) > 0 + + +def get_balanced_string_sul(call_return_map): + from aalpy.base import SUL + + class BalancedStringSUL(SUL): + def __init__(self, call_return_map): + super(BalancedStringSUL, self).__init__() + self.call_return_map = call_return_map + self.sting_under_test = [] + + def pre(self): + self.sting_under_test = [] + + def post(self): + pass + + def step(self, letter): + if letter: + self.sting_under_test += letter + return is_balanced(self.sting_under_test, self.call_return_map) + + return BalancedStringSUL(call_return_map) + diff --git a/test_edi.py b/test_edi.py index c5dd8c66..00efbf26 100644 --- a/test_edi.py +++ b/test_edi.py @@ -1,3 +1,4 @@ +from Examples import learning_context_free_grammar_example from aalpy.SULs.AutomataSUL import SevpaSUL, VpaSUL from aalpy.automata.Pda import generate_data_from_pda from aalpy.learning_algs import run_KV_vpda, run_KV @@ -14,17 +15,24 @@ # random generation of SEVPA as done in learnlib # test test test + +learning_context_free_grammar_example() + +exit() + from random import seed -seed(12) for i, vpa in enumerate([vpa_for_L1(), vpa_for_L2(), vpa_for_L3(), vpa_for_L4(), vpa_for_L5(), vpa_for_L7(), vpa_for_L8(), vpa_for_L9(), vpa_for_L10(), vpa_for_L11(), 
vpa_for_L12(),vpa_for_L13(), vpa_for_L14(), vpa_for_L15()]): print(f'VPA {i + 1 if i < 6 else i + 2}') - for i in range(10): - seed(4) + # 16 works + for i in range(100): + if i < 9: + continue + seed(i) print(i) - model_under_learning = vpa_for_L11() + model_under_learning = vpa alphabet = SevpaAlphabet(list(model_under_learning.internal_set), list(model_under_learning.call_set), @@ -32,8 +40,10 @@ sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) - eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=5000) + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000) # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', - print_level=3, cex_processing='rs') + print_level=2, cex_processing='rs') + + # exit() From 24f965c389d9699e95351e0a8a2835b69d29713e Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Fri, 20 Oct 2023 13:12:44 +0200 Subject: [PATCH 21/62] update initial hypothesis --- Examples.py | 2 +- aalpy/SULs/AutomataSUL.py | 24 +- aalpy/automata/Pda.py | 221 ---------- aalpy/automata/Sevpa.py | 64 +-- aalpy/automata/__init__.py | 1 - aalpy/learning_algs/__init__.py | 2 - aalpy/learning_algs/deterministic/KV.py | 13 +- .../vpda/VpdaClassificationTree.py | 399 ------------------ .../vpda/VpdaCounterExampleProcessing.py | 133 ------ aalpy/learning_algs/vpda/VpdaKV.py | 151 ------- aalpy/learning_algs/vpda/VpdaLStar.py | 186 -------- .../vpda/VpdaObservationTable.py | 269 ------------ aalpy/learning_algs/vpda/__init__.py | 0 aalpy/utils/BenchmarkPdaModels.py | 264 ------------ aalpy/utils/BenchmarkSULs.py | 14 +- aalpy/utils/BenchmarkSevpaModels.py | 357 +--------------- aalpy/utils/BenchmarkVpaModels.py | 8 +- aalpy/utils/FileHandler.py | 6 +- aalpy/utils/__init__.py | 1 - test_edi.py | 15 +- 20 files changed, 63 insertions(+), 2067 
deletions(-) delete mode 100644 aalpy/automata/Pda.py delete mode 100644 aalpy/learning_algs/vpda/VpdaClassificationTree.py delete mode 100644 aalpy/learning_algs/vpda/VpdaCounterExampleProcessing.py delete mode 100644 aalpy/learning_algs/vpda/VpdaKV.py delete mode 100644 aalpy/learning_algs/vpda/VpdaLStar.py delete mode 100644 aalpy/learning_algs/vpda/VpdaObservationTable.py delete mode 100644 aalpy/learning_algs/vpda/__init__.py delete mode 100644 aalpy/utils/BenchmarkPdaModels.py diff --git a/Examples.py b/Examples.py index 66629697..27af9c5c 100644 --- a/Examples.py +++ b/Examples.py @@ -947,7 +947,7 @@ def learning_context_free_grammar_example(): from aalpy.utils.BenchmarkSULs import get_balanced_string_sul call_return_map = {'(': ')', '[': ']'} - balanced_string_sul = get_balanced_string_sul(call_return_map) + balanced_string_sul = get_balanced_string_sul(call_return_map, allow_empty_string=True) sevpa_alphabet = SevpaAlphabet([], list(call_return_map.keys()), list(call_return_map.values())) eq_oracle = RandomWordEqOracle(sevpa_alphabet.get_merged_alphabet(), balanced_string_sul, num_walks=1000) diff --git a/aalpy/SULs/AutomataSUL.py b/aalpy/SULs/AutomataSUL.py index 86ab2d73..a6d63329 100644 --- a/aalpy/SULs/AutomataSUL.py +++ b/aalpy/SULs/AutomataSUL.py @@ -1,5 +1,5 @@ from aalpy.base import SUL -from aalpy.automata import Dfa, MealyMachine, MooreMachine, Onfsm, Mdp, StochasticMealyMachine, MarkovChain, Pda, Vpa, Sevpa +from aalpy.automata import Dfa, MealyMachine, MooreMachine, Onfsm, Mdp, StochasticMealyMachine, MarkovChain, Vpa, Sevpa class DfaSUL(SUL): @@ -166,28 +166,6 @@ def step(self, letter): return self.smm.step(letter) -class PdaSUL(SUL): - def __init__(self, pda: Pda, include_top=True, check_balance=True): - super().__init__() - self.pda = pda - self.include_top = include_top - self.check_balance = check_balance - - def pre(self): - self.pda.reset_to_initial() - - def post(self): - pass - - def step(self, letter): - output = self.pda.step(letter) 
- top = self.pda.top() - if self.include_top: - if self.check_balance and self.pda.call_balance < 0: - return output, '-' - return output, top - return output - class VpaSUL(SUL): def __init__(self, vpa: Vpa, include_top=True, check_balance=True): diff --git a/aalpy/automata/Pda.py b/aalpy/automata/Pda.py deleted file mode 100644 index 1fc32fc4..00000000 --- a/aalpy/automata/Pda.py +++ /dev/null @@ -1,221 +0,0 @@ -from collections import defaultdict - -from aalpy.base import Automaton, AutomatonState - - -class PdaState(AutomatonState): - """ - Single state of a deterministic finite automaton. - """ - - def __init__(self, state_id, is_accepting=False): - super().__init__(state_id) - self.transitions = defaultdict(list) - self.is_accepting = is_accepting - - -class PdaTransition: - def __init__(self, start: PdaState, target: PdaState, symbol, action, stack_guard=None): - self.start = start - self.target = target - self.symbol = symbol - self.action = action - self.stack_guard = stack_guard - - -class Pda(Automaton): - empty = "_" - error_state = PdaState("ErrorSinkState", False) - - def __init__(self, initial_state: PdaState, states): - super().__init__(initial_state, states) - self.initial_state = initial_state - self.states = states - self.current_state = None - self.call_balance = 0 - self.stack = [] - - def reset_to_initial(self): - super().reset_to_initial() - self.reset() - - def reset(self): - self.current_state = self.initial_state - self.stack = [self.empty] - self.call_balance = 0 - return self.current_state.is_accepting and self.top() == self.empty - - def top(self): - return self.stack[-1] - - def pop(self): - return self.stack.pop() - - def possible(self, letter): - if self.current_state == Pda.error_state: - return True - if letter is not None: - transitions = self.current_state.transitions[letter] - trans = [t for t in transitions if t.stack_guard is None or self.top() == t.stack_guard] - assert len(trans) < 2 - if len(trans) == 0: - return False - 
else: - return True - return False - - def step(self, letter): - if self.current_state == Pda.error_state: - return False - if not self.possible(letter): - self.current_state = Pda.error_state - return False - if letter is not None: - transitions = self.current_state.transitions[letter] - trans = [t for t in transitions if t.stack_guard is None or self.top() == t.stack_guard][0] - self.current_state = trans.target - if trans.action == 'push': - self.stack.append(letter) - elif trans.action == 'pop': - if len(self.stack) <= 1: # empty stack elem should always be there - self.current_state = Pda.error_state - return False - self.stack.pop() - - return self.current_state.is_accepting and self.top() == self.empty - - # def compute_output_seq(self, state, sequence): - # if not sequence: - # return [state.is_accepting] - # return super(Dfa, self).compute_output_seq(state, sequence) - - def to_state_setup(self): - state_setup_dict = {} - - # ensure prefixes are computed - # self.compute_prefixes() - - sorted_states = sorted(self.states, key=lambda x: len(x.prefix)) - for s in sorted_states: - state_setup_dict[s.state_id] = ( - s.is_accepting, {k: (v.target.state_id, v.action) for k, v in s.transitions.items()}) - - return state_setup_dict - - @staticmethod - def from_state_setup(state_setup: dict, init_state_id): - """ - First state in the state setup is the initial state. 
- Example state setup: - state_setup = { - "a": (True, {"x": ("b1",PUSH), "y": ("a", NONE)}), - "b1": (False, {"x": ("b2", PUSH), "y": "a"}), - "b2": (True, {"x": "b3", "y": "a"}), - "b3": (False, {"x": "b4", "y": "a"}), - "b4": (False, {"x": "c", "y": "a"}), - "c": (True, {"x": "a", "y": "a"}), - } - - Args: - - state_setup: map from state_id to tuple(output and transitions_dict) - - Returns: - - PDA - """ - # state_setup should map from state_id to tuple(is_accepting and transitions_dict) - - # build states with state_id and output - states = {key: PdaState(key, val[0]) for key, val in state_setup.items()} - states[Pda.error_state.state_id] = Pda.error_state # PdaState(Pda.error_state,False) - # add transitions to states - for state_id, state in states.items(): - if state_id == Pda.error_state.state_id: - continue - for _input, trans_spec in state_setup[state_id][1].items(): - for (target_state_id, action, stack_guard) in trans_spec: - # action = Action[action_string] - trans = PdaTransition(start=state, target=states[target_state_id], symbol=_input, action=action, - stack_guard=stack_guard) - state.transitions[_input].append(trans) - - init_state = states[init_state_id] - # states to list - states = [state for state in states.values()] - - pda = Pda(init_state, states) - return pda - - -def generate_data_from_pda(automaton, num_examples, lens=None, classify_states=False, stack_limit=None, - break_on_impossible=False, possible_prob=0.75): - import random - from itertools import product - - input_al = automaton.get_input_alphabet() - - if lens is None: - lens = list(range(1, 15)) - - sum_lens = sum(lens) - # key is length, value is number of examples for said length - ex_per_len = dict() - - additional_seq = 0 - for l in lens: - ex_per_len[l] = int(num_examples * (l / sum_lens)) + 1 - if ex_per_len[l] > pow(len(input_al), l): - additional_seq += ex_per_len[l] - pow(len(input_al), l) - ex_per_len[l] = 'comb' - - additional_seq = additional_seq // len([i for i in 
ex_per_len.values() if i != 'comb']) - - training_data = [] - for l in ex_per_len.keys(): - seqs = [] - if ex_per_len[l] == 'comb': - - seqs = list(product(input_al, repeat=l)) - for seq in seqs: - - out = automaton.reset() - nr_steps = 0 - for inp in seq: - if automaton.possible(inp) or not break_on_impossible: - nr_steps += 1 - if stack_limit and len(automaton.stack) > stack_limit: - break - if break_on_impossible and not automaton.possible(inp): - break - out = automaton.step(inp) - seq = seq[:nr_steps] - training_data.append((tuple(seq), out if not classify_states else automaton.current_state.state_id)) - - else: - for _ in range(ex_per_len[l] + additional_seq): - # seq = [random.choice(input_al) for _ in range(l)] - out = automaton.reset() - nr_steps = 0 - seq = [] - for i in range(l): - possible_inp = [inp for inp in input_al if automaton.possible(inp)] - if len(possible_inp) == 0: - inp = random.choice(input_al) - else: - if random.random() <= possible_prob: - inp = random.choice(possible_inp) - else: - inp = random.choice(input_al) - seq.append(inp) - if automaton.possible(inp) or not break_on_impossible: - nr_steps += 1 - if stack_limit and len(automaton.stack) > stack_limit: - break - if break_on_impossible and not automaton.possible(inp): - break - out = automaton.step(inp) - seq = seq[:nr_steps] - training_data.append((tuple(seq), out)) - - return training_data diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 00b123a7..65e3d5d7 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -1,5 +1,7 @@ import random from collections import defaultdict +from typing import Union + from aalpy.base import Automaton, AutomatonState @@ -125,7 +127,7 @@ def step(self, letter): assert False if letter in self.input_alphabet.call_alphabet: - assert(letter in self.input_alphabet.call_alphabet) # push letters must be in call set + assert (letter in self.input_alphabet.call_alphabet) # push letters must be in call set 
self.stack.append((self.current_state.state_id, letter)) self.current_state = self.initial_state return self.current_state.is_accepting and self.top() == self.empty @@ -135,7 +137,7 @@ def step(self, letter): self.current_state = trans.target if trans.action == 'pop': - assert(letter in self.input_alphabet.return_alphabet) # pop letters must be in return set + assert (letter in self.input_alphabet.return_alphabet) # pop letters must be in return set if len(self.stack) <= 1: # empty stack elem should always be there self.current_state = Sevpa.error_state return False @@ -143,7 +145,7 @@ def step(self, letter): return self.current_state.is_accepting and self.top() == self.empty - def get_state_by_id(self, state_id) -> SevpaState: + def get_state_by_id(self, state_id) -> Union[SevpaState, None]: for state in self.states: if state.state_id == state_id: return state @@ -248,25 +250,29 @@ def transform_access_sequance(self, state=None, stack_content=None) -> list[str] word.extend(calling_state.prefix) return word - def is_balanced(self, x): - counter = 0 - for i in x: - if i in self.input_alphabet.call_alphabet: - counter += 1 - if i in self.input_alphabet.return_alphabet: - counter -= 1 - if counter < 0: - return False - return counter == 0 + @staticmethod + def create_daisy_hypothesis(initial_state, alphabet): + + for i in alphabet.internal_alphabet: + trans = SevpaTransition(start=initial_state, target=initial_state, symbol=i, action=None) + initial_state.transitions[i].append(trans) + + for c in alphabet.call_alphabet: + for r in alphabet.return_alphabet: + trans = SevpaTransition(start=initial_state, target=initial_state, symbol=r, action='pop', + stack_guard=(initial_state.state_id, c)) + initial_state.transitions[r].append(trans) + + return Sevpa(initial_state, [initial_state], alphabet) def has_transition(state: SevpaState, transition_letter, stack_guard) -> bool: transitions = state.transitions[transition_letter] if transitions is not None: - if stack_guard is 
None: # internal transition + if stack_guard is None: # internal transition for transition in transitions: if transition.symbol == transition_letter: return True - else: # return transition + else: # return transition for transition in transitions: if transition.stack_guard == stack_guard and transition.symbol == transition_letter: return True @@ -285,25 +291,27 @@ def generate_random_sevpa(alphabet: SevpaAlphabet, amount_states, acceptance_pro for i in range(1, amount_states): # add a return transition if internal_alphabet == 0 or random.uniform(0.0, 1.0) < return_transition_prob: while True: - from_state = state_list[random.randint(0, len(state_list)-1)] - return_letter = return_alphabet[random.randint(0, len(return_alphabet)-1)] - stack_state = state_list[random.randint(0, len(state_list)-1)] - call_letter = call_alphabet[random.randint(0, len(call_alphabet)-1)] + from_state = state_list[random.randint(0, len(state_list) - 1)] + return_letter = return_alphabet[random.randint(0, len(return_alphabet) - 1)] + stack_state = state_list[random.randint(0, len(state_list) - 1)] + call_letter = call_alphabet[random.randint(0, len(call_alphabet) - 1)] stack_guard = f'{stack_state}{call_letter}' if not has_transition(from_state, return_letter, stack_guard): break target_state = SevpaState(f'q{i}', random.uniform(0.0, 1.0) < acceptance_prob) state_list.append(target_state) - from_state.transitions[return_letter].append(SevpaTransition(from_state, target_state, return_letter, 'pop', stack_guard)) + from_state.transitions[return_letter].append( + SevpaTransition(from_state, target_state, return_letter, 'pop', stack_guard)) else: # add an internal transition while True: - from_state = state_list[random.randint(0, len(state_list)-1)] - internal_letter = internal_alphabet[random.randint(0, len(internal_alphabet)-1)] + from_state = state_list[random.randint(0, len(state_list) - 1)] + internal_letter = internal_alphabet[random.randint(0, len(internal_alphabet) - 1)] if not 
has_transition(from_state, internal_letter, None): break target_state = SevpaState(f'q{i}', random.uniform(0.0, 1.0) < acceptance_prob) state_list.append(target_state) - from_state.transitions[internal_letter].append(SevpaTransition(from_state, target_state, internal_letter, None, None)) + from_state.transitions[internal_letter].append( + SevpaTransition(from_state, target_state, internal_letter, None, None)) assert len(state_list) == amount_states initial_state_id = random.randint(0, amount_states) @@ -312,16 +320,18 @@ def generate_random_sevpa(alphabet: SevpaAlphabet, amount_states, acceptance_pro for state in state_list: for internal_letter in internal_alphabet: if state.transitions[internal_letter] is None: - target_state = state_list[random.randint(0, len(state_list)-1)] - state.transitions[internal_letter].append(SevpaTransition(state, target_state, internal_letter, None, None)) + target_state = state_list[random.randint(0, len(state_list) - 1)] + state.transitions[internal_letter].append( + SevpaTransition(state, target_state, internal_letter, None, None)) for call_letter in call_alphabet: for stack_state in state_list: stack_guard = f'{stack_state.state_id}{call_letter}' for return_letter in return_alphabet: if not has_transition(state, return_letter, stack_guard): - target_state = state_list[random.randint(0, len(state_list)-1)] - state.transitions[return_letter].append(SevpaTransition(state, target_state, return_letter, 'pop', stack_guard)) + target_state = state_list[random.randint(0, len(state_list) - 1)] + state.transitions[return_letter].append( + SevpaTransition(state, target_state, return_letter, 'pop', stack_guard)) # add call transitions for call_letter in call_alphabet: diff --git a/aalpy/automata/__init__.py b/aalpy/automata/__init__.py index 0f7e6799..509c16aa 100644 --- a/aalpy/automata/__init__.py +++ b/aalpy/automata/__init__.py @@ -5,6 +5,5 @@ from .Onfsm import Onfsm, OnfsmState from .StochasticMealyMachine import StochasticMealyMachine, 
StochasticMealyState from .MarkovChain import MarkovChain, McState -from .Pda import Pda from .Vpa import Vpa, VpaState from .Sevpa import Sevpa, SevpaState, SevpaAlphabet, SevpaTransition diff --git a/aalpy/learning_algs/__init__.py b/aalpy/learning_algs/__init__.py index 715df848..507a57f9 100644 --- a/aalpy/learning_algs/__init__.py +++ b/aalpy/learning_algs/__init__.py @@ -8,5 +8,3 @@ from .stochastic_passive.ActiveAleriga import run_active_Alergia from .deterministic_passive.RPNI import run_RPNI from .deterministic_passive.active_RPNI import run_active_RPNI -from .vpda.VpdaLStar import run_vpda_Lstar -from .vpda.VpdaKV import run_KV_vpda diff --git a/aalpy/learning_algs/deterministic/KV.py b/aalpy/learning_algs/deterministic/KV.py index 690deafc..2040cf6a 100644 --- a/aalpy/learning_algs/deterministic/KV.py +++ b/aalpy/learning_algs/deterministic/KV.py @@ -71,19 +71,16 @@ def run_KV(alphabet: Union[list, SevpaAlphabet], sul: SUL, eq_oracle: Oracle, au # single (accepting or rejecting) state with self-loops for # all transitions. 
if automaton_type == 'dfa': - initial_state = DfaState(state_id='s0', is_accepting=empty_string_mq) + initial_state = DfaState(state_id='q0', is_accepting=empty_string_mq) elif automaton_type == 'moore': - initial_state = MooreState(state_id='s0', output=empty_string_mq) + initial_state = MooreState(state_id='q0', output=empty_string_mq) else: - initial_state = SevpaState(state_id='s0', is_accepting=empty_string_mq) + initial_state = SevpaState(state_id='q0', is_accepting=empty_string_mq) else: - initial_state = MealyState(state_id='s0') + initial_state = MealyState(state_id='q0') initial_state.prefix = tuple() - # TODO there should be static function in SEVPA class that creates a daisy hypothesis, - # where all transitions are self loops, and all return transitions are self loops with initial state being stack guard - # then we just call SEVPA.create_daisy_hypothesis(empty_string_mq) if automaton_type != 'vpa': for a in alphabet: initial_state.transitions[a] = initial_state @@ -95,7 +92,7 @@ def run_KV(alphabet: Union[list, SevpaAlphabet], sul: SUL, eq_oracle: Oracle, au if automaton_type != 'vpa': hypothesis = automaton_class[automaton_type](initial_state, [initial_state]) else: - hypothesis = Sevpa(initial_state, [initial_state], alphabet) + hypothesis = Sevpa.create_daisy_hypothesis(initial_state, alphabet) # Perform an equivalence query on this automaton eq_query_start = time.time() diff --git a/aalpy/learning_algs/vpda/VpdaClassificationTree.py b/aalpy/learning_algs/vpda/VpdaClassificationTree.py deleted file mode 100644 index 1fefc8b8..00000000 --- a/aalpy/learning_algs/vpda/VpdaClassificationTree.py +++ /dev/null @@ -1,399 +0,0 @@ -import re -from collections import defaultdict - -from aalpy.automata import SevpaState, SevpaAlphabet, SevpaTransition, Sevpa -from aalpy.base import SUL -from aalpy.learning_algs.deterministic.CounterExampleProcessing import rs_cex_processing - - -class CTNode: - __slots__ = ['parent', 'path_to_node'] - - def __init__(self, 
parent, path_to_node): - self.parent = parent - self.path_to_node = path_to_node - - def is_leaf(self): - pass - - -class CTInternalNode(CTNode): - __slots__ = ['distinguishing_string', 'children'] - - def __init__(self, distinguishing_string: tuple, parent, path_to_node): - super().__init__(parent, path_to_node) - self.distinguishing_string = distinguishing_string - self.children = defaultdict(None) # {True: None, False: None} - - def is_leaf(self): - return False - - -class CTLeafNode(CTNode): - __slots__ = ['access_string'] - - def __init__(self, access_string: tuple, parent, path_to_node): - super().__init__(parent, path_to_node) - self.access_string = access_string - - def __repr__(self): - return f"{self.__class__.__name__} '{self.access_string}'" - - @property - def output(self): - c, p = self, self.parent - while p.parent: - c = p - p = p.parent - for output, child in p.children.items(): - if child == c: - return output - assert False - - def is_leaf(self): - return True - - -class VpdaClassificationTree: - # TODO replace all dist. strings with context pairs appropriately - def __init__(self, alphabet: SevpaAlphabet, sul: SUL, cex: tuple): - self.sul = sul - self.alphabet = alphabet - - self.leaf_nodes = {} - self.query_cache = dict() - - self.sifting_cache = {} - - initial_output = sul.query(())[-1] - cex_output = sul.query(cex)[-1] - - self.query_cache[()] = initial_output - - self.root = CTInternalNode(distinguishing_string=tuple([(), ()]), parent=None, path_to_node=None) - - initial_output_node = CTLeafNode(access_string=tuple(), parent=self.root, path_to_node=initial_output) - cex_output_node = CTLeafNode(access_string=cex, parent=self.root, path_to_node=cex_output) - - self.root.children[initial_output] = initial_output_node - self.root.children[cex_output] = cex_output_node - - self.leaf_nodes[tuple()] = initial_output_node - self.leaf_nodes[cex] = cex_output_node - - - def _sift(self, word): - """ - Sifting a word into the classification tree. 
- Starting at the root, at every inner node (a CTInternalNode), - we branch into the child, depending on the result of the - membership query (word * node.distinguishing_string). Repeated until a leaf - (a CTLeafNode) is reached, which is the result of the sifting. - - Args: - - word: the word to sift into the discrimination tree (a tuple of all letters) - - Returns: - - the CTLeafNode that is reached by the sifting operation. - """ - for letter in word: - assert letter is None or letter in self.alphabet.get_merged_alphabet() - - if word in self.sifting_cache: - return self.sifting_cache[word] - - node = self.root - while not node.is_leaf(): - - query = node.distinguishing_string[0] + word + node.distinguishing_string[1] - - if query not in self.query_cache.keys(): - mq_result = self.sul.query(query) - - mq_result = mq_result[-1] - self.query_cache[query] = mq_result - else: - mq_result = self.query_cache[query] - - if mq_result not in node.children.keys(): - new_leaf = CTLeafNode(access_string=word, parent=node, path_to_node=mq_result) - self.leaf_nodes[word] = new_leaf - node.children[mq_result] = new_leaf - - node = node.children[mq_result] - - self.sifting_cache[word] = node - assert node.is_leaf() - return node - - def gen_hypothesis(self): - # for each CTLeafNode of this CT, - # create a state in the hypothesis that is labeled by that - # node's access string. 
The start state is the empty word - - # TODO take a look at kv how it is done - - states = dict() - initial_state = None - state_counter = 0 - for node in self.leaf_nodes.values(): - - new_state = SevpaState(state_id=f'q{state_counter}', is_accepting=node.output) - - new_state.prefix = node.access_string - if new_state.prefix == (): - initial_state = new_state - states[new_state.prefix] = new_state - state_counter += 1 - assert initial_state is not None - - # for each state - # open - # internals = > state.acc + internal - # open - # returns - # for all call - # for all other_state - # for all return - # open -> other_state.acc + call + state.acc + - # return - # if other_state != state - # open -> state.acc + call + other_state.acc + ret - - states_for_transitions = list(states.values()) - for state in states_for_transitions: - # Check internal transitions - for internal_letter in self.alphabet.internal_alphabet: - transition_target_node = self._sift(state.prefix + (internal_letter, )) - transition_target_access_string = transition_target_node.access_string - - assert transition_target_access_string in states # TODO: trigger this - trans = SevpaTransition(start=state, target=states[transition_target_access_string], symbol=internal_letter, action=None) - state.transitions[internal_letter].append(trans) - - # Add call transitions - for call_letter in self.alphabet.call_alphabet: - trans = SevpaTransition(start=state, target=initial_state, symbol=call_letter, action='push', stack_guard=f'{state.state_id}{call_letter}') - state.transitions[call_letter].append(trans) - for other_state in states_for_transitions: - # Add return transitions - for return_letter in self.alphabet.return_alphabet: - transition_target_node = self._sift(other_state.prefix + (call_letter, ) + state.prefix + (return_letter, )) - transition_target_access_string = transition_target_node.access_string - # call_letter_node = self._sift((call_letter,)) - # call_letter_access_string = 
call_letter_node.access_string - stack_guard = f'{other_state.state_id}{call_letter}' - trans = SevpaTransition(start=state, target=states[transition_target_access_string], symbol=return_letter, - action='pop', stack_guard=stack_guard) - state.transitions[return_letter].append(trans) - - states = [state for state in states.values()] - - return Sevpa(initial_state=initial_state, states=states, input_alphabet=self.alphabet) - - def _least_common_ancestor(self, node_1_id, node_2_id): - """ - Find the distinguishing string of the least common ancestor - of the leaf nodes node_1 and node_2. Both nodes have to exist. - Adapted from https://www.geeksforgeeks.org/lowest-common-ancestor-binary-tree-set-1/ - - Args: - - node_1_id: first leaf node's id - node_2_id: second leaf node's id - - Returns: - - the distinguishing string of the lca - - """ - - def ancestor(parent, node): - for child in parent.children.values(): - if child.is_leaf(): - if child.access_string == node: - return True - else: - next_ancestor = ancestor(child, node) - if next_ancestor: - return True - return False - - def findLCA(n1_id, n2_id): - node = self.leaf_nodes[n1_id] - parent = node.parent - while parent: - if ancestor(parent, n2_id): - return parent - if parent.parent: - parent = parent.parent - else: - return parent - return None - - return findLCA(node_1_id, node_2_id).context_pair - - def update(self, cex: tuple, hypothesis): - """ - Updates the classification tree based on a counterexample. - - For each prefix cex[:i] of the counterexample, get - s_i = self.sift(cex[:i]) and - s_star_i = id of the state with the access sequence cex[:i] - in the hypothesis - and let j be the least i such that s_i != s_star_i. - - Replace the CTLeafNode labeled with the access string of the state - that is reached by the sequence cex[:j-1] in the hypothesis - with an CTInternalNode with two CTLeafNodes: one keeps the old - access string, and one gets the new access string cex[:j-1]. 
- The internal node is labeled with the distinguishing string (cex[j-1],*d), - where d is the distinguishing string of the LCA of s_i and s_star_i. - - Args: - cex: the counterexample used to update the tree - hypothesis: the former (wrong) hypothesis - - """ - j = d = None - for i in range(1, len(cex) + 1): - s_i = self._sift(cex[:i]).access_string - hypothesis.execute_sequence(hypothesis.initial_state, cex[:i]) - s_star_i = hypothesis.current_state.prefix - if s_i != s_star_i: - j = i - d = self._least_common_ancestor(s_i, s_star_i) - break - if j is None and d is None: - j = len(cex) - d = [] - assert j is not None and d is not None - - hypothesis.execute_sequence(hypothesis.initial_state, cex[:j - 1] or tuple()) - - self._insert_new_leaf(discriminator=(cex[j - 1], *d), - old_leaf_access_string=hypothesis.current_state.prefix, - new_leaf_access_string=tuple(cex[:j - 1]) or tuple(), - new_leaf_position=self.sul.query((*cex[:j - 1], *(cex[j - 1], *d)))[-1]) - - def update_rs(self, cex: tuple, hypothesis): - """ - Updates the classification tree based on a counterexample, - using Rivest & Schapire's counterexample processing - - Replace the CTLeafNode labeled with the access string of the state - that is reached by the sequence cex[:j-1] in the hypothesis - with an CTInternalNode with two CTLeafNodes: one keeps the old - access string, and one gets the new access string cex[:j-1]. - The internal node is labeled with the distinguishing string (cex[j-1],*d), - where d is the distinguishing string of the LCA of s_i and s_star_i. 
- - Args: - cex: the counterexample used to update the tree - hypothesis: the former (wrong) hypothesis - - """ - - v = max(rs_cex_processing(self.sul, cex, hypothesis, suffix_closedness=True), key=len) - a = cex[len(cex) - len(v) - 1] - u = cex[:len(cex) - len(v) - 1] - assert (*u, a, *v) == cex - - hypothesis.execute_sequence(hypothesis.initial_state, u) - u_state = hypothesis.current_state.prefix - hypothesis.step(a) - ua_state = hypothesis.current_state.prefix - - new_leaf_position = not hypothesis.execute_sequence(hypothesis.initial_state, cex)[-1] - - discriminator = (tuple(transform_access_seq(hypothesis, hypothesis.stack)), tuple(v)) - - self._insert_new_leaf(discriminator=discriminator, - old_leaf_access_string=ua_state, - new_leaf_access_string=(*u_state, a), - new_leaf_position=new_leaf_position) - - def _insert_new_leaf(self, discriminator, old_leaf_access_string, new_leaf_access_string, new_leaf_position): - """ - Inserts a new leaf in the classification tree by: - - moving the leaf node specified by down one level - - inserting an internal node at the former position of the old node (i.e. as the parent of the old node) - - adding a new leaf node with as child of the new internal node / sibling of the old node - Could also be thought of as 'splitting' the old node into two (one of which keeps the old access string and one - of which gets the new one) with as the distinguishing string between the two. - - where one of the resulting nodes keeps the old - node's access string and the other gets new_leaf_access_string. 
- Args: - discriminator: The distinguishing string of the new internal node - old_leaf_access_string: The access string specifying the leaf node to be 'split' (or rather moved down) - new_leaf_access_string: The access string of the leaf node that will be created - new_leaf_position: The path from the new internal node to the new leaf node - - Returns: - - """ - other_leaf_position = not new_leaf_position - - old_leaf = self.leaf_nodes[old_leaf_access_string] - - # create an internal node at the same position as the old leaf node - - discriminator_node = CTInternalNode(distinguishing_string=discriminator, - parent=old_leaf.parent, path_to_node=old_leaf.path_to_node) - - # create the new leaf node and add it as child of the internal node - new_leaf = CTLeafNode(access_string=new_leaf_access_string, - parent=discriminator_node, - path_to_node=new_leaf_position) - self.leaf_nodes[new_leaf_access_string] = new_leaf - - # redirect the old nodes former parent to the internal node - old_leaf.parent.children[old_leaf.path_to_node] = discriminator_node - - # add the internal node as parent of the old leaf - old_leaf.parent = discriminator_node - old_leaf.path_to_node = other_leaf_position - - # set the two nodes as children of the internal node - discriminator_node.children[new_leaf_position] = new_leaf - discriminator_node.children[other_leaf_position] = old_leaf - - # sifting cache update - sifting_cache_outdated = [] - if old_leaf in self.sifting_cache.values(): - for prefix, node in self.sifting_cache.items(): - if old_leaf == node: - sifting_cache_outdated.append(prefix) - - for to_delete in sifting_cache_outdated: - del self.sifting_cache[to_delete] - - def _query_and_update_cache(self, word): - if word in self.query_cache.keys(): - output = self.query_cache[word] - else: - output = self.sul.query(word)[-1] - self.query_cache[word] = output - return output - -def transform_access_seq(hypothesis: Sevpa, stack: []) -> list[str]: - - word = [] - pattern = r"(q\d+)(.*)" - 
- for i in range(1, len(stack)): # skip the first element because it's the start of the stack '_ - stack_elem = stack[i] - match = re.search(pattern, stack_elem) - if match: - from_state_id = match.group(1) # the corresponding state where the stack element got pushed from - call_letter = match.group(2) # the call letter that was pushed on the stack - print("From state:", from_state_id) - print("Call letter:", call_letter) - from_state = hypothesis.get_state_by_id(from_state_id) - word.append(from_state.prefix) # .prefix is the access sequence of the node in the classificationTree - word.append(call_letter) - else: - assert False and print("Stack content does not follow convention") - - word.append(hypothesis.initial_state.prefix) - return word diff --git a/aalpy/learning_algs/vpda/VpdaCounterExampleProcessing.py b/aalpy/learning_algs/vpda/VpdaCounterExampleProcessing.py deleted file mode 100644 index 7e024772..00000000 --- a/aalpy/learning_algs/vpda/VpdaCounterExampleProcessing.py +++ /dev/null @@ -1,133 +0,0 @@ -import re - -from aalpy.SULs.AutomataSUL import SevpaSUL -from aalpy.base import SUL -from aalpy.utils.HelperFunctions import all_suffixes, all_prefixes -from aalpy.automata import Sevpa, SevpaState - - -def counterexample_successfully_processed(sul, cex, hypothesis): - cex_outputs = sul.query(cex) - hyp_outputs = hypothesis.execute_sequence(hypothesis.initial_state, cex) - return cex_outputs[-1] == hyp_outputs[-1] - - -def longest_prefix_cex_processing(s_union_s_dot_a: list, cex: tuple, closedness='suffix'): - """ - Suffix processing strategy found in Shahbaz-Groz paper 'Inferring Mealy Machines'. - It splits the counterexample into prefix and suffix. The prefix is the longest element of the S union S.A that - matches the beginning of the counterexample. By removing such prefixes from counterexample, no consistency check - is needed. 
- - Args: - - s_union_s_dot_a: list of all prefixes found in observation table sorted from shortest to longest - cex: counterexample - closedness: either 'suffix' or 'prefix'. (Default value = 'suffix') - s_union_s_dot_a: list: - cex: tuple: counterexample - - Returns: - - suffixes to add to the E set - - """ - prefixes = s_union_s_dot_a - prefixes.reverse() - trimmed_suffix = None - - for p in prefixes: - if p == cex[:len(p)]: - trimmed_suffix = cex[len(p):] - break - - trimmed_suffix = trimmed_suffix if trimmed_suffix else cex - suffixes = all_suffixes(trimmed_suffix) if closedness == 'suffix' else all_prefixes(trimmed_suffix) - suffixes.reverse() - return suffixes - - -def rs_cex_processing(sul: SUL, cex: tuple, hypothesis: Sevpa, suffix_closedness=True, closedness='suffix'): - """Riverst-Schapire counter example processing. - - Args: - - sul: system under learning - cex: found counterexample - hypothesis: hypothesis on which counterexample was found - suffix_closedness: If true all suffixes will be added, else just one (Default value = True) - closedness: either 'suffix' or 'prefix'. 
(Default value = 'suffix') - sul: SUL: system under learning - cex: tuple: counterexample - - Returns: - - suffixes to be added to the E set - - """ - cex_out = sul.query(cex) - cex_input = list(cex) - - lower = 1 - upper = len(cex_input) - 2 - - while True: - hypothesis.reset_to_initial() - mid = (lower + upper) // 2 - - # arr[:n] -> first n values - # arr[n:] -> last n values - - for s_p in cex_input[:mid]: - hypothesis.step(s_p) - s_bracket = hypothesis.current_state.prefix - - d = tuple(cex_input[mid:]) - mq = sul.query(s_bracket + d) - - if mq[-1] == cex_out[-1]: # only check if the last element is the same as the cex - lower = mid + 1 - if upper < lower: - suffix = d[1:] - break - else: - upper = mid - 1 - if upper < lower: - suffix = d - break - - hyp_sul = SevpaSUL(hypothesis) - hyp_sul.query(('(',)) - word = transform_access_seq(hypothesis, hyp_sul.sevpa.stack) - print(word) - - if suffix_closedness: - suffixes = all_suffixes(suffix) if closedness == 'suffix' else all_prefixes(suffix) - suffixes.reverse() - suffix_to_query = suffixes - else: - suffix_to_query = [suffix] - return suffix_to_query - - -def transform_access_seq(hypothesis: Sevpa, stack: []) -> list[str]: - word = [] - pattern = r"(q\d+)(.*)" - - for i in range(1, len(stack)): # skip the first element because it's the start of the stack '_ - stack_elem = stack[i] - match = re.search(pattern, stack_elem) - if match: - from_state_id = match.group(1) # the corresponding state where the stack element got pushed from - call_letter = match.group(2) # the call letter that was pushed on the stack - print("From state:", from_state_id) - print("Call letter:", call_letter) - from_state = hypothesis.get_state_by_id(from_state_id) - word.append(from_state.prefix) # .prefix is the access sequence of the node in the classificationTree - word.append(call_letter) - else: - assert False and print("Stack content does not follow convention") - - word.append(hypothesis.initial_state.prefix) - return word - diff 
--git a/aalpy/learning_algs/vpda/VpdaKV.py b/aalpy/learning_algs/vpda/VpdaKV.py deleted file mode 100644 index 2d53c3f1..00000000 --- a/aalpy/learning_algs/vpda/VpdaKV.py +++ /dev/null @@ -1,151 +0,0 @@ -import time - -from aalpy.automata import Sevpa, SevpaState -from aalpy.base import Oracle, SUL -from aalpy.utils.HelperFunctions import print_learning_info, visualize_classification_tree -from .VpdaClassificationTree import VpdaClassificationTree -from aalpy.learning_algs.vpda.VpdaCounterExampleProcessing import counterexample_successfully_processed -from ...base.SUL import CacheSUL - -print_options = [0, 1, 2, 3] -counterexample_processing_strategy = [None, 'rs'] - - -def run_KV_vpda(alphabet: list, sul: SUL, eq_oracle: Oracle, cex_processing='rs', - max_learning_rounds=None, cache_and_non_det_check=True, return_data=False, print_level=2): - """ - Executes the KV algorithm. - - Args: - - alphabet: input alphabet - - sul: system under learning - - eq_oracle: equivalence oracle - - cex_processing: None for no counterexample processing, or 'rs' for Rivest & Schapire counterexample processing - - max_learning_rounds: number of learning rounds after which learning will terminate (Default value = None) - - cache_and_non_det_check: Use caching and non-determinism checks (Default value = True) - - return_data: if True, a map containing all information(runtime/#queries/#steps) will be returned - (Default value = False) - - print_level: 0 - None, 1 - just results, 2 - current round and hypothesis size, 3 - educational/debug - (Default value = 2) - - - Returns: - - automaton of type automaton_type (dict containing all information about learning if 'return_data' is True) - - """ - - assert print_level in print_options - assert cex_processing in counterexample_processing_strategy - - start_time = time.time() - eq_query_time = 0 - learning_rounds = 0 - - if cache_and_non_det_check: - # Wrap the sul in the CacheSUL, so that all steps/queries are cached - sul = CacheSUL(sul) - 
eq_oracle.sul = sul - - empty_string_mq = sul.query(tuple())[-1] - - initial_state = SevpaState(state_id='s0', is_accepting=empty_string_mq) - - initial_state.prefix = tuple() - - # TODO Create 1-SEVPA class - # When creating a hypothesis, infer call transition destinations based on (loc, call) pairs - - # TODO Create initial hypothesis - # Maybe move initialization of classification tree here - # Add a new method to it called generate_initial_hypothesis() - # Either -> one state and then procedure is same like in default KV (add cex later) - # Discover a new state - - hypothesis = Sevpa(initial_state=initial_state, states=[], input_alphabet=alphabet) - # Perform an equivalence query on this automaton - eq_query_start = time.time() - cex = eq_oracle.find_cex(hypothesis) - - print(f'Counterexample: {cex}') - - eq_query_time += time.time() - eq_query_start - if cex is not None: - cex = tuple(cex) - - # initialise the classification tree to have a root - # labeled with the empty word as the distinguishing string - # and two leaves labeled with access strings cex and empty word - classification_tree = VpdaClassificationTree(alphabet=alphabet, sul=sul, cex=cex) - visualize_classification_tree(classification_tree.root) - - while True: - learning_rounds += 1 - if max_learning_rounds and learning_rounds - 1 == max_learning_rounds: - break - - hypothesis = classification_tree.gen_hypothesis() - hypothesis.reset_to_initial() - - if print_level == 2: - print(f'\rHypothesis {learning_rounds}: {hypothesis.size} states.', end="") - - if print_level == 3: - # would be nice to have an option to print classification tree - print(f'Hypothesis {learning_rounds}: {hypothesis.size} states.') - - if counterexample_successfully_processed(sul, cex, hypothesis): - # Perform an equivalence query on this automaton - eq_query_start = time.time() - cex = eq_oracle.find_cex(hypothesis) - eq_query_time += time.time() - eq_query_start - - if cex is None: - if print_level == 3: - 
visualize_classification_tree(classification_tree.root) - break - else: - cex = tuple(cex) - - if print_level == 3: - print('Counterexample', cex) - - if cex_processing == 'rs': - classification_tree.update_rs(cex, hypothesis) - else: - classification_tree.update(cex, hypothesis) - - total_time = round(time.time() - start_time, 2) - eq_query_time = round(eq_query_time, 2) - learning_time = round(total_time - eq_query_time, 2) - - info = { - 'learning_rounds': learning_rounds, - 'automaton_size': hypothesis.size, - 'queries_learning': sul.num_queries, - 'steps_learning': sul.num_steps, - 'queries_eq_oracle': eq_oracle.num_queries, - 'steps_eq_oracle': eq_oracle.num_steps, - 'learning_time': learning_time, - 'eq_oracle_time': eq_query_time, - 'total_time': total_time, - 'cache_saved': sul.num_cached_queries, - } - - if print_level > 0: - if print_level == 2: - print("") - print_learning_info(info) - - if return_data: - return hypothesis, info - - return hypothesis diff --git a/aalpy/learning_algs/vpda/VpdaLStar.py b/aalpy/learning_algs/vpda/VpdaLStar.py deleted file mode 100644 index ebe95e2c..00000000 --- a/aalpy/learning_algs/vpda/VpdaLStar.py +++ /dev/null @@ -1,186 +0,0 @@ -import time - -from aalpy.base import Oracle, SUL -from aalpy.utils.HelperFunctions import extend_set, print_learning_info, print_observation_table, all_prefixes -from ..deterministic.CounterExampleProcessing import longest_prefix_cex_processing, rs_cex_processing, \ - counterexample_successfully_processed -from .VpdaObservationTable import VpdaObservationTable -from ...base.SUL import CacheSUL - -counterexample_processing_strategy = [None, 'rs', 'longest_prefix'] -closedness_options = ['suffix_all', 'suffix_single'] -print_options = [0, 1, 2, 3] - - -def run_vpda_Lstar(alphabet: list, sul: SUL, eq_oracle: Oracle, automaton_type, samples=None, - closing_strategy='shortest_first', cex_processing='rs', - e_set_suffix_closed=False, all_prefixes_in_obs_table=True, - max_learning_rounds=None, 
cache_and_non_det_check=True, return_data=False, print_level=2): - """ - Executes L* algorithm. - - Args: - - alphabet: input alphabet - - sul: system under learning - - eq_oracle: equivalence oracle - - automaton_type: type of automaton to be learned. Either 'dfa', 'mealy' or 'moore'. - - samples: input output traces provided to the learning algorithm. They are added to cache and could reduce - total interaction with the system. Syntax: list of [(input_sequence, output_sequence)] or None - - closing_strategy: closing strategy used in the close method. Either 'longest_first', 'shortest_first' or - 'single' (Default value = 'shortest_first') - - cex_processing: Counterexample processing strategy. Either None, 'rs' (Riverst-Schapire) or 'longest_prefix'. - (Default value = 'rs') - - e_set_suffix_closed: True option ensures that E set is suffix closed, - False adds just a single suffix per counterexample. - - all_prefixes_in_obs_table: if True, entries of observation table will contain the whole output of the whole - suffix, otherwise just the last output meaning that all prefixes of the suffix will be added. - If False, just a single suffix will be added. 
- - max_learning_rounds: number of learning rounds after which learning will terminate (Default value = None) - - cache_and_non_det_check: Use caching and non-determinism checks (Default value = True) - - return_data: if True, a map containing all information(runtime/#queries/#steps) will be returned - (Default value = False) - - print_level: 0 - None, 1 - just results, 2 - current round and hypothesis size, 3 - educational/debug - (Default value = 2) - - Returns: - - automaton of type automaton_type (dict containing all information about learning if 'return_data' is True) - - """ - - assert cex_processing in counterexample_processing_strategy - assert print_level in print_options - - merged_alphabet = list() - merged_alphabet.extend(alphabet[0]) - merged_alphabet.extend(alphabet[1]) - merged_alphabet.extend(alphabet[2]) - - if cache_and_non_det_check or samples is not None: - # Wrap the sul in the CacheSUL, so that all steps/queries are cached - sul = CacheSUL(sul) - eq_oracle.sul = sul - - if samples: - for input_seq, output_seq in samples: - sul.cache.add_to_cache(input_seq, output_seq) - - start_time = time.time() - eq_query_time = 0 - learning_rounds = 0 - hypothesis = None - - observation_table = VpdaObservationTable(alphabet, sul, automaton_type, all_prefixes_in_obs_table) - - # Initial update of observation table, for empty row - observation_table.update_obs_table() - cex = None - - while True: - if max_learning_rounds and learning_rounds == max_learning_rounds: - break - - # Make observation table consistent (iff there is no counterexample processing) - if not cex_processing: - inconsistent_rows = observation_table.get_causes_of_inconsistency() - while inconsistent_rows is not None: - added_suffix = extend_set(observation_table.E, inconsistent_rows) - observation_table.update_obs_table(e_set=added_suffix) - inconsistent_rows = observation_table.get_causes_of_inconsistency() - - # Close observation table - rows_to_close = 
observation_table.get_rows_to_close(closing_strategy) - while rows_to_close is not None: - rows_to_query = [] - for row in rows_to_close: - observation_table.S.append(row) - rows_to_query.extend([row + (a,) for a in merged_alphabet]) - observation_table.update_obs_table(s_set=rows_to_query) - rows_to_close = observation_table.get_rows_to_close(closing_strategy) - - # Generate hypothesis - hypothesis = observation_table.gen_hypothesis(no_cex_processing_used=cex_processing is None) - # Find counterexample if none has previously been found (first round) and cex is successfully processed - # (not a counterexample in the current hypothesis) - if cex is None or counterexample_successfully_processed(sul, cex, hypothesis): - learning_rounds += 1 - - if print_level > 1: - print(f'Hypothesis {learning_rounds}: {len(hypothesis.states)} states.') - - if print_level == 3: - print_observation_table(observation_table, 'det') - - eq_query_start = time.time() - cex = eq_oracle.find_cex(hypothesis) - eq_query_time += time.time() - eq_query_start - - # If no counterexample is found, return the hypothesis - if cex is None: - break - - # make sure counterexample is a tuple in case oracle returns a list - cex = tuple(cex) - - if print_level == 3: - print('Counterexample', cex) - - # Process counterexample and ask membership queries - if not cex_processing: - s_to_update = [] - added_rows = extend_set(observation_table.S, all_prefixes(cex)) - s_to_update.extend(added_rows) - for p in added_rows: - s_to_update.extend([p + (a,) for a in merged_alphabet]) - - observation_table.update_obs_table(s_set=s_to_update) - continue - - elif cex_processing == 'longest_prefix': - cex_suffixes = longest_prefix_cex_processing(observation_table.S + list(observation_table.s_dot_a()), - cex, closedness='suffix') - else: - cex_suffixes = rs_cex_processing(sul, cex, hypothesis, e_set_suffix_closed, closedness='suffix') - - added_suffixes = extend_set(observation_table.E, cex_suffixes) - 
observation_table.update_obs_table(e_set=added_suffixes) - - total_time = round(time.time() - start_time, 2) - eq_query_time = round(eq_query_time, 2) - learning_time = round(total_time - eq_query_time, 2) - - info = { - 'learning_rounds': learning_rounds, - 'automaton_size': hypothesis.size, - 'queries_learning': sul.num_queries, - 'steps_learning': sul.num_steps, - 'queries_eq_oracle': eq_oracle.num_queries, - 'steps_eq_oracle': eq_oracle.num_steps, - 'learning_time': learning_time, - 'eq_oracle_time': eq_query_time, - 'total_time': total_time, - 'characterization_set': observation_table.E - } - if cache_and_non_det_check: - info['cache_saved'] = sul.num_cached_queries - - if print_level > 0: - print_learning_info(info) - - if return_data: - return hypothesis, info - - return hypothesis diff --git a/aalpy/learning_algs/vpda/VpdaObservationTable.py b/aalpy/learning_algs/vpda/VpdaObservationTable.py deleted file mode 100644 index e7146eb3..00000000 --- a/aalpy/learning_algs/vpda/VpdaObservationTable.py +++ /dev/null @@ -1,269 +0,0 @@ -from collections import defaultdict - -from aalpy.automata.Vpa import VpaTransition -from aalpy.base import Automaton, SUL -from aalpy.automata import Vpa, VpaState - -aut_type = ['pda', 'vpa'] -closing_options = ['shortest_first', 'longest_first', 'single', 'single_longest'] - - -class VpdaObservationTable: - def __init__(self, alphabet: list, sul: SUL, automaton_type, prefixes_in_cell=False): - """ - Constructor of the observation table. Initial queries are asked in the constructor. 
- - Args: - - alphabet: input alphabet - sul: system under learning - automaton_type: automaton type, one of ['dfa', 'mealy', 'moore'] - - Returns: - - """ - - assert automaton_type in aut_type - assert alphabet is not None and sul is not None - self.automaton_type = automaton_type - - if self.automaton_type == 'vpa': - self.call_set = alphabet[0] - self.return_set = alphabet[1] - self.internal_set = alphabet[2] - self.merged_alphabet = list() - self.merged_alphabet.extend(alphabet[0]) - self.merged_alphabet.extend(alphabet[1]) - self.merged_alphabet.extend(alphabet[2]) - - # If True add prefixes of each element of E set to a cell, else only add the output - self.prefixes_in_cell = prefixes_in_cell - - if automaton_type == 'vpa': - self.A = [tuple(a) for a in self.merged_alphabet] - else: - self.A = [tuple([a]) for a in alphabet] - - self.S = list() # prefixes of S - # DFA's can also take whole alphabet in E, this convention follows Angluin's paper - self.E = [] - # For performance reasons, the T function maps S to a tuple where element at index i is the element of the E - # set of index i. Therefore it is important to keep E set ordered and ask membership queries only when needed - # and in correct order. It would make more sense to implement it as a defaultdict(dict) where you can access - # elements via self.T[s][e], but it causes significant performance hit. - self.T = defaultdict(tuple) - - self.sul = sul - empty_word = tuple() - self.S.append(empty_word) - - # DFAs and Moore machines use empty word for identification of accepting states/state outputs - self.E.insert(0, empty_word) - - def get_rows_to_close(self, closing_strategy='longest_first'): - """ - Get rows for that need to be closed. Row selection is done according to closing_strategy. - The length of the row is defined by the length of the prefix corresponding to the row in the S set. 
- longest_first -> get all rows that need to be closed and ask membership queries for the longest row first - shortest_first -> get all rows that need to be closed and ask membership queries for the shortest row first - single -> find and ask membership query for the single row - single_longest -> returns single longest row to close - - Args: - - closing_strategy: one of ['shortest_first', 'longest_first', 'single'] (Default value = 'longest_first') - - Returns: - - list if non-closed exist, None otherwise: rows that will be moved to S set and closed - - """ - assert closing_strategy in closing_options - rows_to_close = [] - row_values = set() - - s_rows = {self.T[s] for s in self.S} - - for t in self.s_dot_a(): - row_t = self.T[t] - if row_t not in s_rows and row_t not in row_values: - rows_to_close.append(t) - row_values.add(row_t) - - if closing_strategy == 'single': - return rows_to_close - - if not rows_to_close: - return None - - if 'longest' in closing_strategy: - rows_to_close.sort(key=len, reverse=True) - if closing_strategy == 'longest_first': - return rows_to_close - if closing_strategy == 'single_longest': - return [rows_to_close[0]] - - return rows_to_close - - def get_causes_of_inconsistency(self): - """ - If the two rows in the S set are the same, but their one letter extensions are not, this method founds - the cause of inconsistency and returns it. - :return: - - Returns: - - a+e values that are the causes of inconsistency - - """ - for i, s1 in enumerate(self.S): - for s2 in self.S[i + 1:]: - if self.T[s1] == self.T[s2]: - for a in self.A: - if self.T[s1 + a] != self.T[s2 + a]: - for index, e in enumerate(self.E): - if self.T[s1 + a][index] != self.T[s2 + a][index]: - return [(a + e)] - - return None - - def s_dot_a(self): - """ - Helper generator function that returns extended S, or S.A set. 
- """ - s_set = set(self.S) - for s in self.S: - for a in self.A: - if s + a not in s_set: - yield s + a - - def update_obs_table(self, s_set: list = None, e_set: list = None): - """ - Perform the membership queries. - - Args: - - s_set: Prefixes of S set on which to preform membership queries. If None, then whole S set will be used. - - e_set: Suffixes of E set on which to perform membership queries. If None, then whole E set will be used. - - Returns: - - """ - - update_S = s_set if s_set else list(self.S) + list(self.s_dot_a()) - update_E = e_set if e_set else self.E - - # This could save few queries - update_S.reverse() - - for s in update_S: - for e in update_E: - if len(self.T[s]) != len(self.E): - output = tuple(self.sul.query(s + e)) - if self.prefixes_in_cell and len(e) > 1: - obs_table_entry = tuple([output[-len(e):]],) - else: - obs_table_entry = (output[-1],) - self.T[s] += obs_table_entry - - def get_action_type(self, letter) -> str: - if letter in self.call_set: - return 'push' - elif letter in self.return_set: - return 'pop' - elif letter in self.internal_set: - return '' - else: - assert False - - def get_stack_guard(self, prefix, letter, action): - """ - - Gets the stack guard based on the action and word (prefix + letter) - - """ - out = self.sul.query(prefix + letter) - out_pre = self.sul.query(prefix) - if action == 'push': - if out_pre[-1][1] == out[-1][1] and out_pre[-1][1] == '_': # stack doesn't change on push action - stack_guard = '?' - else: # stack changed so we know the push action worked - stack_guard = out[-1][1] - elif action == 'pop': - if out_pre[-1][1] == out[-1][1]: # stack doesn't change on pop action - stack_guard = '?' - else: # stack changed so we know the pop operation worked - stack_guard = out_pre[-1][1] - else: - stack_guard = '' - - return stack_guard - - - def gen_hypothesis(self, no_cex_processing_used=False) -> Automaton: - """ - Generate automaton based on the values found in the observation table. 
- :return: - - Args: - - check_for_duplicate_rows: (Default value = False) - - Returns: - - Automaton of type `automaton_type` - - """ - state_distinguish = dict() - states_dict = dict() - initial_state = None - automaton_class = {'vpa': Vpa} - - s_set = self.S - # Added check for the algorithm without counterexample processing - if no_cex_processing_used: - s_set = self._get_row_representatives() - - # create states based on S set - stateCounter = 0 - for prefix in s_set: - state_id = f's{stateCounter}' - - states_dict[prefix] = VpaState(state_id) - states_dict[prefix].is_accepting = self.T[prefix][0][0] - - states_dict[prefix].prefix = prefix - state_distinguish[tuple(self.T[prefix])] = states_dict[prefix] - - if not prefix: - initial_state = states_dict[prefix] - stateCounter += 1 - - for prefix in s_set: - for a in self.A: - prev_state = state_distinguish[self.T[prefix]] - target_state = state_distinguish[self.T[prefix + a]] - action = self.get_action_type(a[0]) - stack_guard = self.get_stack_guard(prefix, a, action) - if stack_guard == '?': - target_state = Vpa.error_state - trans = VpaTransition(start=prev_state, target=target_state, symbol=a[0], action=action, stack_guard=stack_guard) - states_dict[prefix].transitions[a[0]].append(trans) - - if self.automaton_type == 'vpa': - automaton = automaton_class[self.automaton_type](initial_state, list(states_dict.values()), self.call_set, self.return_set, self.internal_set) - else: - automaton = automaton_class[self.automaton_type](initial_state, list(states_dict.values())) - - automaton.characterization_set = self.E - - return automaton - - def _get_row_representatives(self): - self.S.sort(key=len) - representatives = defaultdict(list) - for prefix in self.S: - representatives[self.T[prefix]].append(prefix) - - return [r[0] for r in representatives.values()] diff --git a/aalpy/learning_algs/vpda/__init__.py b/aalpy/learning_algs/vpda/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git 
a/aalpy/utils/BenchmarkPdaModels.py b/aalpy/utils/BenchmarkPdaModels.py deleted file mode 100644 index 901a971e..00000000 --- a/aalpy/utils/BenchmarkPdaModels.py +++ /dev/null @@ -1,264 +0,0 @@ -from aalpy.automata.Pda import Pda - - -def pda_for_L1(): - # we always ensure that n >= 1 - state_setup = { - "q0": (False, {"a": [("q1", 'push', None)], "b": [(Pda.error_state.state_id, None, None)]}), - "q1": (False, {"a": [("q1", 'push', None)], "b": [("q2", 'pop', "a")]}), - "q2": (True, {"a": [(Pda.error_state.state_id, None, None)], "b": [("q2", 'pop', "a")]}), - } - pda = Pda.from_state_setup(state_setup, "q0") - return pda - - -def pda_for_L2(): - state_setup = { - "q0": (False, {"a": [("q1", 'push', None)], "b": [("q1", 'push', None)], - "c": [(Pda.error_state.state_id, None, None)], - "d": [(Pda.error_state.state_id, None, None)]}), - "q1": (False, {"a": [("q1", 'push', None)], "b": [("q1", 'push', None)], - "c": [("q2", 'pop', "a"), ("q2", 'pop', "b")], - "d": [("q2", 'pop', "a"), ("q2", 'pop', "b")]}), - "q2": (True, {"a": [(Pda.error_state.state_id, None, None)], - "b": [(Pda.error_state.state_id, None, None)], - "c": [("q2", 'pop', "a"), ("q2", 'pop', "b")], - "d": [("q2", 'pop', "a"), ("q2", 'pop', "b")]}), - } - pda = Pda.from_state_setup(state_setup, "q0") - return pda - - -def pda_for_L3(): - state_setup = { - "q0": (False, {"a": [("q0a", 'push', None)], - "c": [("q0c", 'push', None)], - }), - "q0a": (False, {"b": [("q1", 'push', None)]}), - "q0c": (False, {"d": [("q1", 'push', None)]}), - "q1": (False, {"a": [("q1a", 'push', None)], - "c": [("q1c", 'push', None)], - "e": [("q2e", 'pop', "b"), ("q2e", 'pop', "d")], - "g": [("q2g", 'pop', "b"), ("q2g", 'pop', "d")], # stack should actually be redundant - }), - "q1a": (False, {"b": [("q1", 'push', None)]}), - "q1c": (False, {"d": [("q1", 'push', None)]}), - "q2e": (False, {"f": [("q2", 'pop', "a"), ("q2", 'pop', "c")]}), - "q2g": (False, {"h": [("q2", 'pop', "a"), ("q2", 'pop', "c")]}), - "q2": (True, {"e": 
[("q2e", 'pop', "b"), ("q2e", 'pop', "d")], - "g": [("q2g", 'pop', "b"), ("q2g", 'pop', "d")]}) - } - pda = Pda.from_state_setup(state_setup, "q0") - return pda - - -def pda_for_L4(): - state_setup = { - "q0": (False, {"a": [("q01", 'push', None)], "b": [(Pda.error_state.state_id, None, None)]}), - "q01": (False, {"b": [("q1", 'push', None)], "a": [(Pda.error_state.state_id, None, None)]}), - - "q1": (False, {"a": [("q11", 'push', None)], "b": [(Pda.error_state.state_id, None, None)], - "c": [("q21", 'pop', "b")]}), - "q11": (False, {"b": [("q1", 'push', None)], "a": [(Pda.error_state.state_id, None, None)]}), - "q21": (False, {"d": [("q2", 'pop', "a")]}), - "q2": (True, {"c": [("q21", 'pop', "b")]}), - } - pda = Pda.from_state_setup(state_setup, "q0") - return pda - - -def pda_for_L5(): - state_setup = { - "q0": (False, {"a": [("q01", 'push', None)]}), - "q01": (False, {"b": [("q02", 'push', None)]}), - "q02": (False, {"c": [("q1", 'push', None)]}), - "q1": (False, {"a": [("q11", 'push', None)], - "d": [("q21", 'pop', "c")]}), - "q11": (False, {"b": [("q12", 'push', None)]}), - "q12": (False, {"c": [("q1", 'push', None)]}), - "q21": (False, {"e": [("q22", 'pop', "b")]}), - "q22": (False, {"f": [("q2", 'pop', "a")]}), - "q2": (True, {"d": [("q21", 'pop', "c")]}), - } - pda = Pda.from_state_setup(state_setup, "q0") - return pda - - -def pda_for_L7(): - # Dyck order 2 - state_setup = { - "q0": (False, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], # exclude empty seq - }), - "q1": (True, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], - ")": [("q1", 'pop', "(")], - "]": [("q1", 'pop', "[")] - }), - } - pda = Pda.from_state_setup(state_setup, "q0") - return pda - - -def pda_for_L8(): - # Dyck order 3 - state_setup = { - "q0": (False, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], - "{": [("q1", 'push', None)], - }), - "q1": (True, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], - "{": [("q1", 'push', None)], - ")": 
[("q1", 'pop', "(")], - "]": [("q1", 'pop', "[")], - "}": [("q1", 'pop', "{")], - }), - } - pda = Pda.from_state_setup(state_setup, "q0") - return pda - - -def pda_for_L9(): - # Dyck order 4 - state_setup = { - "q0": (False, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], - "{": [("q1", 'push', None)], - "<": [("q1", 'push', None)], - }), - "q1": (True, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], - "{": [("q1", 'push', None)], - "<": [("q1", 'push', None)], - ")": [("q1", 'pop', "(")], - "]": [("q1", 'pop', "[")], - "}": [("q1", 'pop', "{")], - ">": [("q1", 'pop', ">")], - }), - } - pda = Pda.from_state_setup(state_setup, "q0") - return pda - - -def pda_for_L10(): - # RE Dyck order 1 - state_setup = { - "q0": (False, {"a": [("qa", 'push', None)], - }), - "qa": (False, {"b": [("qb", None, None)], - }), - "qb": (False, {"c": [("qc", None, None)], - }), - "qc": (False, {"d": [("qd", None, None)], - }), - "qd": (False, {"e": [("q1", None, None)], - }), - "q1": (True, {"a": [("qa", 'push', None)], - "v": [("qv", 'pop', "a")]}), - "qv": (False, {"w": [("qw", None, None)]}), - "qw": (False, {"x": [("qx", None, None)]}), - "qx": (False, {"y": [("qy", None, None)]}), - "qy": (False, {"z": [("q1", None, None)]}) - } - pda = Pda.from_state_setup(state_setup, "q0") - return pda - - -def pda_for_L11(): - # RE Dyck order 1 - state_setup = { - "q0": (False, {"a": [("qa", 'push', None)], - "c": [("q1", 'push', None)], - }), - "qa": (False, {"b": [("q1", None, None)], - }), - "q1": (True, {"a": [("qa", 'push', None)], - "c": [("q1", 'push', None)], - "d": [("qd", 'pop', "a"), ("qd", 'pop', "c")], - "f": [("q1", 'pop', "a"), ("q1", 'pop', "c")]}), - "qd": (False, {"e": [("q1", None, None)]}) - } - pda = Pda.from_state_setup(state_setup, "q0") - return pda - - -def pda_for_L12(): - # Dyck order 2 (single-nested) - state_setup = { - "q0": (False, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], # exclude empty seq - }), - "q1": (False, {"(": 
[("q1", 'push', None)], - "[": [("q1", 'push', None)], - ")": [("q2", 'pop', "(")], - "]": [("q2", 'pop', "[")]}), - "q2": (True, { - ")": [("q2", 'pop', "(")], - "]": [("q2", 'pop', "[")] - }), - } - pda = Pda.from_state_setup(state_setup, "q0") - return pda - - -def pda_for_L13(): - # Dyck order 1 - state_setup = { - "q0": (False, {"(": [("q1", 'push', None)], - "a": [("q1", None, None)], - "b": [("q1", None, None)], - "c": [("q1", None, None)], # exclude empty seq - }), - "q1": (True, {"(": [("q1", 'push', None)], - ")": [("q1", 'pop', "(")], - "a": [("q1", None, None)], - "b": [("q1", None, None)], - "c": [("q1", None, None)] - }), - } - pda = Pda.from_state_setup(state_setup, "q0") - return pda - - -def pda_for_L14(): - # Dyck order 2 - state_setup = { - "q0": (False, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], - "a": [("q1", None, None)], - "b": [("q1", None, None)], - "c": [("q1", None, None)], # exclude empty seq - }), - "q1": (True, {"(": [("q1", 'push', None)], - "[": [("q1", 'push', None)], - ")": [("q1", 'pop', "(")], - "]": [("q1", 'pop', "[")], - "a": [("q1", None, None)], - "b": [("q1", None, None)], - "c": [("q1", None, None)] - }), - } - pda = Pda.from_state_setup(state_setup, "q0") - return pda - - -def pda_for_L15(): - # Dyck order 1 - state_setup = { - "q0": (False, {"(": [("q1", 'push', None)], - "a": [("qa", None, None)], - "d": [("q1", None, None)], # exclude empty seq - }), - "q1": (True, {"(": [("q1", 'push', None)], - ")": [("q1", 'pop', "(")], - "a": [("qa", None, None)], - "d": [("q1", None, None)], - }), - "qa": (False, {"b": [("qb", None, None)], - }), - "qb": (False, {"c": [("q1", None, None)], - }) - } - pda = Pda.from_state_setup(state_setup, "q0") - return pda diff --git a/aalpy/utils/BenchmarkSULs.py b/aalpy/utils/BenchmarkSULs.py index 4af23a8b..3cfa36b9 100644 --- a/aalpy/utils/BenchmarkSULs.py +++ b/aalpy/utils/BenchmarkSULs.py @@ -388,7 +388,7 @@ def get_small_pomdp(): return Mdp(q0, [q0, q1, q2, q3, q4]) -def 
is_balanced(test_string, call_return_map): +def is_balanced(test_string, call_return_map, allow_empty_string): stack = [] # Create a set of open and close characters for faster lookup open_chars = set(call_return_map.keys()) @@ -406,16 +406,17 @@ def is_balanced(test_string, call_return_map): if call_return_map[last_open] != char: return False - return not stack and len(test_string) > 0 + return not stack if allow_empty_string else not stack and len(test_string) > 0 -def get_balanced_string_sul(call_return_map): +def get_balanced_string_sul(call_return_map, allow_empty_string): from aalpy.base import SUL class BalancedStringSUL(SUL): - def __init__(self, call_return_map): + def __init__(self, call_return_map, allow_empty_string): super(BalancedStringSUL, self).__init__() self.call_return_map = call_return_map + self.allow_empty_string = allow_empty_string self.sting_under_test = [] def pre(self): @@ -427,7 +428,6 @@ def post(self): def step(self, letter): if letter: self.sting_under_test += letter - return is_balanced(self.sting_under_test, self.call_return_map) - - return BalancedStringSUL(call_return_map) + return is_balanced(self.sting_under_test, self.call_return_map, self.allow_empty_string) + return BalancedStringSUL(call_return_map, allow_empty_string) diff --git a/aalpy/utils/BenchmarkSevpaModels.py b/aalpy/utils/BenchmarkSevpaModels.py index 6ddda2c0..01a8bf38 100644 --- a/aalpy/utils/BenchmarkSevpaModels.py +++ b/aalpy/utils/BenchmarkSevpaModels.py @@ -1,355 +1,6 @@ from aalpy.automata.Sevpa import Sevpa, SevpaAlphabet -# def sevpa_for_L1(): -# # we always ensure that n >= 1 -# -# call_set = {'a'} -# return_set = {'b'} -# internal_set = {} -# -# state_setup = { -# "q0": (False, {"a": [("q1", 'push', None)], "b": [(Sevpa.error_state.state_id, None, None)]}), -# "q1": (False, {"a": [("q1", 'push', None)], "b": [("q2", 'pop', "a")]}), -# "q2": (True, {"a": [(Sevpa.error_state.state_id, None, None)], "b": [("q2", 'pop', "a")]}), -# } -# sevpa = 
Sevpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) -# return sevpa - - -# def vpa_for_L2(): -# -# call_set = {'a', 'b'} -# return_set = {'c', 'd'} -# internal_set = {} -# -# state_setup = { -# "q0": (False, {"a": [("q1", 'push', None)], "b": [("q1", 'push', None)], -# "c": [(Vpa.error_state.state_id, None, None)], -# "d": [(Vpa.error_state.state_id, None, None)]}), -# "q1": (False, {"a": [("q1", 'push', None)], "b": [("q1", 'push', None)], -# "c": [("q2", 'pop', "a"), ("q2", 'pop', "b")], -# "d": [("q2", 'pop', "a"), ("q2", 'pop', "b")]}), -# "q2": (True, {"a": [(Vpa.error_state.state_id, None, None)], -# "b": [(Vpa.error_state.state_id, None, None)], -# "c": [("q2", 'pop', "a"), ("q2", 'pop', "b")], -# "d": [("q2", 'pop', "a"), ("q2", 'pop', "b")]}), -# } -# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) -# return vpa -# -# -# def vpa_for_L3(): -# -# call_set = {'a', 'c', 'b', 'd'} -# return_set = {'e', 'g', 'f', 'h'} -# internal_set = {} -# -# state_setup = { -# "q0": (False, {"a": [("q0a", 'push', None)], -# "c": [("q0c", 'push', None)], -# }), -# "q0a": (False, {"b": [("q1", 'push', None)]}), -# "q0c": (False, {"d": [("q1", 'push', None)]}), -# "q1": (False, {"a": [("q1a", 'push', None)], -# "c": [("q1c", 'push', None)], -# "e": [("q2e", 'pop', "b"), ("q2e", 'pop', "d")], -# "g": [("q2g", 'pop', "b"), ("q2g", 'pop', "d")], # stack should actually be redundant -# }), -# "q1a": (False, {"b": [("q1", 'push', None)]}), -# "q1c": (False, {"d": [("q1", 'push', None)]}), -# "q2e": (False, {"f": [("q2", 'pop', "a"), ("q2", 'pop', "c")]}), -# "q2g": (False, {"h": [("q2", 'pop', "a"), ("q2", 'pop', "c")]}), -# "q2": (True, {"e": [("q2e", 'pop', "b"), ("q2e", 'pop', "d")], -# "g": [("q2g", 'pop', "b"), ("q2g", 'pop', "d")]}) -# } -# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) -# return vpa -# -# -# def vpa_for_L4(): -# -# call_set = {'a', 'b'} -# return_set = {'c', 'd'} -# 
internal_set = {} -# -# state_setup = { -# "q0": (False, {"a": [("q01", 'push', None)], "b": [(Vpa.error_state.state_id, None, None)]}), -# "q01": (False, {"b": [("q1", 'push', None)], "a": [(Vpa.error_state.state_id, None, None)]}), -# -# "q1": (False, {"a": [("q11", 'push', None)], "b": [(Vpa.error_state.state_id, None, None)], -# "c": [("q21", 'pop', "b")]}), -# "q11": (False, {"b": [("q1", 'push', None)], "a": [(Vpa.error_state.state_id, None, None)]}), -# "q21": (False, {"d": [("q2", 'pop', "a")]}), -# "q2": (True, {"c": [("q21", 'pop', "b")]}), -# } -# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) -# return vpa -# -# -# def vpa_for_L5(): -# -# call_set = {'a', 'b', 'c'} -# return_set = {'d', 'e', 'f'} -# internal_set = {} -# -# state_setup = { -# "q0": (False, {"a": [("q01", 'push', None)]}), -# "q01": (False, {"b": [("q02", 'push', None)]}), -# "q02": (False, {"c": [("q1", 'push', None)]}), -# "q1": (False, {"a": [("q11", 'push', None)], -# "d": [("q21", 'pop', "c")]}), -# "q11": (False, {"b": [("q12", 'push', None)]}), -# "q12": (False, {"c": [("q1", 'push', None)]}), -# "q21": (False, {"e": [("q22", 'pop', "b")]}), -# "q22": (False, {"f": [("q2", 'pop', "a")]}), -# "q2": (True, {"d": [("q21", 'pop', "c")]}), -# } -# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) -# return vpa -# -# -# def vpa_for_L7(): -# # Dyck order 2 -# -# call_set = {'(', '['} -# return_set = {')', ']'} -# internal_set = {} -# -# state_setup = { -# "q0": (False, {"(": [("q1", 'push', None)], -# "[": [("q1", 'push', None)], # exclude empty seq -# }), -# "q1": (True, {"(": [("q1", 'push', None)], -# "[": [("q1", 'push', None)], -# ")": [("q1", 'pop', "(")], -# "]": [("q1", 'pop', "[")] -# }), -# } -# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) -# return vpa -# -# -# def vpa_for_L8(): -# # Dyck order 3 -# -# call_set = {'(', '[', '{'} -# return_set = {')', ']', '}'} -# internal_set 
= {} -# -# state_setup = { -# "q0": (False, {"(": [("q1", 'push', None)], -# "[": [("q1", 'push', None)], -# "{": [("q1", 'push', None)], -# }), -# "q1": (True, {"(": [("q1", 'push', None)], -# "[": [("q1", 'push', None)], -# "{": [("q1", 'push', None)], -# ")": [("q1", 'pop', "(")], -# "]": [("q1", 'pop', "[")], -# "}": [("q1", 'pop', "{")], -# }), -# } -# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) -# return vpa -# -# -# def vpa_for_L9(): -# # Dyck order 4 -# -# call_set = {'(', '[', '{', '<'} -# return_set = {')', ']', '}', '>'} -# internal_set = {} -# -# state_setup = { -# "q0": (False, {"(": [("q1", 'push', None)], -# "[": [("q1", 'push', None)], -# "{": [("q1", 'push', None)], -# "<": [("q1", 'push', None)], -# }), -# "q1": (True, {"(": [("q1", 'push', None)], -# "[": [("q1", 'push', None)], -# "{": [("q1", 'push', None)], -# "<": [("q1", 'push', None)], -# ")": [("q1", 'pop', "(")], -# "]": [("q1", 'pop', "[")], -# "}": [("q1", 'pop', "{")], -# ">": [("q1", 'pop', "{")], -# }), -# } -# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) -# return vpa -# -# -# def vpa_for_L10(): -# # RE Dyck order 1 -# -# call_set = {'a'} -# return_set = {'v'} -# internal_set = {'b', 'c', 'd', ' e', 'w', 'x', 'y', 'z'} -# -# state_setup = { -# "q0": (False, {"a": [("qa", 'push', None)], -# }), -# "qa": (False, {"b": [("qb", None, None)], -# }), -# "qb": (False, {"c": [("qc", None, None)], -# }), -# "qc": (False, {"d": [("qd", None, None)], -# }), -# "qd": (False, {"e": [("q1", None, None)], -# }), -# "q1": (True, {"a": [("qa", 'push', None)], -# "v": [("qv", 'pop', "a")]}), -# "qv": (False, {"w": [("qw", None, None)]}), -# "qw": (False, {"x": [("qx", None, None)]}), -# "qx": (False, {"y": [("qy", None, None)]}), -# "qy": (False, {"z": [("q1", None, None)]}) -# } -# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) -# return vpa -# -# -# def vpa_for_L11(): -# # RE Dyck order 1 
-# -# call_set = {'a', 'c'} -# return_set = {'d', 'f'} -# internal_set = {'b', 'e'} -# -# state_setup = { -# "q0": (False, {"a": [("qa", 'push', None)], -# "c": [("q1", 'push', None)], -# }), -# "qa": (False, {"b": [("q1", None, None)], -# }), -# "q1": (True, {"a": [("qa", 'push', None)], -# "c": [("q1", 'push', None)], -# "d": [("qd", 'pop', "a"), ("qd", 'pop', "c")], -# "f": [("q1", 'pop', "a"), ("q1", 'pop', "c")]}), -# "qd": (False, {"e": [("q1", None, None)]}) -# } -# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) -# return vpa -# -# -# def vpa_for_L12(): -# # Dyck order 2 (single-nested) -# -# call_set = {'(', '['} -# return_set = {')', ']'} -# internal_set = {} -# -# state_setup = { -# "q0": (False, {"(": [("q1", 'push', None)], -# "[": [("q1", 'push', None)], # exclude empty seq -# }), -# "q1": (False, {"(": [("q1", 'push', None)], -# "[": [("q1", 'push', None)], -# ")": [("q2", 'pop', "(")], -# "]": [("q2", 'pop', "[")]}), -# "q2": (True, { -# ")": [("q2", 'pop', "(")], -# "]": [("q2", 'pop', "[")] -# }), -# } -# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) -# return vpa -# -# -# def vpa_for_L13(): -# # Dyck order 1 -# -# call_set = {'('} -# return_set = {')'} -# internal_set = {'a', 'b', 'c'} -# -# state_setup = { -# "q0": (False, {"(": [("q1", 'push', None)], -# "a": [("q1", None, None)], -# "b": [("q1", None, None)], -# "c": [("q1", None, None)], # exclude empty seq -# }), -# "q1": (True, {"(": [("q1", 'push', None)], -# ")": [("q1", 'pop', "(")], -# "a": [("q1", None, None)], -# "b": [("q1", None, None)], -# "c": [("q1", None, None)] -# }), -# } -# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) -# return vpa -# -# -# def vpa_for_L14(): -# # Dyck order 2 -# -# call_set = {'(', '['} -# return_set = {')', ']'} -# internal_set = {'a', 'b', 'c'} -# -# state_setup = { -# "q0": (False, {"(": [("q1", 'push', None)], -# "[": [("q1", 'push', None)], -# "a": 
[("q1", None, None)], -# "b": [("q1", None, None)], -# "c": [("q1", None, None)], # exclude empty seq -# }), -# "q1": (True, {"(": [("q1", 'push', None)], -# "[": [("q1", 'push', None)], -# ")": [("q1", 'pop', "(")], -# "]": [("q1", 'pop', "[")], -# "a": [("q1", None, None)], -# "b": [("q1", None, None)], -# "c": [("q1", None, None)] -# }), -# } -# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) -# return vpa -# -# -# def vpa_for_L15(): -# # Dyck order 1 -# -# call_set = {'('} -# return_set = {')'} -# internal_set = {'a', 'b', 'c', 'd'} -# -# state_setup = { -# "q0": (False, {"(": [("q1", 'push', None)], -# "a": [("qa", None, None)], -# "d": [("q1", None, None)], # exclude empty seq -# }), -# "q1": (True, {"(": [("q1", 'push', None)], -# ")": [("q1", 'pop', "(")], -# "a": [("qa", None, None)], -# "d": [("q1", None, None)], -# }), -# "qa": (False, {"b": [("qb", None, None)], -# }), -# "qb": (False, {"c": [("q1", None, None)], -# }) -# } -# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) -# return vpa -# -# -# def vpa_for_L16(): -# # just a testing language -# call_set = {'a'} -# return_set = {'b'} -# internal_set = {} -# -# state_setup = { -# "q0": (False, {"a": [("q1", 'push', "$")]}), -# "q1": (False, {"a": [("q1", 'push', "x")], -# "b": [("q1", 'pop', "x"), ("q2", 'pop', "$")], -# }), -# "q2": (True, {}) -# } -# vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) -# return vpa - def sevpa_for_L12_refined(): # Like L12 Language (Balanced parathesis) but the state setup is different @@ -372,6 +23,7 @@ def sevpa_for_L12_refined(): sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) return sevpa + def sevpa_congruence_for_vpa_paper(): # This is a 1-SEVPA which accepts the language L = c1L1r + c2L2r # L1 is a regular language which has an even number of a's @@ -379,7 +31,7 @@ def sevpa_congruence_for_vpa_paper(): call_set = {'(', '['} return_set = {')', ']'} 
- internal_set = {'x'} + internal_set = {'x'} input_alphabet = SevpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) @@ -395,8 +47,3 @@ def sevpa_congruence_for_vpa_paper(): } sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) return sevpa - - - - - diff --git a/aalpy/utils/BenchmarkVpaModels.py b/aalpy/utils/BenchmarkVpaModels.py index 02289094..1a18e079 100644 --- a/aalpy/utils/BenchmarkVpaModels.py +++ b/aalpy/utils/BenchmarkVpaModels.py @@ -1,5 +1,6 @@ from aalpy.automata.Vpa import Vpa + def vpa_for_L1(): # we always ensure that n >= 1 @@ -17,7 +18,6 @@ def vpa_for_L1(): def vpa_for_L2(): - call_set = {'a', 'b'} return_set = {'c', 'd'} internal_set = {} @@ -39,7 +39,6 @@ def vpa_for_L2(): def vpa_for_L3(): - call_set = {'a', 'c', 'b', 'd'} return_set = {'e', 'g', 'f', 'h'} internal_set = {} @@ -67,7 +66,6 @@ def vpa_for_L3(): def vpa_for_L4(): - call_set = {'a', 'b'} return_set = {'c', 'd'} internal_set = {} @@ -87,7 +85,6 @@ def vpa_for_L4(): def vpa_for_L5(): - call_set = {'a', 'b', 'c'} return_set = {'d', 'e', 'f'} internal_set = {} @@ -348,6 +345,3 @@ def vpa_for_L16(): } vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) return vpa - - - diff --git a/aalpy/utils/FileHandler.py b/aalpy/utils/FileHandler.py index 84c89eda..baf2042e 100644 --- a/aalpy/utils/FileHandler.py +++ b/aalpy/utils/FileHandler.py @@ -1,4 +1,4 @@ -import os +import sys import sys import traceback from pathlib import Path @@ -6,11 +6,11 @@ from pydot import Dot, Node, Edge, graph_from_dot_file from aalpy.automata import Dfa, MooreMachine, Mdp, Onfsm, MealyState, DfaState, MooreState, MealyMachine, \ - MdpState, StochasticMealyMachine, StochasticMealyState, OnfsmState, MarkovChain, McState, Pda, Vpa, Sevpa + MdpState, StochasticMealyMachine, StochasticMealyState, OnfsmState, MarkovChain, McState, Vpa, Sevpa file_types = ['dot', 'png', 'svg', 'pdf', 'string'] automaton_types = {Dfa: 'dfa', 
MealyMachine: 'mealy', MooreMachine: 'moore', Mdp: 'mdp', - StochasticMealyMachine: 'smm', Onfsm: 'onfsm', MarkovChain: 'mc', Pda: 'pda', Vpa: 'vpa', Sevpa: 'vpa'} + StochasticMealyMachine: 'smm', Onfsm: 'onfsm', MarkovChain: 'mc', Vpa: 'vpa', Sevpa: 'vpa'} def _wrap_label(label): diff --git a/aalpy/utils/__init__.py b/aalpy/utils/__init__.py index 25802f01..5112e636 100644 --- a/aalpy/utils/__init__.py +++ b/aalpy/utils/__init__.py @@ -2,7 +2,6 @@ generate_random_moore_machine, generate_random_markov_chain, generate_random_deterministic_automata from .AutomatonGenerators import generate_random_mdp, generate_random_ONFSM from .BenchmarkSULs import * -from .BenchmarkPdaModels import * from .DataHandler import DataHandler, CharacterTokenizer, DelimiterTokenizer, IODelimiterTokenizer from .FileHandler import save_automaton_to_file, load_automaton_from_file, visualize_automaton from .ModelChecking import model_check_experiment, mdp_2_prism_format, model_check_properties, get_properties_file, \ diff --git a/test_edi.py b/test_edi.py index 00efbf26..6d2e33bc 100644 --- a/test_edi.py +++ b/test_edi.py @@ -1,10 +1,8 @@ from Examples import learning_context_free_grammar_example from aalpy.SULs.AutomataSUL import SevpaSUL, VpaSUL -from aalpy.automata.Pda import generate_data_from_pda -from aalpy.learning_algs import run_KV_vpda, run_KV +from aalpy.learning_algs import run_KV from aalpy.oracles import RandomWordEqOracle, RandomWalkEqOracle from aalpy.utils import visualize_automaton, get_Angluin_dfa -from aalpy.utils.BenchmarkPdaModels import * from aalpy.utils.BenchmarkVpaModels import * from aalpy.utils.BenchmarkSevpaModels import * @@ -16,9 +14,9 @@ # test test test -learning_context_free_grammar_example() +# learning_context_free_grammar_example() -exit() +# exit() from random import seed @@ -27,10 +25,7 @@ print(f'VPA {i + 1 if i < 6 else i + 2}') # 16 works - for i in range(100): - if i < 9: - continue - seed(i) + for i in range(10): print(i) model_under_learning = vpa 
@@ -45,5 +40,7 @@ model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', print_level=2, cex_processing='rs') + e = 1 + # exit() From 6ec183e1344462b2761d0812135ce625607262ec Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Fri, 20 Oct 2023 13:40:28 +0200 Subject: [PATCH 22/62] update SEVPA --- aalpy/automata/Sevpa.py | 146 +++++++++------------------- aalpy/utils/BenchmarkSevpaModels.py | 9 +- test_edi.py | 2 +- 3 files changed, 49 insertions(+), 108 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 65e3d5d7..276d2849 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -24,7 +24,7 @@ def get_merged_alphabet(self) -> list: class SevpaState(AutomatonState): """ - Single state of a deterministic finite automaton. + Single state of a 1-SEVPA. """ def __init__(self, state_id, is_accepting=False): @@ -55,7 +55,6 @@ def __init__(self, initial_state: SevpaState, states: list[SevpaState], input_al self.states = states self.input_alphabet = input_alphabet self.current_state = None - self.call_balance = 0 self.stack = [] def reset_to_initial(self): @@ -65,7 +64,6 @@ def reset_to_initial(self): def reset(self): self.current_state = self.initial_state self.stack = [self.empty] - self.call_balance = 0 return self.current_state.is_accepting and self.top() == self.empty def top(self): @@ -74,74 +72,47 @@ def top(self): def pop(self): return self.stack.pop() - def possible(self, letter): - """ - Checks if a certain step on the automaton is possible - - TODO: Adaptation for Stack content ? 
- """ + def step(self, letter): if self.current_state == Sevpa.error_state: - return True - # push is always possible + return False + + if letter is None: + return self.current_state.is_accepting and self.top() == self.empty + if letter in self.input_alphabet.call_alphabet: - return True - if letter is not None: - transitions = self.current_state.transitions[letter] - possible_trans = [] - for t in transitions: - if t.symbol in self.input_alphabet.call_alphabet: - possible_trans.append(t) - elif t.symbol in self.input_alphabet.return_alphabet: - if t.stack_guard == self.top(): - possible_trans.append(t) - elif t.symbol in self.input_alphabet.internal_alphabet: - possible_trans.append(t) - else: - assert False - # trans = [t for t in transitions if t.stack_guard is None or self.top() == t.stack_guard] - assert len(possible_trans) < 2 - if len(possible_trans) == 0: - return False + self.stack.append((self.current_state.state_id, letter)) + self.current_state = self.initial_state + return self.current_state.is_accepting and self.top() == self.empty + + # get possible transitions + transitions = self.current_state.transitions[letter] + possible_transitions = [] + for t in transitions: + if t.symbol in self.input_alphabet.return_alphabet: + if t.stack_guard == self.top(): + possible_transitions.append(t) + elif t.symbol in self.input_alphabet.internal_alphabet: + possible_transitions.append(t) else: - return True - return False + assert False - def step(self, letter): - if self.current_state == Sevpa.error_state: - return False - if not self.possible(letter): + assert len(possible_transitions) < 2 + # No transition is possible + if len(possible_transitions) == 0: self.current_state = Sevpa.error_state return False - if letter is not None: - transitions = self.current_state.transitions[letter] - possible_trans = [] - for t in transitions: - if t.symbol in self.input_alphabet.call_alphabet: - possible_trans.append(t) - elif t.symbol in 
self.input_alphabet.return_alphabet: - if t.stack_guard == self.top(): - possible_trans.append(t) - elif t.symbol in self.input_alphabet.internal_alphabet: - possible_trans.append(t) - else: - assert False - - if letter in self.input_alphabet.call_alphabet: - assert (letter in self.input_alphabet.call_alphabet) # push letters must be in call set - self.stack.append((self.current_state.state_id, letter)) - self.current_state = self.initial_state - return self.current_state.is_accepting and self.top() == self.empty - - assert len(possible_trans) < 2 - trans = possible_trans[0] - self.current_state = trans.target - - if trans.action == 'pop': - assert (letter in self.input_alphabet.return_alphabet) # pop letters must be in return set - if len(self.stack) <= 1: # empty stack elem should always be there - self.current_state = Sevpa.error_state - return False - self.stack.pop() + + taken_transition = possible_transitions[0] + self.current_state = taken_transition.target + + if taken_transition.action == 'pop': + # pop letters must be in return set + assert (letter in self.input_alphabet.return_alphabet) + # empty stack elem should always be on the stack + if len(self.stack) <= 1: + self.current_state = Sevpa.error_state + return False + self.stack.pop() return self.current_state.is_accepting and self.top() == self.empty @@ -163,8 +134,8 @@ def to_state_setup(self): # ensure prefixes are computed # self.compute_prefixes() - - sorted_states = sorted(self.states, key=lambda x: len(x.prefix)) + # TODO + sorted_states = sorted(self.states, key=lambda x: len(x.state_id)) for s in sorted_states: state_setup_dict[s.state_id] = ( s.is_accepting, {k: (v.target.state_id, v.action) for k, v in s.transitions.items()}) @@ -173,31 +144,11 @@ def to_state_setup(self): @staticmethod def from_state_setup(state_setup: dict, init_state_id, input_alphabet: SevpaAlphabet): - """ - First state in the state setup is the initial state. 
- Example state setup: - state_setup = { - "a": (True, {"x": ("b1",PUSH), "y": ("a", NONE)}), - "b1": (False, {"x": ("b2", PUSH), "y": "a"}), - "b2": (True, {"x": "b3", "y": "a"}), - "b3": (False, {"x": "b4", "y": "a"}), - "b4": (False, {"x": "c", "y": "a"}), - "c": (True, {"x": "a", "y": "a"}), - } - - Args: - - state_setup: map from state_id to tuple(output and transitions_dict) - - Returns: - - PDA - """ - # state_setup should map from state_id to tuple(is_accepting and transitions_dict) # build states with state_id and output states = {key: SevpaState(key, val[0]) for key, val in state_setup.items()} states[Sevpa.error_state.state_id] = Sevpa.error_state # PdaState(Pda.error_state,False) + # add transitions to states for state_id, state in states.items(): if state_id == Sevpa.error_state.state_id: @@ -210,26 +161,17 @@ def from_state_setup(state_setup: dict, init_state_id, input_alphabet: SevpaAlph stack_guard = (stack_guard[0], stack_guard[1]) trans = SevpaTransition(start=state, target=states[target_state_id], symbol=_input, action=action, stack_guard=stack_guard) - elif action == 'push': # In SEVPA you can only define return transitions and internal transitions - assert False - else: + elif action is None: trans = SevpaTransition(start=state, target=states[target_state_id], symbol=_input, action=None, stack_guard=None) + else: + assert False, 'Action must either be "pop" or None, note that there are no push actions ' \ + 'definitions in SEVPA' state.transitions[_input].append(trans) - # add call transitions - for call_letter in input_alphabet.call_alphabet: - trans = SevpaTransition(start=state, target=states[init_state_id], symbol=call_letter, action='push', - stack_guard=f'{state_id}{call_letter}') - state.transitions[call_letter].append(trans) - init_state = states[init_state_id] - # states to list - states = [state for state in states.values()] - - sevpa = Sevpa(init_state, states, input_alphabet) - return sevpa + return Sevpa(init_state, [state for 
state in states.values()], input_alphabet) def transform_access_sequance(self, state=None, stack_content=None) -> list[str]: diff --git a/aalpy/utils/BenchmarkSevpaModels.py b/aalpy/utils/BenchmarkSevpaModels.py index 01a8bf38..e6dac9ea 100644 --- a/aalpy/utils/BenchmarkSevpaModels.py +++ b/aalpy/utils/BenchmarkSevpaModels.py @@ -4,9 +4,9 @@ def sevpa_for_L12_refined(): # Like L12 Language (Balanced parathesis) but the state setup is different - call_set = {'(', '['} - return_set = {')', ']'} - internal_set = {'x'} + call_set = ['(', '['] + return_set = [')', ']'] + internal_set = ['x'] input_alphabet = SevpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) @@ -20,8 +20,7 @@ def sevpa_for_L12_refined(): "x": [("q0", None, None)] }), } - sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) - return sevpa + return Sevpa.from_state_setup(state_setup, "q0", input_alphabet) def sevpa_congruence_for_vpa_paper(): diff --git a/test_edi.py b/test_edi.py index 6d2e33bc..987d9be8 100644 --- a/test_edi.py +++ b/test_edi.py @@ -25,7 +25,7 @@ print(f'VPA {i + 1 if i < 6 else i + 2}') # 16 works - for i in range(10): + for i in range(1): print(i) model_under_learning = vpa From cdb02c8a3b0ac24b3a15c1bff8d5e274eea6c732 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Fri, 20 Oct 2023 13:41:49 +0200 Subject: [PATCH 23/62] optimize imports --- aalpy/learning_algs/deterministic/ClassificationTree.py | 4 ++-- aalpy/learning_algs/deterministic/KV.py | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index 8d1863ae..4e05daac 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -1,8 +1,8 @@ from collections import defaultdict from typing import Union -from aalpy.automata import DfaState, Dfa, MealyState, MealyMachine, 
MooreState, MooreMachine, SevpaAlphabet, SevpaState, \ - SevpaTransition, Sevpa +from aalpy.automata import DfaState, Dfa, MealyState, MealyMachine, MooreState, MooreMachine, \ + SevpaAlphabet, SevpaState, SevpaTransition, Sevpa from aalpy.base import SUL from aalpy.learning_algs.deterministic.CounterExampleProcessing import rs_cex_processing diff --git a/aalpy/learning_algs/deterministic/KV.py b/aalpy/learning_algs/deterministic/KV.py index 2040cf6a..8e8561df 100644 --- a/aalpy/learning_algs/deterministic/KV.py +++ b/aalpy/learning_algs/deterministic/KV.py @@ -4,7 +4,7 @@ from aalpy.automata import Dfa, DfaState, MealyState, MealyMachine, MooreState, MooreMachine, \ Sevpa, SevpaState, SevpaAlphabet from aalpy.base import Oracle, SUL -from aalpy.utils.HelperFunctions import print_learning_info, visualize_classification_tree +from aalpy.utils.HelperFunctions import print_learning_info from .ClassificationTree import ClassificationTree from .CounterExampleProcessing import counterexample_successfully_processed from ...base.SUL import CacheSUL @@ -87,8 +87,6 @@ def run_KV(alphabet: Union[list, SevpaAlphabet], sul: SUL, eq_oracle: Oracle, au if automaton_type == 'mealy': initial_state.output_fun[a] = sul.query((a,))[-1] - # TODO this is quite ugly... do we need input alphbabet in the constructur of SEVPA? 
- # Input alphbaet for SVEPA/VPA should not be in a constructor, but you can get it with get_input_alphabet() if automaton_type != 'vpa': hypothesis = automaton_class[automaton_type](initial_state, [initial_state]) else: @@ -165,4 +163,4 @@ def run_KV(alphabet: Union[list, SevpaAlphabet], sul: SUL, eq_oracle: Oracle, au if return_data: return hypothesis, info - return hypothesis \ No newline at end of file + return hypothesis From 62a72123645f7fdfb97beb64a71b7978cdeb4fe1 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Fri, 20 Oct 2023 14:34:58 +0200 Subject: [PATCH 24/62] update random generation --- Examples.py | 16 ++++- aalpy/automata/Sevpa.py | 75 --------------------- aalpy/learning_algs/deterministic/KV.py | 2 +- aalpy/utils/AutomatonGenerators.py | 87 ++++++++++++++++++++++++- aalpy/utils/__init__.py | 2 +- test_edi.py | 33 ++++++++-- 6 files changed, 128 insertions(+), 87 deletions(-) diff --git a/Examples.py b/Examples.py index 27af9c5c..20f8589a 100644 --- a/Examples.py +++ b/Examples.py @@ -947,10 +947,20 @@ def learning_context_free_grammar_example(): from aalpy.utils.BenchmarkSULs import get_balanced_string_sul call_return_map = {'(': ')', '[': ']'} - balanced_string_sul = get_balanced_string_sul(call_return_map, allow_empty_string=True) sevpa_alphabet = SevpaAlphabet([], list(call_return_map.keys()), list(call_return_map.values())) - eq_oracle = RandomWordEqOracle(sevpa_alphabet.get_merged_alphabet(), balanced_string_sul, num_walks=1000) + # bounded deterministic approximation + balanced_string_sul = get_balanced_string_sul(call_return_map, allow_empty_string=False) + eq_oracle = RandomWordEqOracle(sevpa_alphabet.get_merged_alphabet(), balanced_string_sul, num_walks=1000, + min_walk_len=5, max_walk_len=30) + + learned_deterministic_approximation = run_KV(sevpa_alphabet.get_merged_alphabet(), + balanced_string_sul, eq_oracle, automaton_type='dfa', + max_learning_rounds=20) + + balanced_string_sul = get_balanced_string_sul(call_return_map, 
allow_empty_string=False) + eq_oracle = RandomWordEqOracle(sevpa_alphabet.get_merged_alphabet(), balanced_string_sul, num_walks=1000, + min_walk_len=5, max_walk_len=30) learned_model = run_KV(sevpa_alphabet, balanced_string_sul, eq_oracle, automaton_type='vpa') - learned_model.visualize() + learned_model.visualize() \ No newline at end of file diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 276d2849..f62352ca 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -207,78 +207,3 @@ def create_daisy_hypothesis(initial_state, alphabet): return Sevpa(initial_state, [initial_state], alphabet) -def has_transition(state: SevpaState, transition_letter, stack_guard) -> bool: - transitions = state.transitions[transition_letter] - if transitions is not None: - if stack_guard is None: # internal transition - for transition in transitions: - if transition.symbol == transition_letter: - return True - else: # return transition - for transition in transitions: - if transition.stack_guard == stack_guard and transition.symbol == transition_letter: - return True - - return False - - -def generate_random_sevpa(alphabet: SevpaAlphabet, amount_states, acceptance_prob, return_transition_prob): - # TODO: for some reason the alphabet attributes get - # treated as sets which are don't have accessible elements via index - internal_alphabet = list(alphabet.internal_alphabet) - return_alphabet = list(alphabet.return_alphabet) - call_alphabet = list(alphabet.call_alphabet) - - state_list = [SevpaState('q0', random.uniform(0.0, 1.0) < acceptance_prob)] - for i in range(1, amount_states): # add a return transition - if internal_alphabet == 0 or random.uniform(0.0, 1.0) < return_transition_prob: - while True: - from_state = state_list[random.randint(0, len(state_list) - 1)] - return_letter = return_alphabet[random.randint(0, len(return_alphabet) - 1)] - stack_state = state_list[random.randint(0, len(state_list) - 1)] - call_letter = 
call_alphabet[random.randint(0, len(call_alphabet) - 1)] - stack_guard = f'{stack_state}{call_letter}' - if not has_transition(from_state, return_letter, stack_guard): - break - target_state = SevpaState(f'q{i}', random.uniform(0.0, 1.0) < acceptance_prob) - state_list.append(target_state) - from_state.transitions[return_letter].append( - SevpaTransition(from_state, target_state, return_letter, 'pop', stack_guard)) - else: # add an internal transition - while True: - from_state = state_list[random.randint(0, len(state_list) - 1)] - internal_letter = internal_alphabet[random.randint(0, len(internal_alphabet) - 1)] - if not has_transition(from_state, internal_letter, None): - break - target_state = SevpaState(f'q{i}', random.uniform(0.0, 1.0) < acceptance_prob) - state_list.append(target_state) - from_state.transitions[internal_letter].append( - SevpaTransition(from_state, target_state, internal_letter, None, None)) - - assert len(state_list) == amount_states - initial_state_id = random.randint(0, amount_states) - initial_state = state_list[initial_state_id] - - for state in state_list: - for internal_letter in internal_alphabet: - if state.transitions[internal_letter] is None: - target_state = state_list[random.randint(0, len(state_list) - 1)] - state.transitions[internal_letter].append( - SevpaTransition(state, target_state, internal_letter, None, None)) - - for call_letter in call_alphabet: - for stack_state in state_list: - stack_guard = f'{stack_state.state_id}{call_letter}' - for return_letter in return_alphabet: - if not has_transition(state, return_letter, stack_guard): - target_state = state_list[random.randint(0, len(state_list) - 1)] - state.transitions[return_letter].append( - SevpaTransition(state, target_state, return_letter, 'pop', stack_guard)) - - # add call transitions - for call_letter in call_alphabet: - trans = SevpaTransition(start=state, target=initial_state, symbol=call_letter, action='push', - stack_guard=f'{state.state_id}{call_letter}') - 
state.transitions[call_letter].append(trans) - - return Sevpa(initial_state, state_list, alphabet) diff --git a/aalpy/learning_algs/deterministic/KV.py b/aalpy/learning_algs/deterministic/KV.py index 8e8561df..4e1045a5 100644 --- a/aalpy/learning_algs/deterministic/KV.py +++ b/aalpy/learning_algs/deterministic/KV.py @@ -27,7 +27,7 @@ def run_KV(alphabet: Union[list, SevpaAlphabet], sul: SUL, eq_oracle: Oracle, au eq_oracle: equivalence oracle - automaton_type: type of automaton to be learned. One of 'dfa', 'mealy', 'moore' + automaton_type: type of automaton to be learned. One of 'dfa', 'mealy', 'moore', 'vpa' cex_processing: None for no counterexample processing, or 'rs' for Rivest & Schapire counterexample processing diff --git a/aalpy/utils/AutomatonGenerators.py b/aalpy/utils/AutomatonGenerators.py index 461095fd..3472b396 100644 --- a/aalpy/utils/AutomatonGenerators.py +++ b/aalpy/utils/AutomatonGenerators.py @@ -2,7 +2,8 @@ import warnings from aalpy.automata import Dfa, DfaState, MdpState, Mdp, MealyMachine, MealyState, \ - MooreMachine, MooreState, OnfsmState, Onfsm, MarkovChain, McState, StochasticMealyState, StochasticMealyMachine + MooreMachine, MooreState, OnfsmState, Onfsm, MarkovChain, McState, StochasticMealyState, StochasticMealyMachine, \ + Sevpa, SevpaState, SevpaAlphabet, SevpaTransition def generate_random_deterministic_automata(automaton_type, @@ -425,3 +426,87 @@ def generate_random_markov_chain(num_states): return MarkovChain(states[0], states) +def _has_transition(state: SevpaState, transition_letter, stack_guard) -> bool: + transitions = state.transitions[transition_letter] + if transitions is not None: + if stack_guard is None: # internal transition + for transition in transitions: + if transition.symbol == transition_letter: + return True + else: # return transition + for transition in transitions: + if transition.stack_guard == stack_guard and transition.symbol == transition_letter: + return True + + return False + + +def 
generate_random_sevpa(num_states, internal_alphabet_size, call_alphabet_size, return_alphabet_size + , acceptance_prob, return_transition_prob): + + internal_alphabet = [f'i{i}' for i in range(internal_alphabet_size)] + call_alphabet = [f'c{i}' for i in range(call_alphabet_size)] + return_alphabet = [f'r{i}' for i in range(return_alphabet_size)] + + sevpa_alphabet = SevpaAlphabet(internal_alphabet, call_alphabet, return_alphabet) + + states = [SevpaState(f'q{i}', random.random() < acceptance_prob) for i in range(num_states)] + state_buffer = states.copy() + + for state in states: + if not internal_alphabet or random.uniform(0.0, 1.0) < return_transition_prob: + while True: + return_letter = random.choice(return_alphabet) + stack_state = random.choice(states) if not state_buffer else random.choice(state_buffer) + if stack_state in state_buffer: + state_buffer.remove(stack_state) + + call_letter = random.choice(call_alphabet) + stack_guard = (stack_state.state_id, call_letter) + + if not _has_transition(state, return_letter, stack_guard): + break + + target_state = random.choice(states) + state.transitions[return_letter].append( + SevpaTransition(state, target_state, return_letter, 'pop', stack_guard)) + else: + # add an internal transition + while True: + internal_letter = random.choice(internal_alphabet) + if not _has_transition(state, internal_letter, None): + break + + target_state = random.choice(states) if not state_buffer else random.choice(state_buffer) + if target_state in state_buffer: + state_buffer.remove(target_state) + + state.transitions[internal_letter].append( + SevpaTransition(state, target_state, internal_letter, None, None)) + + assert len(states) == num_states + initial_state = random.choice(states) + + for state in states: + for internal_letter in internal_alphabet: + if state.transitions[internal_letter] is None: + target_state = random.choice(states) + state.transitions[internal_letter].append( + SevpaTransition(state, target_state, 
internal_letter, None, None)) + + for call_letter in call_alphabet: + for stack_state in states: + stack_guard = (stack_state.state_id, call_letter) + for return_letter in return_alphabet: + if not _has_transition(state, return_letter, stack_guard): + target_state = states[random.randint(0, len(states) - 1)] + state.transitions[return_letter].append( + SevpaTransition(state, target_state, return_letter, 'pop', stack_guard)) + + # add call transitions + for call_letter in call_alphabet: + trans = SevpaTransition(start=state, target=initial_state, symbol=call_letter, action='push', + stack_guard=f'{state.state_id}{call_letter}') + state.transitions[call_letter].append(trans) + + return Sevpa(initial_state, states, sevpa_alphabet) diff --git a/aalpy/utils/__init__.py b/aalpy/utils/__init__.py index 5112e636..61ebd134 100644 --- a/aalpy/utils/__init__.py +++ b/aalpy/utils/__init__.py @@ -1,6 +1,6 @@ from .AutomatonGenerators import generate_random_dfa, generate_random_mealy_machine, generate_random_smm, \ generate_random_moore_machine, generate_random_markov_chain, generate_random_deterministic_automata -from .AutomatonGenerators import generate_random_mdp, generate_random_ONFSM +from .AutomatonGenerators import generate_random_mdp, generate_random_ONFSM, generate_random_sevpa from .BenchmarkSULs import * from .DataHandler import DataHandler, CharacterTokenizer, DelimiterTokenizer, IODelimiterTokenizer from .FileHandler import save_automaton_to_file, load_automaton_from_file, visualize_automaton diff --git a/test_edi.py b/test_edi.py index 987d9be8..e46942a4 100644 --- a/test_edi.py +++ b/test_edi.py @@ -2,10 +2,31 @@ from aalpy.SULs.AutomataSUL import SevpaSUL, VpaSUL from aalpy.learning_algs import run_KV from aalpy.oracles import RandomWordEqOracle, RandomWalkEqOracle -from aalpy.utils import visualize_automaton, get_Angluin_dfa +from aalpy.utils import visualize_automaton, get_Angluin_dfa, generate_random_sevpa from aalpy.utils.BenchmarkVpaModels import * from 
aalpy.utils.BenchmarkSevpaModels import * +# learning_context_free_grammar_example() + +def test_on_random_svepa(): + random_svepa = generate_random_sevpa(num_states=10, internal_alphabet_size=2, + call_alphabet_size=2, + return_alphabet_size=2, + acceptance_prob=0.4, + return_transition_prob=0.5) + + alphabet = random_svepa.input_alphabet + + sul = SevpaSUL(random_svepa, include_top=False, check_balance=False) + + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000, + min_walk_len=10, max_walk_len=30) + # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) + model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', + print_level=2, cex_processing='rs') + +test_on_random_svepa() +exit() # TODOs with priority ranking # refactor SEVPA and VPA classes, and allign with Edi if they are nice # When creating form state setup or whatever, all alphabets should be lists, not sets!!! important for reproducability @@ -14,14 +35,15 @@ # test test test -# learning_context_free_grammar_example() +learning_context_free_grammar_example() -# exit() +exit() from random import seed -for i, vpa in enumerate([vpa_for_L1(), vpa_for_L2(), vpa_for_L3(), vpa_for_L4(), vpa_for_L5(), vpa_for_L7(), vpa_for_L8(), - vpa_for_L9(), vpa_for_L10(), vpa_for_L11(), vpa_for_L12(),vpa_for_L13(), vpa_for_L14(), vpa_for_L15()]): +for i, vpa in enumerate( + [vpa_for_L1(), vpa_for_L2(), vpa_for_L3(), vpa_for_L4(), vpa_for_L5(), vpa_for_L7(), vpa_for_L8(), + vpa_for_L9(), vpa_for_L10(), vpa_for_L11(), vpa_for_L12(), vpa_for_L13(), vpa_for_L14(), vpa_for_L15()]): print(f'VPA {i + 1 if i < 6 else i + 2}') # 16 works @@ -43,4 +65,3 @@ e = 1 # exit() - From 257968fed0f87eda077c6d716c94fdc573737eac Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Fri, 20 Oct 2023 14:45:04 +0200 Subject: [PATCH 25/62] update vpa learning --- .../deterministic/ClassificationTree.py | 2 + aalpy/utils/BenchmarkSevpaModels.py | 
6 +- pda_main_experiments.py | 27 -------- sevpa_main_experiments.py | 58 ------------------ test_edi.py => test_main.py | 61 +++++++++---------- vpa_main_experiments.py | 34 ----------- 6 files changed, 34 insertions(+), 154 deletions(-) delete mode 100644 pda_main_experiments.py delete mode 100644 sevpa_main_experiments.py rename test_edi.py => test_main.py (52%) delete mode 100644 vpa_main_experiments.py diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index 4e05daac..1a3ea822 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -313,6 +313,8 @@ def update(self, cex: tuple, hypothesis): d = [] assert j is not None and d is not None + # TODO adapt for VPAs + self._insert_new_leaf(discriminator=(cex[j - 1], *d), old_leaf_access_string=hypothesis.current_state.prefix, new_leaf_access_string=tuple(cex[:j - 1]) or tuple(), diff --git a/aalpy/utils/BenchmarkSevpaModels.py b/aalpy/utils/BenchmarkSevpaModels.py index e6dac9ea..6efe96ad 100644 --- a/aalpy/utils/BenchmarkSevpaModels.py +++ b/aalpy/utils/BenchmarkSevpaModels.py @@ -28,9 +28,9 @@ def sevpa_congruence_for_vpa_paper(): # L1 is a regular language which has an even number of a's # L2 is a regular language which has an even number of b's - call_set = {'(', '['} - return_set = {')', ']'} - internal_set = {'x'} + call_set = ['(', '['] + return_set = [')', ']'] + internal_set = ['x'] input_alphabet = SevpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) diff --git a/pda_main_experiments.py b/pda_main_experiments.py deleted file mode 100644 index c8dbe4cd..00000000 --- a/pda_main_experiments.py +++ /dev/null @@ -1,27 +0,0 @@ - -from aalpy.SULs.AutomataSUL import PdaSUL -from aalpy.automata.Pda import generate_data_from_pda -from aalpy.learning_algs import run_vpda_Lstar -from aalpy.oracles import 
RandomWMethodEqOracle -from aalpy.utils.BenchmarkPdaModels import * - - -pda = pda_for_L12() -pda.visualize() - -input_alphabet = pda.get_input_alphabet() -sul = PdaSUL(pda, include_top=True, check_balance=True) -print(sul.query(('(',')'))) - - -# pda_sequances = generate_data_from_pda(pda, 10000) -# accepting_seq, rejecting_seq = [x[0] for x in pda_sequances if x[1]], [x[0] for x in pda_sequances if not x[1]] -# accepting_seq.sort(key=len) -# print('Positive') -# for i in range(10): -# print(accepting_seq[i]) -# exit() - -# eq_oracle = RandomWMethodEqOracle(alphabet=input_alphabet, sul=sul, walks_per_state=100, walk_len=10) -# model = run_vpda_Lstar(alphabet=input_alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type="dfa", print_level=3, -# max_learning_rounds=1) \ No newline at end of file diff --git a/sevpa_main_experiments.py b/sevpa_main_experiments.py deleted file mode 100644 index 806174a6..00000000 --- a/sevpa_main_experiments.py +++ /dev/null @@ -1,58 +0,0 @@ -from aalpy.SULs.AutomataSUL import SevpaSUL, DfaSUL -from aalpy.automata.Pda import generate_data_from_pda -from aalpy.automata.Sevpa import generate_random_sevpa -from aalpy.learning_algs import run_KV_vpda, run_KV -from aalpy.oracles import RandomWordEqOracle, RandomWalkEqOracle -from aalpy.utils import visualize_automaton, get_Angluin_dfa -from aalpy.utils.BenchmarkPdaModels import * -from aalpy.utils.BenchmarkVpaModels import * -from aalpy.utils.BenchmarkSevpaModels import * - -# Example for normal KV - -# dfa = get_Angluin_dfa() -# -# visualize_automaton(dfa, path="InitialModel") -# -# alphabet = dfa.get_input_alphabet() -# -# sul = DfaSUL(dfa) -# eq_oracle = RandomWalkEqOracle(alphabet, sul, 500) -# -# learned_dfa = run_KV(alphabet, sul, eq_oracle, automaton_type='dfa', cache_and_non_det_check=True, cex_processing=None, print_level=3) -# -# learned_dfa.visualize() - -######################################## - -call_set = {'(', '['} -return_set = {')', ']'} -internal_set = {'x'} 
-input_alphabet = SevpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) - -random_sevpa = generate_random_sevpa(input_alphabet, 3, 0.5, 0.1) -visualize_automaton(random_sevpa, path="Random Sevpa") - -sevpa = sevpa_for_L12_refined() - -visualize_automaton(sevpa, path="InitialModel") - -print(sevpa.input_alphabet) -merged_input_alphabet = sevpa.input_alphabet.get_merged_alphabet() - -sul = SevpaSUL(sevpa, include_top=True, check_balance=True) -print(sul.query(('(', ')'))) -print(sul.query(('[', ')'))) -print(sul.query(('[', '(', ')', ']'))) - - -eq_oracle = RandomWordEqOracle(alphabet=merged_input_alphabet, sul=sul) -model = run_KV_vpda(alphabet=sevpa.input_alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3, max_learning_rounds=10) - -model_sul = SevpaSUL(model, include_top=True, check_balance=True) -print(model_sul.query(('(', ')'))) -print(model_sul.query(('[', ')'))) -print(model_sul.query(('[', '(', ')', ']'))) - -model.visualize() - diff --git a/test_edi.py b/test_main.py similarity index 52% rename from test_edi.py rename to test_main.py index e46942a4..f134a9a9 100644 --- a/test_edi.py +++ b/test_main.py @@ -6,38 +6,36 @@ from aalpy.utils.BenchmarkVpaModels import * from aalpy.utils.BenchmarkSevpaModels import * -# learning_context_free_grammar_example() - -def test_on_random_svepa(): - random_svepa = generate_random_sevpa(num_states=10, internal_alphabet_size=2, - call_alphabet_size=2, - return_alphabet_size=2, - acceptance_prob=0.4, - return_transition_prob=0.5) - - alphabet = random_svepa.input_alphabet - - sul = SevpaSUL(random_svepa, include_top=False, check_balance=False) - eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000, - min_walk_len=10, max_walk_len=30) - # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) - model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', - print_level=2, 
cex_processing='rs') - -test_on_random_svepa() -exit() -# TODOs with priority ranking -# refactor SEVPA and VPA classes, and allign with Edi if they are nice -# When creating form state setup or whatever, all alphabets should be lists, not sets!!! important for reproducability -# # Check TODOs in KV and Classification tree file -# random generation of SEVPA as done in learnlib -# test test test - - -learning_context_free_grammar_example() +# learning_context_free_grammar_example() -exit() +# TODOs +# 1. Make update function of KV work, update_rs works and most likely should be similar +# 2. Create VpaStateCoverageOracle, that behaves like StatePrefix oracle but for VPAs +# 3. Add all 15 langs as SVEPA +# 4. Implement to state setup +# 5. Create an active interface to learn a grammar of some language, like simple C or Java + + +# def test_on_random_svepa(): +# random_svepa = generate_random_sevpa(num_states=10, internal_alphabet_size=2, +# call_alphabet_size=2, +# return_alphabet_size=2, +# acceptance_prob=0.4, +# return_transition_prob=0.5) +# +# alphabet = random_svepa.input_alphabet +# +# sul = SevpaSUL(random_svepa, include_top=False, check_balance=False) +# +# eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000, +# min_walk_len=10, max_walk_len=30) +# # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) +# model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', +# print_level=2, cex_processing='rs') +# +# test_on_random_svepa() +# exit() from random import seed @@ -47,7 +45,7 @@ def test_on_random_svepa(): print(f'VPA {i + 1 if i < 6 else i + 2}') # 16 works - for i in range(1): + for i in range(10): print(i) model_under_learning = vpa @@ -62,6 +60,5 @@ def test_on_random_svepa(): model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', print_level=2, cex_processing='rs') - e = 1 # exit() diff --git a/vpa_main_experiments.py 
b/vpa_main_experiments.py deleted file mode 100644 index d9090e5a..00000000 --- a/vpa_main_experiments.py +++ /dev/null @@ -1,34 +0,0 @@ -from aalpy.SULs.AutomataSUL import VpaSUL -from aalpy.automata.Pda import generate_data_from_pda -from aalpy.learning_algs import run_vpda_Lstar -from aalpy.oracles import RandomWMethodEqOracle -from aalpy.utils.BenchmarkPdaModels import * -from aalpy.utils.BenchmarkVpaModels import * - - -vpa = vpa_for_L12() - -vpa.visualize() - -input_alphabet = vpa.get_input_alphabet() -merged_input_alphabet = vpa.get_input_alphabet_merged() -# print("Call: " + str(input_alphabet[0]) + "\nReturn: " + str(input_alphabet[1]) + "\nInternal: " + str(input_alphabet[2])) - -sul = VpaSUL(vpa, include_top=True, check_balance=True) -print(sul.query(('(',')'))) - -assert sul.query(('(', ')'))[-1][0] == True - -# pda_sequences = generate_data_from_pda(vpa, 10000) -# accepting_seq, rejecting_seq = [x[0] for x in pda_sequences if x[1]], [x[0] for x in pda_sequences if not x[1]] -# accepting_seq.sort(key=len) -# print('Positive') -# for i in range(10): -# print(accepting_seq[i]) -# -# eq_oracle = RandomWMethodEqOracle(alphabet=merged_input_alphabet, sul=sul, walks_per_state=100, walk_len=10) -# model = run_vpda_Lstar(alphabet=input_alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type="vpa", print_level=3, -# max_learning_rounds=1) -# -# model.visualize() - From 80e5de03b4d0455575ab9f4960a00fba06775817 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Fri, 20 Oct 2023 20:06:08 +0200 Subject: [PATCH 26/62] optimize runtime --- aalpy/automata/Sevpa.py | 60 +++++++++---------- aalpy/automata/Vpa.py | 2 +- .../deterministic/ClassificationTree.py | 3 - test_main.py | 42 +++++++------ 4 files changed, 51 insertions(+), 56 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index f62352ca..c9c3a3ac 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -1,4 +1,3 @@ -import random from collections import defaultdict from 
typing import Union @@ -11,9 +10,6 @@ def __init__(self, internal_alphabet: list, call_alphabet: list, return_alphabet self.call_alphabet = call_alphabet self.return_alphabet = return_alphabet - def __str__(self): - return f'Internal: {self.internal_alphabet} Call: {self.call_alphabet} Return: {self.return_alphabet}' - def get_merged_alphabet(self) -> list: alphabet = list() alphabet.extend(self.internal_alphabet) @@ -21,6 +17,9 @@ def get_merged_alphabet(self) -> list: alphabet.extend(self.return_alphabet) return alphabet + def __str__(self): + return f'Internal: {self.internal_alphabet} Call: {self.call_alphabet} Return: {self.return_alphabet}' + class SevpaState(AutomatonState): """ @@ -47,7 +46,6 @@ def __str__(self): class Sevpa(Automaton): empty = "_" - error_state = SevpaState("ErrorSinkState", False) def __init__(self, initial_state: SevpaState, states: list[SevpaState], input_alphabet: SevpaAlphabet): super().__init__(initial_state, states) @@ -56,65 +54,61 @@ def __init__(self, initial_state: SevpaState, states: list[SevpaState], input_al self.input_alphabet = input_alphabet self.current_state = None self.stack = [] + self.error_state_reached = False + + # alphabet sets for faster inclusion checks (as in SevpaAlphabet we have lists, for reproducibility) + self.internal_set = set(self.input_alphabet.internal_alphabet) + self.call_set = set(self.input_alphabet.call_alphabet) + self.return_set = set(self.input_alphabet.return_alphabet) def reset_to_initial(self): super().reset_to_initial() - self.reset() - - def reset(self): self.current_state = self.initial_state self.stack = [self.empty] - return self.current_state.is_accepting and self.top() == self.empty - - def top(self): - return self.stack[-1] - - def pop(self): - return self.stack.pop() + self.error_state_reached = False + return self.current_state.is_accepting and self.stack[-1] == self.empty def step(self, letter): - if self.current_state == Sevpa.error_state: + if self.error_state_reached: return 
False if letter is None: - return self.current_state.is_accepting and self.top() == self.empty + return self.current_state.is_accepting and self.stack[-1] == self.empty - if letter in self.input_alphabet.call_alphabet: + if letter in self.call_set: self.stack.append((self.current_state.state_id, letter)) self.current_state = self.initial_state - return self.current_state.is_accepting and self.top() == self.empty + return self.current_state.is_accepting and self.stack[-1] == self.empty # get possible transitions transitions = self.current_state.transitions[letter] - possible_transitions = [] + taken_transition = None for t in transitions: - if t.symbol in self.input_alphabet.return_alphabet: - if t.stack_guard == self.top(): - possible_transitions.append(t) - elif t.symbol in self.input_alphabet.internal_alphabet: - possible_transitions.append(t) + if t.symbol in self.return_set: + if t.stack_guard == self.stack[-1]: + taken_transition = t + break + elif t.symbol in self.internal_set: + taken_transition = t + break else: assert False - assert len(possible_transitions) < 2 # No transition is possible - if len(possible_transitions) == 0: - self.current_state = Sevpa.error_state + if not taken_transition: + self.error_state_reached = True return False - taken_transition = possible_transitions[0] self.current_state = taken_transition.target if taken_transition.action == 'pop': - # pop letters must be in return set - assert (letter in self.input_alphabet.return_alphabet) # empty stack elem should always be on the stack if len(self.stack) <= 1: - self.current_state = Sevpa.error_state + self.error_state_reached = True return False self.stack.pop() - return self.current_state.is_accepting and self.top() == self.empty + return self.current_state.is_accepting and self.stack[-1] == self.empty def get_state_by_id(self, state_id) -> Union[SevpaState, None]: for state in self.states: diff --git a/aalpy/automata/Vpa.py b/aalpy/automata/Vpa.py index 67ef48af..e6c7c681 100644 --- 
a/aalpy/automata/Vpa.py +++ b/aalpy/automata/Vpa.py @@ -230,7 +230,7 @@ def generate_data_from_pda(automaton, num_examples, lens=None, classify_states=F seqs = list(product(input_al, repeat=l)) for seq in seqs: - out = automaton.reset() + out = automaton.reset_to_initial() nr_steps = 0 for inp in seq: if automaton.possible(inp) or not break_on_impossible: diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index 1a3ea822..1a084c3c 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -118,9 +118,6 @@ def _sift(self, word): the CTLeafNode that is reached by the sifting operation. """ - for letter in word: - alphabet = self.alphabet if self.automaton_type != 'vpa' else self.alphabet.get_merged_alphabet() - assert letter is None or letter in alphabet if word in self.sifting_cache: return self.sifting_cache[word] diff --git a/test_main.py b/test_main.py index f134a9a9..f31e035d 100644 --- a/test_main.py +++ b/test_main.py @@ -1,10 +1,11 @@ from Examples import learning_context_free_grammar_example from aalpy.SULs.AutomataSUL import SevpaSUL, VpaSUL from aalpy.learning_algs import run_KV -from aalpy.oracles import RandomWordEqOracle, RandomWalkEqOracle +from aalpy.oracles import RandomWordEqOracle, RandomWalkEqOracle, StatePrefixEqOracle from aalpy.utils import visualize_automaton, get_Angluin_dfa, generate_random_sevpa from aalpy.utils.BenchmarkVpaModels import * from aalpy.utils.BenchmarkSevpaModels import * +from random import seed # learning_context_free_grammar_example() @@ -16,28 +17,31 @@ # 4. Implement to state setup # 5. 
Create an active interface to learn a grammar of some language, like simple C or Java +def test_on_random_svepa(): + random_svepa = generate_random_sevpa(num_states=50, internal_alphabet_size=3, + call_alphabet_size=3, + return_alphabet_size=3, + acceptance_prob=0.4, + return_transition_prob=0.5) + + alphabet = random_svepa.input_alphabet + + sul = SevpaSUL(random_svepa, include_top=False, check_balance=False) -# def test_on_random_svepa(): -# random_svepa = generate_random_sevpa(num_states=10, internal_alphabet_size=2, -# call_alphabet_size=2, -# return_alphabet_size=2, -# acceptance_prob=0.4, -# return_transition_prob=0.5) -# -# alphabet = random_svepa.input_alphabet -# -# sul = SevpaSUL(random_svepa, include_top=False, check_balance=False) -# -# eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000, -# min_walk_len=10, max_walk_len=30) -# # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) -# model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', -# print_level=2, cex_processing='rs') -# + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000, + min_walk_len=10, max_walk_len=30) + # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) + model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', + print_level=2, cex_processing='rs') + +# import cProfile +# pr = cProfile.Profile() +# pr.enable() # test_on_random_svepa() +# pr.disable() +# pr.print_stats(sort='tottime') # exit() -from random import seed for i, vpa in enumerate( [vpa_for_L1(), vpa_for_L2(), vpa_for_L3(), vpa_for_L4(), vpa_for_L5(), vpa_for_L7(), vpa_for_L8(), From eac49ee8bfd804a3ab4826dc6551725a565b2ac0 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Mon, 23 Oct 2023 17:11:35 +0200 Subject: [PATCH 27/62] add option to define exclusive call-return pairs --- aalpy/automata/Sevpa.py | 5 
+++-- .../learning_algs/deterministic/ClassificationTree.py | 11 ++++++++--- test_main.py | 10 +++++++--- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index c9c3a3ac..d21aa933 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -5,10 +5,12 @@ class SevpaAlphabet: - def __init__(self, internal_alphabet: list, call_alphabet: list, return_alphabet: list): + def __init__(self, internal_alphabet: list, call_alphabet: list, return_alphabet: list, + exclusive_call_return_pairs: dict = None): self.internal_alphabet = internal_alphabet self.call_alphabet = call_alphabet self.return_alphabet = return_alphabet + self.exclusive_call_return_pairs = exclusive_call_return_pairs def get_merged_alphabet(self) -> list: alphabet = list() @@ -200,4 +202,3 @@ def create_daisy_hypothesis(initial_state, alphabet): initial_state.transitions[r].append(trans) return Sevpa(initial_state, [initial_state], alphabet) - diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index 1a084c3c..12d9bda6 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -216,9 +216,14 @@ def gen_hypothesis(self): # Add call transitions for call_letter in self.alphabet.call_alphabet: - for other_state in states_for_transitions: - # Add return transitions - for return_letter in self.alphabet.return_alphabet: + # Add return transitions + for return_letter in self.alphabet.return_alphabet: + # check if exclusive pairs of call and return letters are defined in an alphabets + if self.alphabet.exclusive_call_return_pairs and \ + self.alphabet.exclusive_call_return_pairs[call_letter] != return_letter: + continue + + for other_state in states_for_transitions: transition_target_node = self._sift( other_state.prefix + (call_letter,) + state.prefix + (return_letter,)) 
transition_target_access_string = transition_target_node.access_string diff --git a/test_main.py b/test_main.py index f31e035d..be80c98c 100644 --- a/test_main.py +++ b/test_main.py @@ -34,6 +34,7 @@ def test_on_random_svepa(): model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', print_level=2, cex_processing='rs') + # import cProfile # pr = cProfile.Profile() # pr.enable() @@ -49,14 +50,18 @@ def test_on_random_svepa(): print(f'VPA {i + 1 if i < 6 else i + 2}') # 16 works - for i in range(10): - print(i) + for s in range(10): + print(s) + seed(s) model_under_learning = vpa alphabet = SevpaAlphabet(list(model_under_learning.internal_set), list(model_under_learning.call_set), list(model_under_learning.return_set)) + if i == 9: + alphabet.exclusive_call_return_pairs = {'(': ')', '[': ']', '{': '}', '<': '>'} + sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000) @@ -64,5 +69,4 @@ def test_on_random_svepa(): model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', print_level=2, cex_processing='rs') - # exit() From 10e8608119892da631134c02637589ea4019258d Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Tue, 24 Oct 2023 09:18:15 +0200 Subject: [PATCH 28/62] update sevpa visualization --- aalpy/utils/FileHandler.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/aalpy/utils/FileHandler.py b/aalpy/utils/FileHandler.py index baf2042e..dd052abd 100644 --- a/aalpy/utils/FileHandler.py +++ b/aalpy/utils/FileHandler.py @@ -10,7 +10,7 @@ file_types = ['dot', 'png', 'svg', 'pdf', 'string'] automaton_types = {Dfa: 'dfa', MealyMachine: 'mealy', MooreMachine: 'moore', Mdp: 'mdp', - StochasticMealyMachine: 'smm', Onfsm: 'onfsm', MarkovChain: 'mc', Vpa: 'vpa', Sevpa: 'vpa'} + StochasticMealyMachine: 'smm', Onfsm: 'onfsm', MarkovChain: 'mc', Vpa: 
'vpa', Sevpa: 'sevpa'} def _wrap_label(label): @@ -40,11 +40,11 @@ def _get_node(state, automaton_type): return Node(state.state_id, label=_wrap_label(f'{state.output}')) if automaton_type == 'smm': return Node(state.state_id, label=_wrap_label(state.state_id)) - if automaton_type == 'pda': + if automaton_type == 'vpa': if state.is_accepting: return Node(state.state_id, label=_wrap_label(state.state_id), shape='doublecircle') return Node(state.state_id, label=_wrap_label(state.state_id)) - if automaton_type == 'vpa': + if automaton_type == 'sevpa': if state.is_accepting: return Node(state.state_id, label=_wrap_label(state.state_id), shape='doublecircle') return Node(state.state_id, label=_wrap_label(state.state_id)) @@ -90,19 +90,6 @@ def _add_transition_to_graph(graph, state, automaton_type, display_same_state_tr continue prob = round(s[2], round_floats) if round_floats else s[2] graph.add_edge(Edge(state.state_id, s[0].state_id, label=_wrap_label(f'{i}/{s[1]}:{prob}'))) - if automaton_type == 'pda': - for i in state.transitions.keys(): - transitions_list = state.transitions[i] - for transition in transitions_list: - if transition.action is None: - graph.add_edge(Edge(transition.start.state_id, transition.target.state_id, - label=_wrap_label(f'{transition.symbol}'))) - if transition.action == 'push': - graph.add_edge(Edge(transition.start.state_id, transition.target.state_id, - label=_wrap_label(f'{transition.symbol}/push(\'{transition.stack_guard}\')'))) - if transition.action == 'pop': - graph.add_edge(Edge(transition.start.state_id, transition.target.state_id, - label=_wrap_label(f'{transition.symbol}/pop(\'{transition.stack_guard}\')'))) if automaton_type == 'vpa': for i in state.transitions.keys(): transitions_list = state.transitions[i] @@ -116,7 +103,19 @@ def _add_transition_to_graph(graph, state, automaton_type, display_same_state_tr if transition.target == Vpa.error_state: edge.set_style('dashed') - + graph.add_edge(edge) + if automaton_type == 'sevpa': 
+ for i in state.transitions.keys(): + transitions_list = state.transitions[i] + for transition in transitions_list: + if transition.action == 'pop': + edge = Edge(transition.start.state_id, transition.target.state_id, + label=_wrap_label(f'{transition.symbol} / {transition.stack_guard}')) + elif transition.action is None: + edge = Edge(transition.start.state_id, transition.target.state_id, + label=_wrap_label(f'{transition.symbol}')) + else: + assert False graph.add_edge(edge) From e54427ebfbe7c6931dc723b1f5a695dbb5380df5 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Wed, 25 Oct 2023 09:39:37 +0200 Subject: [PATCH 29/62] add arithmetic example --- test_main.py | 47 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/test_main.py b/test_main.py index be80c98c..612fc17f 100644 --- a/test_main.py +++ b/test_main.py @@ -1,5 +1,8 @@ +import ast + from Examples import learning_context_free_grammar_example from aalpy.SULs.AutomataSUL import SevpaSUL, VpaSUL +from aalpy.base import SUL from aalpy.learning_algs import run_KV from aalpy.oracles import RandomWordEqOracle, RandomWalkEqOracle, StatePrefixEqOracle from aalpy.utils import visualize_automaton, get_Angluin_dfa, generate_random_sevpa @@ -12,10 +15,45 @@ # TODOs # 1. Make update function of KV work, update_rs works and most likely should be similar -# 2. Create VpaStateCoverageOracle, that behaves like StatePrefix oracle but for VPAs -# 3. Add all 15 langs as SVEPA -# 4. Implement to state setup -# 5. Create an active interface to learn a grammar of some language, like simple C or Java +# 2. Create a SEVPA function that generates random positive strings - model.generate_random_positive_string() +# 2. Add all 15 langs as SVEPA +# 4. Implement to state setup, test saving and loading to/from file +# 5. 
Create an active interface to learn a grammar of some language, like simplified C or Java + +def test_arithmetic_expression(): + import warnings + warnings.filterwarnings("ignore") + + class ArithmeticSUL(SUL): + def __init__(self): + super().__init__() + self.string_under_test = '' + + def pre(self): + self.string_under_test = '' + + def post(self): + pass + + def step(self, letter): + if letter: + self.string_under_test += ' ' + letter + + try: + eval(self.string_under_test) + return True + except (SyntaxError, TypeError): + return False + + sul = ArithmeticSUL() + alphabet = SevpaAlphabet(internal_alphabet=['1', '+'], call_alphabet=['('], return_alphabet=[')']) + eq_oracle = RandomWordEqOracle(alphabet.get_merged_alphabet(), sul, min_walk_len=5, + max_walk_len=20, num_walks=20000) + + learned_model = run_KV(alphabet, sul, eq_oracle, automaton_type='vpa') + learned_model.visualize() + exit() + def test_on_random_svepa(): random_svepa = generate_random_sevpa(num_states=50, internal_alphabet_size=3, @@ -35,6 +73,7 @@ def test_on_random_svepa(): print_level=2, cex_processing='rs') +test_arithmetic_expression() # import cProfile # pr = cProfile.Profile() # pr.enable() From 704d7b71ee57ad2e826b239bb3f75f70b0140c1f Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Mon, 30 Oct 2023 12:53:57 +0100 Subject: [PATCH 30/62] add linear cex processing --- .../deterministic/ClassificationTree.py | 18 +++++-- .../deterministic/CounterExampleProcessing.py | 53 +++++++++++++++++++ aalpy/learning_algs/deterministic/KV.py | 7 +-- det_cex_processing_test.py | 21 ++++++++ .../deterministic/ClassificationTree.html | 14 ++--- .../aalpy/learning_algs/deterministic/KV.html | 4 +- test_main.py | 4 +- 7 files changed, 100 insertions(+), 21 deletions(-) create mode 100644 det_cex_processing_test.py diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index 12d9bda6..432662b5 100644 --- 
a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -4,7 +4,7 @@ from aalpy.automata import DfaState, Dfa, MealyState, MealyMachine, MooreState, MooreMachine, \ SevpaAlphabet, SevpaState, SevpaTransition, Sevpa from aalpy.base import SUL -from aalpy.learning_algs.deterministic.CounterExampleProcessing import rs_cex_processing +from aalpy.learning_algs.deterministic.CounterExampleProcessing import rs_cex_processing, linear_cex_processing automaton_class = {'dfa': Dfa, 'mealy': MealyMachine, 'moore': MooreMachine} @@ -315,14 +315,12 @@ def update(self, cex: tuple, hypothesis): d = [] assert j is not None and d is not None - # TODO adapt for VPAs - self._insert_new_leaf(discriminator=(cex[j - 1], *d), old_leaf_access_string=hypothesis.current_state.prefix, new_leaf_access_string=tuple(cex[:j - 1]) or tuple(), new_leaf_position=self.sul.query((*cex[:j - 1], *(cex[j - 1], *d)))[-1]) - def update_rs(self, cex: tuple, hypothesis): + def process_counterexample(self, cex: tuple, hypothesis, cex_processing_fun): """ Updates the classification tree based on a counterexample, using Rivest & Schapire's counterexample processing @@ -336,9 +334,19 @@ def update_rs(self, cex: tuple, hypothesis): Args: cex: the counterexample used to update the tree hypothesis: the former (wrong) hypothesis + cex_processing_fun: string choosing which cex_processing to use """ - v = max(rs_cex_processing(self.sul, cex, hypothesis, is_vpa=self.automaton_type == 'vpa'), key=len) + v = None + if 'linear' in cex_processing_fun: + direction = cex_processing_fun[-3:] + v = linear_cex_processing(self.sul, cex, hypothesis, is_vpa=self.automaton_type == 'vpa', + direction=direction, suffix_closedness=False)[0] + if cex_processing_fun == 'rs': + v = rs_cex_processing(self.sul, cex, hypothesis, is_vpa=self.automaton_type == 'vpa', + suffix_closedness=False)[0] + + assert v a = cex[len(cex) - len(v) - 1] u = cex[:len(cex) - len(v) - 1] 
assert (*u, a, *v) == cex diff --git a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py index 16903a25..82f3be3d 100644 --- a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py +++ b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py @@ -104,3 +104,56 @@ def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, else: suffix_to_query = [suffix] return suffix_to_query + + +def linear_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, closedness='suffix', + direction='fwd', is_vpa=False): + assert direction in {'fwd', 'bwd'} + + direction = 'fwd' + + distinguishing_suffix = None + previous_output = None + + for i in range(0, len(cex)): + bp = i if direction == 'fwd' else -i - 1 + prefix = cex[:bp] + suffix = cex[bp:] + assert cex == prefix + suffix + + hypothesis.reset_to_initial() + hypothesis.execute_sequence(hypothesis.initial_state, prefix) + + if not is_vpa: + s_bracket = hypothesis.current_state.prefix + else: + s_bracket = tuple(hypothesis.transform_access_sequance(hypothesis.current_state)) + + sul_out = sul.query(s_bracket + suffix)[-1] + + if previous_output is None: + previous_output = sul_out + continue + + if sul_out != previous_output: + distinguishing_suffix = suffix if direction == 'fwd' else cex[bp + 1:] + break + + previous_output = sul_out + + assert distinguishing_suffix + if suffix_closedness: + suffixes = all_suffixes(distinguishing_suffix) if closedness == 'suffix' else all_prefixes( + distinguishing_suffix) + suffixes.reverse() + suffix_to_query = suffixes + else: + suffix_to_query = [distinguishing_suffix] + + return suffix_to_query + + +def exponential_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, closedness='suffix', + direction='fwd', is_vpa=False): + assert direction in {'fwd', 'bwd'} + pass diff --git a/aalpy/learning_algs/deterministic/KV.py 
b/aalpy/learning_algs/deterministic/KV.py index 4e1045a5..b0e208d0 100644 --- a/aalpy/learning_algs/deterministic/KV.py +++ b/aalpy/learning_algs/deterministic/KV.py @@ -10,7 +10,7 @@ from ...base.SUL import CacheSUL print_options = [0, 1, 2, 3] -counterexample_processing_strategy = [None, 'rs'] +counterexample_processing_strategy = ['rs', 'linear_fwd', 'linear_bwd'] automaton_class = {'dfa': Dfa, 'mealy': MealyMachine, 'moore': MooreMachine, 'vpa': Sevpa} @@ -133,10 +133,7 @@ def run_KV(alphabet: Union[list, SevpaAlphabet], sul: SUL, eq_oracle: Oracle, au if print_level == 3: print('Counterexample', cex) - if cex_processing == 'rs': - classification_tree.update_rs(cex, hypothesis) - else: - classification_tree.update(cex, hypothesis) + classification_tree.process_counterexample(cex, hypothesis, cex_processing) total_time = round(time.time() - start_time, 2) eq_query_time = round(eq_query_time, 2) diff --git a/det_cex_processing_test.py b/det_cex_processing_test.py new file mode 100644 index 00000000..fa29d55d --- /dev/null +++ b/det_cex_processing_test.py @@ -0,0 +1,21 @@ +from aalpy.utils import generate_random_deterministic_automata +from aalpy.SULs import MealySUL +from aalpy.oracles import RandomWMethodEqOracle +from aalpy.learning_algs import run_KV + +for x in ['linear_fwd', 'linear_bwd']: + for i in range(100): + print(i) + model_type = 'mealy' # or 'moore', 'dfa' + + # for random dfa's you can also define num_accepting_states + random_model = generate_random_deterministic_automata(automaton_type=model_type, num_states=100, + input_alphabet_size=3, output_alphabet_size=4) + + sul = MealySUL(random_model) + input_alphabet = random_model.get_input_alphabet() + + # select any of the oracles + eq_oracle = RandomWMethodEqOracle(input_alphabet, sul, walks_per_state=10, walk_len=20) + + learned_model = run_KV(input_alphabet, sul, eq_oracle, model_type, cex_processing=x, print_level=0) diff --git 
a/docs/documentation/aalpy/learning_algs/deterministic/ClassificationTree.html b/docs/documentation/aalpy/learning_algs/deterministic/ClassificationTree.html index 8c54b7b6..32b48b1d 100644 --- a/docs/documentation/aalpy/learning_algs/deterministic/ClassificationTree.html +++ b/docs/documentation/aalpy/learning_algs/deterministic/ClassificationTree.html @@ -97,7 +97,7 @@

API Documentation

update
  • - update_rs + process_counterexample
  • @@ -404,7 +404,7 @@

    new_leaf_access_string=tuple(cex[:j - 1]) or tuple(), new_leaf_position=self.sul.query((*cex[:j - 1], *(cex[j - 1], *d)))[-1]) - def update_rs(self, cex: tuple, hypothesis): + def process_counterexample(self, cex: tuple, hypothesis): """ Updates the classification tree based on a counterexample, using Rivest & Schapire's counterexample processing @@ -1035,7 +1035,7 @@

    Inherited Members
    new_leaf_access_string=tuple(cex[:j - 1]) or tuple(), new_leaf_position=self.sul.query((*cex[:j - 1], *(cex[j - 1], *d)))[-1]) - def update_rs(self, cex: tuple, hypothesis): + def process_counterexample(self, cex: tuple, hypothesis): """ Updates the classification tree based on a counterexample, using Rivest & Schapire's counterexample processing @@ -1351,17 +1351,17 @@
    Inherited Members
    -
    -
    #   +
    +
    #   def - update_rs(self, cex: tuple, hypothesis): + process_counterexample(self, cex: tuple, hypothesis):
    View Source -
        def update_rs(self, cex: tuple, hypothesis):
    +            
        def process_counterexample(self, cex: tuple, hypothesis):
             """
             Updates the classification tree based on a counterexample,
             using Rivest & Schapire's counterexample processing
    diff --git a/docs/documentation/aalpy/learning_algs/deterministic/KV.html b/docs/documentation/aalpy/learning_algs/deterministic/KV.html
    index 445a0094..b648f732 100644
    --- a/docs/documentation/aalpy/learning_algs/deterministic/KV.html
    +++ b/docs/documentation/aalpy/learning_algs/deterministic/KV.html
    @@ -182,7 +182,7 @@ 

    print('Counterexample', cex) if cex_processing == 'rs': - classification_tree.update_rs(cex, hypothesis) + classification_tree.process_counterexample(cex, hypothesis) else: classification_tree.update(cex, hypothesis) @@ -358,7 +358,7 @@

    print('Counterexample', cex) if cex_processing == 'rs': - classification_tree.update_rs(cex, hypothesis) + classification_tree.process_counterexample(cex, hypothesis) else: classification_tree.update(cex, hypothesis) diff --git a/test_main.py b/test_main.py index 612fc17f..c68238f4 100644 --- a/test_main.py +++ b/test_main.py @@ -73,7 +73,7 @@ def test_on_random_svepa(): print_level=2, cex_processing='rs') -test_arithmetic_expression() +# test_arithmetic_expression() # import cProfile # pr = cProfile.Profile() # pr.enable() @@ -106,6 +106,6 @@ def test_on_random_svepa(): eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000) # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', - print_level=2, cex_processing='rs') + print_level=2, cex_processing='linear_bwd') # exit() From 5f9c0951a0c5857bb93587e234daf23e09905356 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Mon, 30 Oct 2023 12:56:36 +0100 Subject: [PATCH 31/62] update todos --- test_main.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test_main.py b/test_main.py index c68238f4..d5c33049 100644 --- a/test_main.py +++ b/test_main.py @@ -14,10 +14,10 @@ # learning_context_free_grammar_example() # TODOs -# 1. Make update function of KV work, update_rs works and most likely should be similar +# 1. exponential cex processing in CounterExampleProcessing.py # 2. Create a SEVPA function that generates random positive strings - model.generate_random_positive_string() # 2. Add all 15 langs as SVEPA -# 4. Implement to state setup, test saving and loading to/from file +# 4. Implement and test to_state_setup, test saving and loading to/from file # 5. 
Create an active interface to learn a grammar of some language, like simplified C or Java def test_arithmetic_expression(): @@ -98,8 +98,8 @@ def test_on_random_svepa(): list(model_under_learning.call_set), list(model_under_learning.return_set)) - if i == 9: - alphabet.exclusive_call_return_pairs = {'(': ')', '[': ']', '{': '}', '<': '>'} + #if i == 9: + # alphabet.exclusive_call_return_pairs = {'(': ')', '[': ']', '{': '}', '<': '>'} sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) From f2c79490c81ef1873bdeba3922dd792817c3780e Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Mon, 30 Oct 2023 14:28:52 +0100 Subject: [PATCH 32/62] Create Function Signatures and Refactoring of VPA --- aalpy/automata/Sevpa.py | 96 +++++++- aalpy/automata/Vpa.py | 210 ++++++++---------- .../deterministic/ClassificationTree.py | 5 +- .../deterministic/CounterExampleProcessing.py | 2 +- aalpy/utils/BenchmarkVpaModels.py | 129 +++++++---- 5 files changed, 262 insertions(+), 180 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index d21aa933..c8075ccb 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -4,22 +4,46 @@ from aalpy.base import Automaton, AutomatonState +from typing import List, Dict + + class SevpaAlphabet: - def __init__(self, internal_alphabet: list, call_alphabet: list, return_alphabet: list, - exclusive_call_return_pairs: dict = None): + """ + The Alphabet of a 1-SEVPA. + + Attributes: + internal_alphabet (List[str]): Letters for internal transitions. + call_alphabet (List[str]): Letters for push transitions. + return_alphabet (List[str]): Letters for pop transitions. + exclusive_call_return_pairs (Dict[str, str]): A dictionary representing exclusive pairs + of call and return symbols. 
+ """ + + def __init__(self, internal_alphabet: List[str], call_alphabet: List[str], return_alphabet: List[str], + exclusive_call_return_pairs: Dict[str, str] = None): self.internal_alphabet = internal_alphabet self.call_alphabet = call_alphabet self.return_alphabet = return_alphabet self.exclusive_call_return_pairs = exclusive_call_return_pairs - def get_merged_alphabet(self) -> list: + def get_merged_alphabet(self) -> List[str]: + """ + Get the merged alphabet, including internal, call, and return symbols. + + Returns: + List[str]: A list of all symbols in the alphabet. + """ alphabet = list() alphabet.extend(self.internal_alphabet) alphabet.extend(self.call_alphabet) alphabet.extend(self.return_alphabet) return alphabet - def __str__(self): + def __str__(self) -> str: + """ + Returns: + str: A string representation of the alphabet. + """ return f'Internal: {self.internal_alphabet} Call: {self.call_alphabet} Return: {self.return_alphabet}' @@ -27,7 +51,6 @@ class SevpaState(AutomatonState): """ Single state of a 1-SEVPA. """ - def __init__(self, state_id, is_accepting=False): super().__init__(state_id) self.transitions = defaultdict(list[SevpaTransition]) @@ -35,6 +58,16 @@ def __init__(self, state_id, is_accepting=False): class SevpaTransition: + """ + Represents a transition in a 1-SEVPA. + + Attributes: + start (SevpaState): The starting state of the transition. + target (SevpaState): The target state of the transition. + symbol: The symbol associated with the transition. + action: The action performed during the transition (push | pop | None). + stack_guard: The stack symbol to be pushed/popped. + """ def __init__(self, start: SevpaState, target: SevpaState, symbol, action, stack_guard=None): self.start = start self.target = target @@ -43,10 +76,17 @@ def __init__(self, start: SevpaState, target: SevpaState, symbol, action, stack_ self.stack_guard = stack_guard def __str__(self): + """ + Returns: + str: A string representation of the transition. 
+ """ return f"{self.symbol}: {self.start.state_id} --> {self.target.state_id} | {self.action}: {self.stack_guard}" class Sevpa(Automaton): + """ + 1-Module Single Entry Visibly Pushdown Automaton. + """ empty = "_" def __init__(self, initial_state: SevpaState, states: list[SevpaState], input_alphabet: SevpaAlphabet): @@ -71,6 +111,15 @@ def reset_to_initial(self): return self.current_state.is_accepting and self.stack[-1] == self.empty def step(self, letter): + """ + Perform a single step on the 1-SEVPA by transitioning with the given input letter. + + Args: + letter: A single input that is looked up in the transition table of the SevpaState. + + Returns: + bool: True if the reached state is an accepting state and the stack is empty, False otherwise. + """ if self.error_state_reached: return False @@ -120,7 +169,7 @@ def get_state_by_id(self, state_id) -> Union[SevpaState, None]: def execute_sequence(self, origin_state, seq): if origin_state.prefix != self.initial_state.prefix: - assert False, 'execute_sequance for Sevpa only is only supported from the initial state.' + assert False, 'execute_sequence for Sevpa only is only supported from the initial state.' self.reset_to_initial() self.current_state = origin_state return [self.step(s) for s in seq] @@ -169,18 +218,33 @@ def from_state_setup(state_setup: dict, init_state_id, input_alphabet: SevpaAlph init_state = states[init_state_id] return Sevpa(init_state, [state for state in states.values()], input_alphabet) - def transform_access_sequance(self, state=None, stack_content=None) -> list[str]: + def transform_access_string(self, state=None, stack_content=None) -> List[str]: + """ + Transform the access string by omitting redundant call and return letters, as well as internal letters. 
+ This function creates the following word: + For every element in the stack (except the first element '_'): + - Append the state prefix from where the stack element was pushed + - Append the call letter + Append the state prefix from the state where you are calling this function from. + + Args: + state: The state from which the transformation is initiated (default: initial state). + stack_content: The content of the stack for transformation (default: Current Stack content). + + Returns: + List[str]: The transformed access string. + """ word = [] calling_state = self.initial_state if not state else state stack = self.stack if not stack_content else stack_content for index, stack_elem in enumerate(stack): - # skip the first element because it's the start of the stack '_ + # Skip the first element because it's the start of the stack '_' if index == 0: continue - from_state_id = stack_elem[0] # the corresponding state where the stack element got pushed from - call_letter = stack_elem[1] # the call letter that was pushed on the stack + from_state_id = stack_elem[0] # The corresponding state where the stack element was pushed from + call_letter = stack_elem[1] # The call letter that was pushed on the stack from_state = self.get_state_by_id(from_state_id) if from_state.prefix != (): word.extend(from_state.prefix) @@ -190,7 +254,19 @@ def transform_access_sequance(self, state=None, stack_content=None) -> list[str] @staticmethod def create_daisy_hypothesis(initial_state, alphabet): + """ + Create a Daisy Hypothesis 1-SEVPA using the given initial state and alphabet. + + This function creates self-loop transitions for the initial state on every internal letter. + Additionally, it creates self-loop transitions with a pop action for every call letter. + + Args: + initial_state (SevpaState): The initial state of the 1-SEVPA. + alphabet (SevpaAlphabet): The alphabet for the 1-SEVPA. + Returns: + Sevpa: The created 1-SEVPA with the specified initial state and alphabet. 
+ """ for i in alphabet.internal_alphabet: trans = SevpaTransition(start=initial_state, target=initial_state, symbol=i, action=None) initial_state.transitions[i].append(trans) diff --git a/aalpy/automata/Vpa.py b/aalpy/automata/Vpa.py index e6c7c681..795a3297 100644 --- a/aalpy/automata/Vpa.py +++ b/aalpy/automata/Vpa.py @@ -1,13 +1,53 @@ from collections import defaultdict +from typing import List, Dict from aalpy.base import Automaton, AutomatonState -class VpaState(AutomatonState): +class VpaAlphabet: """ - Single state of a deterministic finite automaton. + The Alphabet of a VPA. + + Attributes: + internal_alphabet (List[str]): Letters for internal transitions. + call_alphabet (List[str]): Letters for push transitions. + return_alphabet (List[str]): Letters for pop transitions. + exclusive_call_return_pairs (Dict[str, str]): A dictionary representing exclusive pairs + of call and return symbols. """ + def __init__(self, internal_alphabet: List[str], call_alphabet: List[str], return_alphabet: List[str], + exclusive_call_return_pairs: Dict[str, str] = None): + self.internal_alphabet = internal_alphabet + self.call_alphabet = call_alphabet + self.return_alphabet = return_alphabet + self.exclusive_call_return_pairs = exclusive_call_return_pairs + + def get_merged_alphabet(self) -> List[str]: + """ + Get the merged alphabet, including internal, call, and return symbols. + + Returns: + List[str]: A list of all symbols in the alphabet. + """ + alphabet = list() + alphabet.extend(self.internal_alphabet) + alphabet.extend(self.call_alphabet) + alphabet.extend(self.return_alphabet) + return alphabet + + def __str__(self) -> str: + """ + Returns: + str: A string representation of the alphabet. + """ + return f'Internal: {self.internal_alphabet} Call: {self.call_alphabet} Return: {self.return_alphabet}' + + +class VpaState(AutomatonState): + """ + Single state of a VPA. 
+ """ def __init__(self, state_id, is_accepting=False): super().__init__(state_id) self.transitions = defaultdict(list) @@ -15,6 +55,16 @@ def __init__(self, state_id, is_accepting=False): class VpaTransition: + """ + Represents a transition in a VPA. + + Attributes: + start (VpaState): The starting state of the transition. + target (VpaState): The target state of the transition. + symbol: The symbol associated with the transition. + action: The action performed during the transition (push | pop | None). + stack_guard: The stack symbol to be pushed/popped. + """ def __init__(self, start: VpaState, target: VpaState, symbol, action, stack_guard=None): self.start = start self.target = target @@ -27,20 +77,26 @@ def __str__(self): class Vpa(Automaton): + """ + Visibly Pushdown Automaton. + """ empty = "_" error_state = VpaState("ErrorSinkState", False) - def __init__(self, initial_state: VpaState, states, call_set, return_set, internal_set): + def __init__(self, initial_state: VpaState, states, input_alphabet: VpaAlphabet): super().__init__(initial_state, states) self.initial_state = initial_state self.states = states - self.call_set = call_set - self.return_set = return_set - self.internal_set = internal_set + self.input_alphabet = input_alphabet self.current_state = None self.call_balance = 0 self.stack = [] + # alphabet sets for faster inclusion checks (as in VpaAlphabet we have lists, for reproducibility) + self.internal_set = set(self.input_alphabet.internal_alphabet) + self.call_set = set(self.input_alphabet.call_alphabet) + self.return_set = set(self.input_alphabet.return_alphabet) + def reset_to_initial(self): super().reset_to_initial() self.reset() @@ -60,8 +116,6 @@ def pop(self): def possible(self, letter): """ Checks if a certain step on the automaton is possible - - TODO: Adaptation for Stack content ? 
""" if self.current_state == Vpa.error_state: return True @@ -78,7 +132,6 @@ def possible(self, letter): possible_trans.append(t) else: assert False and print(f'Letter {letter} is not part of any alphabet') - # trans = [t for t in transitions if t.stack_guard is None or self.top() == t.stack_guard] assert len(possible_trans) < 2 if len(possible_trans) == 0: return False @@ -87,6 +140,15 @@ def possible(self, letter): return False def step(self, letter): + """ + Perform a single step on the VPA by transitioning with the given input letter. + + Args: + letter: A single input that is looked up in the transition table of the VpaState. + + Returns: + bool: True if the reached state is an accepting state and the stack is empty, False otherwise. + """ if self.current_state == Vpa.error_state: return False if not self.possible(letter): @@ -121,25 +183,6 @@ def step(self, letter): return self.current_state.is_accepting and self.top() == self.empty - # def compute_output_seq(self, state, sequence): - # if not sequence: - # return [state.is_accepting] - # return super(Dfa, self).compute_output_seq(state, sequence) - - def get_input_alphabet(self) -> list: - alphabet_list = list() - alphabet_list.append(self.call_set) - alphabet_list.append(self.return_set) - alphabet_list.append(self.internal_set) - return alphabet_list - - def get_input_alphabet_merged(self) -> list: - alphabet = list() - alphabet.extend(self.call_set) - alphabet.extend(self.return_set) - alphabet.extend(self.internal_set) - return alphabet - def to_state_setup(self): state_setup_dict = {} @@ -154,26 +197,33 @@ def to_state_setup(self): return state_setup_dict @staticmethod - def from_state_setup(state_setup: dict, init_state_id, call_set, return_set, internal_set): + def from_state_setup(state_setup: dict, init_state_id: str, input_alphabet: VpaAlphabet): """ - First state in the state setup is the initial state. - Example state setup: + Create a VPA from a state setup. 
+ + Example state setup: state_setup = { - "a": (True, {"x": ("b1",PUSH), "y": ("a", NONE)}), - "b1": (False, {"x": ("b2", PUSH), "y": "a"}), - "b2": (True, {"x": "b3", "y": "a"}), - "b3": (False, {"x": "b4", "y": "a"}), - "b4": (False, {"x": "c", "y": "a"}), - "c": (True, {"x": "a", "y": "a"}), - } + "q0": (False, {"(": [("q1", 'push', "(")], + "[": [("q1", 'push', "[")], # exclude empty seq + }), + "q1": (False, {"(": [("q1", 'push', "(")], + "[": [("q1", 'push', "[")], + ")": [("q2", 'pop', "(")], + "]": [("q2", 'pop', "[")]}), + "q2": (True, { + ")": [("q2", 'pop', "(")], + "]": [("q2", 'pop', "[")] + }), Args: - - state_setup: map from state_id to tuple(output and transitions_dict) + state_setup (dict): A dictionary mapping from state IDs to tuples containing + (is_accepting: bool, transitions_dict: dict), where transitions_dict maps input symbols to + lists of tuples (target_state_id, action, stack_guard). + init_state_id (str): The state ID for the initial state of the VPA. + input_alphabet (VpaAlphabet): The alphabet for the VPA. Returns: - - PDA + Vpa: The constructed Visibly Pushdown Automaton. 
""" # state_setup should map from state_id to tuple(is_accepting and transitions_dict) @@ -186,7 +236,6 @@ def from_state_setup(state_setup: dict, init_state_id, call_set, return_set, int continue for _input, trans_spec in state_setup[state_id][1].items(): for (target_state_id, action, stack_guard) in trans_spec: - # action = Action[action_string] trans = VpaTransition(start=state, target=states[target_state_id], symbol=_input, action=action, stack_guard=stack_guard) state.transitions[_input].append(trans) @@ -195,78 +244,5 @@ def from_state_setup(state_setup: dict, init_state_id, call_set, return_set, int # states to list states = [state for state in states.values()] - vpa = Vpa(init_state, states, call_set, return_set, internal_set) + vpa = Vpa(init_state, states, input_alphabet) return vpa - - -def generate_data_from_pda(automaton, num_examples, lens=None, classify_states=False, stack_limit=None, - break_on_impossible=False, possible_prob=0.75): - import random - from itertools import product - - input_al = automaton.get_input_alphabet() - - if lens is None: - lens = list(range(1, 15)) - - sum_lens = sum(lens) - # key is length, value is number of examples for said length - ex_per_len = dict() - - additional_seq = 0 - for l in lens: - ex_per_len[l] = int(num_examples * (l / sum_lens)) + 1 - if ex_per_len[l] > pow(len(input_al), l): - additional_seq += ex_per_len[l] - pow(len(input_al), l) - ex_per_len[l] = 'comb' - - additional_seq = additional_seq // len([i for i in ex_per_len.values() if i != 'comb']) - - training_data = [] - for l in ex_per_len.keys(): - seqs = [] - if ex_per_len[l] == 'comb': - - seqs = list(product(input_al, repeat=l)) - for seq in seqs: - - out = automaton.reset_to_initial() - nr_steps = 0 - for inp in seq: - if automaton.possible(inp) or not break_on_impossible: - nr_steps += 1 - if stack_limit and len(automaton.stack) > stack_limit: - break - if break_on_impossible and not automaton.possible(inp): - break - out = automaton.step(inp) - 
seq = seq[:nr_steps] - training_data.append((tuple(seq), out if not classify_states else automaton.current_state.state_id)) - - else: - for _ in range(ex_per_len[l] + additional_seq): - # seq = [random.choice(input_al) for _ in range(l)] - out = automaton.reset() - nr_steps = 0 - seq = [] - for i in range(l): - possible_inp = [inp for inp in input_al if automaton.possible(inp)] - if len(possible_inp) == 0: - inp = random.choice(input_al) - else: - if random.random() <= possible_prob: - inp = random.choice(possible_inp) - else: - inp = random.choice(input_al) - seq.append(inp) - if automaton.possible(inp) or not break_on_impossible: - nr_steps += 1 - if stack_limit and len(automaton.stack) > stack_limit: - break - if break_on_impossible and not automaton.possible(inp): - break - out = automaton.step(inp) - seq = seq[:nr_steps] - training_data.append((tuple(seq), out)) - - return training_data diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index 12d9bda6..7bce42f5 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -305,7 +305,6 @@ def update(self, cex: tuple, hypothesis): s_i = self._sift(cex[:i]).access_string hypothesis.execute_sequence(hypothesis.initial_state, cex[:i]) s_star_i = hypothesis.current_state.prefix - if s_i != s_star_i: j = i d = self._least_common_ancestor(s_i, s_star_i) @@ -315,7 +314,7 @@ def update(self, cex: tuple, hypothesis): d = [] assert j is not None and d is not None - # TODO adapt for VPAs + hypothesis.execute_sequence(hypothesis.initial_state, cex[:j - 1] or tuple()) self._insert_new_leaf(discriminator=(cex[j - 1], *d), old_leaf_access_string=hypothesis.current_state.prefix, @@ -354,7 +353,7 @@ def update_rs(self, cex: tuple, hypothesis): # get discriminator and new_leaf_access_string if self.automaton_type == 'vpa': - discriminator = 
(tuple(hypothesis.transform_access_sequance()), tuple(v)) + discriminator = (tuple(hypothesis.transform_access_string()), tuple(v)) if a in self.alphabet.internal_alphabet: new_leaf_access_string = (*u_state.prefix, a) diff --git a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py index 16903a25..345bb367 100644 --- a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py +++ b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py @@ -81,7 +81,7 @@ def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, if not is_vpa: s_bracket = hypothesis.current_state.prefix else: - s_bracket = tuple(hypothesis.transform_access_sequance(hypothesis.current_state)) + s_bracket = tuple(hypothesis.transform_access_string(hypothesis.current_state)) d = tuple(cex_input[mid:]) mq = sul.query(s_bracket + d) diff --git a/aalpy/utils/BenchmarkVpaModels.py b/aalpy/utils/BenchmarkVpaModels.py index 1a18e079..90163c4f 100644 --- a/aalpy/utils/BenchmarkVpaModels.py +++ b/aalpy/utils/BenchmarkVpaModels.py @@ -1,26 +1,30 @@ -from aalpy.automata.Vpa import Vpa +from aalpy.automata.Vpa import Vpa, VpaAlphabet def vpa_for_L1(): # we always ensure that n >= 1 - call_set = {'a'} - return_set = {'b'} - internal_set = {} + call_set = ['a'] + return_set = ['b'] + internal_set = [] + + input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) state_setup = { "q0": (False, {"a": [("q1", 'push', "a")], "b": [(Vpa.error_state.state_id, None, None)]}), "q1": (False, {"a": [("q1", 'push', "a")], "b": [("q2", 'pop', "a")]}), "q2": (True, {"a": [(Vpa.error_state.state_id, None, None)], "b": [("q2", 'pop', "a")]}), } - vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) return vpa def vpa_for_L2(): - call_set = {'a', 'b'} - return_set = {'c', 'd'} - 
internal_set = {} + call_set = ['a', 'b'] + return_set = ['c', 'd'] + internal_set = [] + + input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) state_setup = { "q0": (False, {"a": [("q1", 'push', "a")], "b": [("q1", 'push', "b")], @@ -34,14 +38,16 @@ def vpa_for_L2(): "c": [("q2", 'pop', "a"), ("q2", 'pop', "b")], "d": [("q2", 'pop', "a"), ("q2", 'pop', "b")]}), } - vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) return vpa def vpa_for_L3(): - call_set = {'a', 'c', 'b', 'd'} - return_set = {'e', 'g', 'f', 'h'} - internal_set = {} + call_set = ['a', 'c', 'b', 'd'] + return_set = ['e', 'g', 'f', 'h'] + internal_set = [] + + input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) state_setup = { "q0": (False, {"a": [("q0a", 'push', "a")], @@ -61,14 +67,16 @@ def vpa_for_L3(): "q2": (True, {"e": [("q2e", 'pop', "b"), ("q2e", 'pop', "d")], "g": [("q2g", 'pop', "b"), ("q2g", 'pop', "d")]}) } - vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) return vpa def vpa_for_L4(): - call_set = {'a', 'b'} - return_set = {'c', 'd'} - internal_set = {} + call_set = ['a', 'b'] + return_set = ['c', 'd'] + internal_set = [] + + input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) state_setup = { "q0": (False, {"a": [("q01", 'push', "a")], "b": [(Vpa.error_state.state_id, None, None)]}), @@ -80,14 +88,16 @@ def vpa_for_L4(): "q21": (False, {"d": [("q2", 'pop', "a")]}), "q2": (True, {"c": [("q21", 'pop', "b")]}), } - vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) return vpa def vpa_for_L5(): - call_set = {'a', 'b', 'c'} - 
return_set = {'d', 'e', 'f'} - internal_set = {} + call_set = ['a', 'b', 'c'] + return_set = ['d', 'e', 'f'] + internal_set = [] + + input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) state_setup = { "q0": (False, {"a": [("q01", 'push', "a")]}), @@ -101,16 +111,18 @@ def vpa_for_L5(): "q22": (False, {"f": [("q2", 'pop', "a")]}), "q2": (True, {"d": [("q21", 'pop', "c")]}), } - vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) return vpa def vpa_for_L7(): # Dyck order 2 - call_set = {'(', '['} - return_set = {')', ']'} - internal_set = {} + call_set = ['(', '['] + return_set = [')', ']'] + internal_set = [] + + input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) state_setup = { "q0": (False, {"(": [("q1", 'push', '(')], @@ -122,16 +134,18 @@ def vpa_for_L7(): "]": [("q1", 'pop', "[")] }), } - vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) return vpa def vpa_for_L8(): # Dyck order 3 - call_set = {'(', '[', '{'} - return_set = {')', ']', '}'} - internal_set = {} + call_set = ['(', '[', '{'] + return_set = [')', ']', '}'] + internal_set = [] + + input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) state_setup = { "q0": (False, {"(": [("q1", 'push', '(')], @@ -146,16 +160,18 @@ def vpa_for_L8(): "}": [("q1", 'pop', "{")], }), } - vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) return vpa def vpa_for_L9(): # Dyck order 4 - call_set = {'(', '[', '{', '<'} - return_set = {')', ']', '}', '>'} - internal_set = {} + call_set = ['(', '[', '{', '<'] + return_set = [')', ']', '}', '>'] + internal_set = [] + + 
input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) state_setup = { "q0": (False, {"(": [("q1", 'push', '(')], @@ -173,16 +189,18 @@ def vpa_for_L9(): ">": [("q1", 'pop', ">")], }), } - vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) return vpa def vpa_for_L10(): # RE Dyck order 1 - call_set = {'a'} - return_set = {'v'} - internal_set = {'b', 'c', 'd', ' e', 'w', 'x', 'y', 'z'} + call_set = ['a'] + return_set = ['v'] + internal_set = ['b', 'c', 'd', 'e', 'w', 'x', 'y', 'z'] + + input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) state_setup = { "q0": (False, {"a": [("qa", 'push', "a")], @@ -202,7 +220,7 @@ def vpa_for_L10(): "qx": (False, {"y": [("qy", None, None)]}), "qy": (False, {"z": [("q1", None, None)]}) } - vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) return vpa @@ -213,6 +231,8 @@ def vpa_for_L11(): return_set = ['r1', 'r2'] internal_set = ['i1', 'i2'] + input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) + state_setup = { "q0": (False, {"c1": [("qa", 'push', "c1")], "c2": [("q1", 'push', "c2")], @@ -225,7 +245,7 @@ def vpa_for_L11(): "r2": [("q1", 'pop', "c1"), ("q1", 'pop', "c2")]}), "qd": (False, {"i2": [("q1", None, None)]}) } - vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) return vpa @@ -236,6 +256,8 @@ def vpa_for_L12(): return_set = [')', ']'] internal_set = [] + input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) + state_setup = { "q0": (False, {"(": [("q1", 'push', "(")], "[": [("q1", 'push', "[")], # exclude empty 
seq @@ -249,16 +271,18 @@ def vpa_for_L12(): "]": [("q2", 'pop', "[")] }), } - vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) return vpa def vpa_for_L13(): # Dyck order 1 - call_set = {'('} - return_set = {')'} - internal_set = {'a', 'b', 'c'} + call_set = ['('] + return_set = [')'] + internal_set = ['a', 'b', 'c'] + + input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) state_setup = { "q0": (False, {"(": [("q1", 'push', "(")], @@ -273,7 +297,7 @@ def vpa_for_L13(): "c": [("q1", None, None)] }), } - vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) return vpa @@ -284,6 +308,8 @@ def vpa_for_L14(): return_set = [')', ']'] internal_set = ['a', 'b', 'c'] + input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) + state_setup = { "q0": (False, {"(": [("q1", 'push', "(")], "[": [("q1", 'push', "[")], @@ -300,7 +326,7 @@ def vpa_for_L14(): "c": [("q1", None, None)] }), } - vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) return vpa @@ -311,6 +337,8 @@ def vpa_for_L15(): return_set = [')'] internal_set = ['a', 'b', 'c', 'd'] + input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) + state_setup = { "q0": (False, {"(": [("q1", 'push', "(")], "a": [("qa", None, None)], @@ -326,15 +354,18 @@ def vpa_for_L15(): "qb": (False, {"c": [("q1", None, None)], }) } - vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) return vpa def vpa_for_L16(): # just a testing language - call_set = {'a'} - return_set = {'b'} - internal_set = {} 
+ + call_set = ['a'] + return_set = ['b'] + internal_set = [] + + input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) state_setup = { "q0": (False, {"a": [("q1", 'push', "$")]}), @@ -343,5 +374,5 @@ def vpa_for_L16(): }), "q2": (True, {}) } - vpa = Vpa.from_state_setup(state_setup, "q0", call_set, return_set, internal_set) + vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) return vpa From 6fa45e3ab6b1a932e2b6ec5a4fba49b512990893 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Tue, 31 Oct 2023 10:03:14 +0100 Subject: [PATCH 33/62] update function call --- aalpy/learning_algs/deterministic/CounterExampleProcessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py index 155ef5b0..cf8b1742 100644 --- a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py +++ b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py @@ -127,7 +127,7 @@ def linear_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=Tr if not is_vpa: s_bracket = hypothesis.current_state.prefix else: - s_bracket = tuple(hypothesis.transform_access_sequance(hypothesis.current_state)) + s_bracket = tuple(hypothesis.transform_access_string(hypothesis.current_state)) sul_out = sul.query(s_bracket + suffix)[-1] From f32291c3820328e3082041386923b3a0a1a83cf8 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Tue, 31 Oct 2023 10:06:58 +0100 Subject: [PATCH 34/62] add linear cex processing to Lstar --- aalpy/learning_algs/deterministic/LStar.py | 10 +++++++--- det_cex_processing_test.py | 7 ++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/aalpy/learning_algs/deterministic/LStar.py b/aalpy/learning_algs/deterministic/LStar.py index eaa0f6ef..3861a796 100644 --- a/aalpy/learning_algs/deterministic/LStar.py +++ 
b/aalpy/learning_algs/deterministic/LStar.py @@ -3,11 +3,11 @@ from aalpy.base import Oracle, SUL from aalpy.utils.HelperFunctions import extend_set, print_learning_info, print_observation_table, all_prefixes from .CounterExampleProcessing import longest_prefix_cex_processing, rs_cex_processing, \ - counterexample_successfully_processed + counterexample_successfully_processed, linear_cex_processing from .ObservationTable import ObservationTable from ...base.SUL import CacheSUL -counterexample_processing_strategy = [None, 'rs', 'longest_prefix'] +counterexample_processing_strategy = [None, 'rs', 'longest_prefix', 'linear_fwd', 'linear_bwd'] closedness_options = ['suffix_all', 'suffix_single'] print_options = [0, 1, 2, 3] @@ -147,8 +147,12 @@ def run_Lstar(alphabet: list, sul: SUL, eq_oracle: Oracle, automaton_type, sampl elif cex_processing == 'longest_prefix': cex_suffixes = longest_prefix_cex_processing(observation_table.S + list(observation_table.s_dot_a()), cex, closedness='suffix') - else: + elif cex_processing == 'rs': cex_suffixes = rs_cex_processing(sul, cex, hypothesis, e_set_suffix_closed, closedness='suffix') + else: + direction = cex_processing[-3:] + cex_suffixes = linear_cex_processing(sul, cex, hypothesis, e_set_suffix_closed, + direction=direction, closedness='suffix') added_suffixes = extend_set(observation_table.E, cex_suffixes) observation_table.update_obs_table(e_set=added_suffixes) diff --git a/det_cex_processing_test.py b/det_cex_processing_test.py index fa29d55d..0059c0ff 100644 --- a/det_cex_processing_test.py +++ b/det_cex_processing_test.py @@ -1,7 +1,7 @@ -from aalpy.utils import generate_random_deterministic_automata +from aalpy.utils import generate_random_deterministic_automata, bisimilar from aalpy.SULs import MealySUL from aalpy.oracles import RandomWMethodEqOracle -from aalpy.learning_algs import run_KV +from aalpy.learning_algs import run_KV, run_Lstar for x in ['linear_fwd', 'linear_bwd']: for i in range(100): @@ -18,4 +18,5 @@ # 
select any of the oracles eq_oracle = RandomWMethodEqOracle(input_alphabet, sul, walks_per_state=10, walk_len=20) - learned_model = run_KV(input_alphabet, sul, eq_oracle, model_type, cex_processing=x, print_level=0) + learned_model = run_Lstar(input_alphabet, sul, eq_oracle, model_type, cex_processing=x, print_level=0) + assert bisimilar(random_model, learned_model) From f176fcfa6e3639f9c6e2e8a05c72223e46e9d7b2 Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Thu, 2 Nov 2023 12:39:15 +0100 Subject: [PATCH 35/62] exponential_fwd cex processing --- .../deterministic/ClassificationTree.py | 10 +++- .../deterministic/CounterExampleProcessing.py | 51 +++++++++++++++++-- aalpy/learning_algs/deterministic/KV.py | 2 +- 3 files changed, 56 insertions(+), 7 deletions(-) diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index cc6eedc0..bfdeb770 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -4,7 +4,8 @@ from aalpy.automata import DfaState, Dfa, MealyState, MealyMachine, MooreState, MooreMachine, \ SevpaAlphabet, SevpaState, SevpaTransition, Sevpa from aalpy.base import SUL -from aalpy.learning_algs.deterministic.CounterExampleProcessing import rs_cex_processing, linear_cex_processing +from aalpy.learning_algs.deterministic.CounterExampleProcessing import rs_cex_processing, linear_cex_processing, \ + exponential_cex_processing automaton_class = {'dfa': Dfa, 'mealy': MealyMachine, 'moore': MooreMachine} @@ -344,7 +345,12 @@ def process_counterexample(self, cex: tuple, hypothesis, cex_processing_fun): direction = cex_processing_fun[-3:] v = linear_cex_processing(self.sul, cex, hypothesis, is_vpa=self.automaton_type == 'vpa', direction=direction, suffix_closedness=False)[0] - if cex_processing_fun == 'rs': + elif 'exponential' in cex_processing_fun: + print(f'\nCex is {cex}') + direction = cex_processing_fun[-3:] + 
v = exponential_cex_processing(self.sul, cex, hypothesis, is_vpa=self.automaton_type == 'vpa', + direction=direction, suffix_closedness=False)[0] + elif cex_processing_fun == 'rs': v = rs_cex_processing(self.sul, cex, hypothesis, is_vpa=self.automaton_type == 'vpa', suffix_closedness=False)[0] diff --git a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py index cf8b1742..b3024daf 100644 --- a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py +++ b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py @@ -43,11 +43,14 @@ def longest_prefix_cex_processing(s_union_s_dot_a: list, cex: tuple, closedness= return suffixes -def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, closedness='suffix', is_vpa=False): +def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, closedness='suffix', is_vpa=False, + lower=0, upper=0): """Riverst-Schapire counter example processing. 
Args: + upper: upper boarder for cex (from preprocessing) + lower: lower boarder for cex (from preprocessing) sul: system under learning cex: found counterexample hypothesis: hypothesis on which counterexample was found @@ -65,8 +68,11 @@ def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, cex_out = sul.query(cex) cex_input = list(cex) - lower = 1 - upper = len(cex_input) - 2 + if lower == 0: + lower = 1 + + if upper == 0: + upper = len(cex_input) - 2 while True: hypothesis.reset_to_initial() @@ -156,4 +162,41 @@ def linear_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=Tr def exponential_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, closedness='suffix', direction='fwd', is_vpa=False): assert direction in {'fwd', 'bwd'} - pass + + direction = 'fwd' + + cex_out = sul.query(cex) + + bp = 1 + bp_recent = 0 + while True: + if bp > len(cex): + bp = len(cex) + break + prefix = cex[:bp] + suffix = cex[bp:] + assert cex == prefix + suffix + + hypothesis.reset_to_initial() + hypothesis.execute_sequence(hypothesis.initial_state, prefix) + + if not is_vpa: + s_bracket = hypothesis.current_state.prefix + else: + s_bracket = tuple(hypothesis.transform_access_string(hypothesis.current_state)) + + sul_out = sul.query(s_bracket + suffix) + + if sul_out[-1] != cex_out[-1]: + break + + bp_recent = bp + bp *= 2 + + if (bp - bp_recent) == 1: + return [suffix] + else: + return rs_cex_processing(sul, cex, hypothesis, suffix_closedness, closedness, is_vpa, lower=bp_recent) + + + diff --git a/aalpy/learning_algs/deterministic/KV.py b/aalpy/learning_algs/deterministic/KV.py index b0e208d0..67dd3b74 100644 --- a/aalpy/learning_algs/deterministic/KV.py +++ b/aalpy/learning_algs/deterministic/KV.py @@ -10,7 +10,7 @@ from ...base.SUL import CacheSUL print_options = [0, 1, 2, 3] -counterexample_processing_strategy = ['rs', 'linear_fwd', 'linear_bwd'] +counterexample_processing_strategy = ['rs', 'linear_fwd', 
'linear_bwd', 'exponential_fwd', 'exponential_bwd'] automaton_class = {'dfa': Dfa, 'mealy': MealyMachine, 'moore': MooreMachine, 'vpa': Sevpa} From 345c070913b360389772b908c294c34a19b1968d Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Thu, 2 Nov 2023 22:30:45 +0100 Subject: [PATCH 36/62] Random Word generator. --- aalpy/automata/Sevpa.py | 77 ++++++++++++++++++ aalpy/automata/Vpa.py | 79 +++++++++++++++++++ .../deterministic/ClassificationTree.py | 1 - test_main.py | 48 +++++++++++ 4 files changed, 204 insertions(+), 1 deletion(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index c8075ccb..6ee929e3 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -1,3 +1,4 @@ +import random from collections import defaultdict from typing import Union @@ -278,3 +279,79 @@ def create_daisy_hypothesis(initial_state, alphabet): initial_state.transitions[r].append(trans) return Sevpa(initial_state, [initial_state], alphabet) + + def gen_random_accepting_word(self, return_letter_prob: float = 0.0, call_letter_prob: float = 0.0, + early_finish: bool = True): + """ + Create a random word that gets accepted by the automaton. 
+ + Args: + + Returns: + """ + assert return_letter_prob + call_letter_prob <= 1.0 + word = [] + if return_letter_prob == 0.0 and call_letter_prob == 0.0: + return_letter_prob = 0.34 + call_letter_prob = 0.33 + elif return_letter_prob == 0.0 and call_letter_prob != 0.0: + return_letter_prob = (1.0 - call_letter_prob) / 2 + elif return_letter_prob != 0.0 and call_letter_prob == 0.0: + call_letter_prob = (1.0 - return_letter_prob) / 2 + + if len(self.input_alphabet.internal_alphabet) != 0: + internal_letter_prob = 1.0 - return_letter_prob - call_letter_prob + else: + internal_letter_prob = 0.0 + if return_letter_prob == 0.0 and call_letter_prob == 0.0: + return_letter_prob = 0.5 + call_letter_prob = 0.5 + elif return_letter_prob == 0.0 and call_letter_prob != 0.0: + return_letter_prob = (1.0 - call_letter_prob) + elif return_letter_prob != 0.0 and call_letter_prob == 0.0: + call_letter_prob = (1.0 - return_letter_prob) + + assert (call_letter_prob + return_letter_prob + internal_letter_prob) == 1.0 + + call_letter_boarder = call_letter_prob + return_letter_boarder = call_letter_boarder + return_letter_prob + internal_letter_boarder = return_letter_boarder + internal_letter_prob + + self.reset_to_initial() + while True: + letter_type = random.uniform(0.0, 1.0) + if 0.0 <= letter_type <= call_letter_boarder: + possible_letters = self.input_alphabet.call_alphabet + elif call_letter_boarder < letter_type <= return_letter_boarder: + # skip return letters if stack is empty or if the word is empty + if self.stack[-1] == self.empty or word == []: + continue + possible_letters = self.input_alphabet.return_alphabet + elif return_letter_boarder < letter_type <= internal_letter_boarder: + possible_letters = self.input_alphabet.internal_alphabet + else: + assert False + + assert len(possible_letters) > 0 + + letter = '' + if early_finish: + for l in possible_letters: + for transition in self.current_state.transitions[l]: + if transition.target.is_accepting: + letter = l + break + 
break + if letter == '': + random_trans_letter_index = random.randint(0, len(possible_letters) - 1) + letter = possible_letters[random_trans_letter_index] + self.step(letter) + if not self.error_state_reached: + word.append(letter) + else: + self.execute_sequence(self.initial_state, word) + + if self.current_state.is_accepting and self.stack[-1] == self.empty: + break + + return word diff --git a/aalpy/automata/Vpa.py b/aalpy/automata/Vpa.py index 795a3297..6f1e5944 100644 --- a/aalpy/automata/Vpa.py +++ b/aalpy/automata/Vpa.py @@ -1,3 +1,4 @@ +import random from collections import defaultdict from typing import List, Dict @@ -246,3 +247,81 @@ def from_state_setup(state_setup: dict, init_state_id: str, input_alphabet: VpaA vpa = Vpa(init_state, states, input_alphabet) return vpa + + + def gen_random_accepting_word(self, return_letter_prob: float = 0.0, call_letter_prob: float = 0.0, + early_finish: bool = True): + """ + Create a random word that gets accepted by the automaton. + + Args: + + Returns: + """ + assert return_letter_prob + call_letter_prob <= 1.0 + word = [] + if return_letter_prob == 0.0 and call_letter_prob == 0.0: + return_letter_prob = 0.34 + call_letter_prob = 0.33 + elif return_letter_prob == 0.0 and call_letter_prob != 0.0: + return_letter_prob = (1.0 - call_letter_prob) / 2 + elif return_letter_prob != 0.0 and call_letter_prob == 0.0: + call_letter_prob = (1.0 - return_letter_prob) / 2 + + if len(self.input_alphabet.internal_alphabet) != 0: + internal_letter_prob = 1.0 - return_letter_prob - call_letter_prob + else: + internal_letter_prob = 0.0 + if return_letter_prob == 0.0 and call_letter_prob == 0.0: + return_letter_prob = 0.5 + call_letter_prob = 0.5 + elif return_letter_prob == 0.0 and call_letter_prob != 0.0: + return_letter_prob = (1.0 - call_letter_prob) + elif return_letter_prob != 0.0 and call_letter_prob == 0.0: + call_letter_prob = (1.0 - return_letter_prob) + + assert (call_letter_prob + return_letter_prob + internal_letter_prob) == 
1.0 + + call_letter_boarder = call_letter_prob + return_letter_boarder = call_letter_boarder + return_letter_prob + internal_letter_boarder = return_letter_boarder + internal_letter_prob + + self.reset_to_initial() + while True: + letter_type = random.uniform(0.0, 1.0) + if 0.0 <= letter_type <= call_letter_boarder: + possible_letters = self.input_alphabet.call_alphabet + elif call_letter_boarder < letter_type <= return_letter_boarder: + # skip return letters if stack is empty or if the word is empty + if self.stack[-1] == self.empty or word == []: + continue + possible_letters = self.input_alphabet.return_alphabet + elif return_letter_boarder < letter_type <= internal_letter_boarder: + possible_letters = self.input_alphabet.internal_alphabet + else: + assert False + + assert len(possible_letters) > 0 + + letter = '' + if early_finish: + for l in possible_letters: + for transition in self.current_state.transitions[l]: + if transition.target.is_accepting: + letter = l + break + break + if letter == '': + random_trans_letter_index = random.randint(0, len(possible_letters) - 1) + letter = possible_letters[random_trans_letter_index] + self.step(letter) + if not self.current_state == self.error_state: + word.append(letter) + else: + self.reset_to_initial() + self.execute_sequence(self.initial_state, word) + + if self.current_state.is_accepting and self.stack[-1] == self.empty: + break + + return word diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index bfdeb770..61a1bd2f 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -346,7 +346,6 @@ def process_counterexample(self, cex: tuple, hypothesis, cex_processing_fun): v = linear_cex_processing(self.sul, cex, hypothesis, is_vpa=self.automaton_type == 'vpa', direction=direction, suffix_closedness=False)[0] elif 'exponential' in cex_processing_fun: - print(f'\nCex is 
{cex}') direction = cex_processing_fun[-3:] v = exponential_cex_processing(self.sul, cex, hypothesis, is_vpa=self.automaton_type == 'vpa', direction=direction, suffix_closedness=False)[0] diff --git a/test_main.py b/test_main.py index d5c33049..a2ae3832 100644 --- a/test_main.py +++ b/test_main.py @@ -73,6 +73,54 @@ def test_on_random_svepa(): print_level=2, cex_processing='rs') +def test_random_word_gen(): + model_under_learning = vpa_for_L11() + model_under_learning.visualize('InitialModel') + + # Learn Model + alphabet = SevpaAlphabet(list(model_under_learning.internal_set), + list(model_under_learning.call_set), + list(model_under_learning.return_set)) + + sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) + + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=100000) + model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', + print_level=2, cex_processing='exponential_fwd') + + sul_model = SevpaSUL(model, include_top=False, check_balance=False) + + total_len = 0 + for i in range(0, 100): + random_word = model_under_learning.gen_random_accepting_word(return_letter_prob=0.5) + total_len += len(random_word) + out_model = sul_model.query(random_word)[-1] + out_sul = sul.query(random_word)[-1] + assert out_model == out_sul and out_model + + print(f'All tests passed average word length: {total_len/100}') + + total_len = 0 + for i in range(0, 100): + random_word = model_under_learning.gen_random_accepting_word(return_letter_prob=0.5, early_finish=False) + total_len += len(random_word) + out_model = sul_model.query(random_word)[-1] + out_sul = sul.query(random_word)[-1] + assert out_model == out_sul and out_model + + print(f'All tests passed average word length: {total_len/100}') + + total_len = 0 + for i in range(0, 100): + random_word = model_under_learning.gen_random_accepting_word(early_finish=False) + total_len += len(random_word) + out_model = 
sul_model.query(random_word)[-1] + out_sul = sul.query(random_word)[-1] + assert out_model == out_sul and out_model + + print(f'All tests passed average word length: {total_len/100}') + + # test_arithmetic_expression() # import cProfile # pr = cProfile.Profile() From 4cef2d81ecb8e8cb1162b19aea9c7e85c6a09cc5 Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Fri, 3 Nov 2023 15:41:06 +0100 Subject: [PATCH 37/62] Random Word generator with BFS. --- aalpy/automata/Sevpa.py | 33 ++++++++++++++++++++++++++++++++- test_main.py | 1 - 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 6ee929e3..a58c14c8 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -1,5 +1,5 @@ import random -from collections import defaultdict +from collections import defaultdict, deque from typing import Union from aalpy.base import Automaton, AutomatonState @@ -280,6 +280,37 @@ def create_daisy_hypothesis(initial_state, alphabet): return Sevpa(initial_state, [initial_state], alphabet) + def gen_random_accepting_word_bfs(self, min_word_length: int = 0): + """ + Create a random word that gets accepted by the automaton with the breadth-first search approach. 
+ + Args: + + Returns: + """ + self.reset_to_initial() + queue = deque() + shuffled_alphabet = self.input_alphabet.get_merged_alphabet() + random.shuffle(shuffled_alphabet) + for letter in shuffled_alphabet: + queue.append([letter]) + + while queue: + word = queue.popleft() + if len(word) >= min_word_length: + self.reset_to_initial() + self.execute_sequence(self.initial_state, word) + # skipping words that lead into the error state will also shorten growth of the queue + if self.error_state_reached: + continue + if self.current_state.is_accepting and self.stack[-1] == self.empty: + return word + shuffled_alphabet = self.input_alphabet.get_merged_alphabet() + random.shuffle(shuffled_alphabet) + for letter in shuffled_alphabet: + new_word = word + [letter] + queue.append(new_word) + def gen_random_accepting_word(self, return_letter_prob: float = 0.0, call_letter_prob: float = 0.0, early_finish: bool = True): """ diff --git a/test_main.py b/test_main.py index a2ae3832..c7ac0ef8 100644 --- a/test_main.py +++ b/test_main.py @@ -75,7 +75,6 @@ def test_on_random_svepa(): def test_random_word_gen(): model_under_learning = vpa_for_L11() - model_under_learning.visualize('InitialModel') # Learn Model alphabet = SevpaAlphabet(list(model_under_learning.internal_set), From 03f5c4da0f6a50aa4cfb36c482e3afec5fc4c7e7 Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Sat, 4 Nov 2023 15:21:49 +0100 Subject: [PATCH 38/62] Method to find error state --- aalpy/automata/Sevpa.py | 105 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index a58c14c8..6441cf64 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -280,6 +280,111 @@ def create_daisy_hypothesis(initial_state, alphabet): return Sevpa(initial_state, [initial_state], alphabet) + def find_error_states(self): + """ + - if all transitions self loop to itself + - if the pop transitions from the corresponding stack symbol lead 
to the same state + - for example: + - all q2 transitions lead to q2 + - the pop transitions from the initial state which pop the q2+call-symbol from the stack lead to q2 as well + + - do not do if the state is the initial state or an accepting state + """ + error_states = [] + for state in self.states: + error_state = True + if state.is_accepting or state == self.initial_state: + continue + + state_target = None + # check internal and return transitions + ret_int_al = [] + ret_int_al.extend(self.input_alphabet.internal_alphabet) + ret_int_al.extend(self.input_alphabet.return_alphabet) + for letter in ret_int_al: + for transition in state.transitions[letter]: + if state_target is None: + state_target = transition.target + else: + if state_target != transition.target: + error_state = False + break + if not error_state: + break + + # check call transitions + if error_state: + for return_letter in self.input_alphabet.return_alphabet: + for transition in self.initial_state.transitions[return_letter]: + if transition.stack_guard[0] == state_target.state_id: + if transition.target != state_target: + error_state = False + break + if not error_state: + break + else: + continue + + if error_state: + error_states.append(state.state_id) + + return error_states + + def find_error_states_unfinished(self): + """ + TODO: This is an unfinished idea and might be finished or deleted + + - if the initial state with an empty stack can be reached the state is valid + - if an accepting state with an empty stack can be reached all states inbetween are valid + - store the valid states with the respective stack setting in a dict with a list of valid stack settings + + - new states we reach are getting added to the suspicious list (with the stack setting) + - if a valid state + stack setting is reached we don't care about it anymore (no more words are getting generated) + - if a state leads into another suspicious state we don't care about that state any more as well + - stop if all 
transitions lead to a non-valid state ? + + criteria to be an error state: + - jumping between the initial state and the supposed error state + - if all target states are the same for internal/return transitions (there are no call transitions anyway) + + """ + valid_states = defaultdict(set) + error_states = defaultdict(set) + valid_states[self.initial_state.state_id].add(self.stack) + self.reset_to_initial() + queue = deque() + for letter in self.input_alphabet.get_merged_alphabet(): + queue.append([letter]) + + while queue: + if len(valid_states) + len(error_states) == len(self.states): + return error_states + word = queue.popleft() + self.reset_to_initial() + self.execute_sequence(self.initial_state, word) + if self.error_state_reached: + continue + + for letter in word: + self.step(letter) + if self.current_state.state_id in valid_states: + if self.stack in valid_states[self.current_state.state_id]: + for letter_ in word: + self.step(letter_) + valid_states[self.current_state.state_id].add(self.stack) + continue + + # if an accepting state or the initial state + empty stack can be reached all states inbetween are valid + if (self.current_state.is_accepting or self.current_state == self.initial_state) and self.stack[-1] == self.empty: + self.reset_to_initial() + for letter in word: + self.step(letter) + valid_states[self.current_state.state_id].add(self.stack) + + for letter in self.input_alphabet.get_merged_alphabet(): + new_word = word + [letter] + queue.append(new_word) + def gen_random_accepting_word_bfs(self, min_word_length: int = 0): """ Create a random word that gets accepted by the automaton with the breadth-first search approach. 
From ee564f38da9d20b3850d72ee05c9f5c8a8be9972 Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Sat, 4 Nov 2023 21:11:15 +0100 Subject: [PATCH 39/62] Method to delete states --- aalpy/automata/Sevpa.py | 45 ++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 6441cf64..6488ab9f 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -312,7 +312,7 @@ def find_error_states(self): if not error_state: break - # check call transitions + # check return transitions from the initial state if error_state: for return_letter in self.input_alphabet.return_alphabet: for transition in self.initial_state.transitions[return_letter]: @@ -330,6 +330,34 @@ def find_error_states(self): return error_states + def delete_state(self, state_id): + state = self.get_state_by_id(state_id) + + if state is not None: + self.states.remove(state) + else: + assert False and f'State {state_id} does not exist' + + for state in self.states: + ret_int_al = [] + ret_int_al.extend(self.input_alphabet.internal_alphabet) + ret_int_al.extend(self.input_alphabet.return_alphabet) + for letter in ret_int_al: + cleaned_transitions = [] + for transition in state.transitions[letter]: + if transition.stack_guard is not None: + if transition.stack_guard[0] == state_id: + continue + if transition.target.state_id == state_id: + continue + + cleaned_transitions.append(transition) + del state.transitions[letter] + state.transitions[letter] = cleaned_transitions + + + + def find_error_states_unfinished(self): """ TODO: This is an unfinished idea and might be finished or deleted @@ -402,14 +430,13 @@ def gen_random_accepting_word_bfs(self, min_word_length: int = 0): while queue: word = queue.popleft() - if len(word) >= min_word_length: - self.reset_to_initial() - self.execute_sequence(self.initial_state, word) - # skipping words that lead into the error state will also shorten growth of the queue - 
if self.error_state_reached: - continue - if self.current_state.is_accepting and self.stack[-1] == self.empty: - return word + self.reset_to_initial() + self.execute_sequence(self.initial_state, word) + # skipping words that lead into the error state will also shorten growth of the queue + if self.error_state_reached: + continue + if self.current_state.is_accepting and self.stack[-1] == self.empty and len(word) >= min_word_length: + return word shuffled_alphabet = self.input_alphabet.get_merged_alphabet() random.shuffle(shuffled_alphabet) for letter in shuffled_alphabet: From c8d522562991cab970c2931654f0c9e2e48634c7 Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Sun, 5 Nov 2023 13:24:39 +0100 Subject: [PATCH 40/62] to_state_setup for SEVPA --- aalpy/automata/Sevpa.py | 11 +++++------ aalpy/automata/Vpa.py | 1 - 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 6488ab9f..69fa6e38 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -178,13 +178,12 @@ def execute_sequence(self, origin_state, seq): def to_state_setup(self): state_setup_dict = {} - # ensure prefixes are computed - # self.compute_prefixes() - # TODO sorted_states = sorted(self.states, key=lambda x: len(x.state_id)) - for s in sorted_states: - state_setup_dict[s.state_id] = ( - s.is_accepting, {k: (v.target.state_id, v.action) for k, v in s.transitions.items()}) + for state in sorted_states: + state_setup_dict[state.state_id] = ( + state.is_accepting, {symbol: (trans.target.state_id, trans.action, trans.stack_guard) + for symbol, transitions in state.transitions.items() + for trans in transitions}) return state_setup_dict diff --git a/aalpy/automata/Vpa.py b/aalpy/automata/Vpa.py index 6f1e5944..d543c9a1 100644 --- a/aalpy/automata/Vpa.py +++ b/aalpy/automata/Vpa.py @@ -248,7 +248,6 @@ def from_state_setup(state_setup: dict, init_state_id: str, input_alphabet: VpaA vpa = Vpa(init_state, states, input_alphabet) 
return vpa - def gen_random_accepting_word(self, return_letter_prob: float = 0.0, call_letter_prob: float = 0.0, early_finish: bool = True): """ From 62aec921293946c60eb03d7cdb2ca5959b669a2e Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Mon, 6 Nov 2023 14:36:35 +0100 Subject: [PATCH 41/62] L1-L15 for SEVPA. Update to_state_setup for SEVPA. --- aalpy/automata/Sevpa.py | 22 +- aalpy/automata/Vpa.py | 2 +- aalpy/utils/BenchmarkSevpaModels.py | 418 ++++++++++++++++++++++++++-- aalpy/utils/BenchmarkVpaModels.py | 2 +- sevpa_eq_checks.py | 79 ++++++ test_main.py | 22 ++ 6 files changed, 514 insertions(+), 31 deletions(-) create mode 100644 sevpa_eq_checks.py diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 69fa6e38..1ce31f01 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -180,10 +180,20 @@ def to_state_setup(self): sorted_states = sorted(self.states, key=lambda x: len(x.state_id)) for state in sorted_states: - state_setup_dict[state.state_id] = ( - state.is_accepting, {symbol: (trans.target.state_id, trans.action, trans.stack_guard) - for symbol, transitions in state.transitions.items() - for trans in transitions}) + transitions_for_symbol = {} + for symbol, trans_list in state.transitions.items(): + trans_list_for_setup = [] + for trans in trans_list: + trans_list_for_setup.append((trans.target.state_id, trans.action, trans.stack_guard)) + if trans_list_for_setup: + transitions_for_symbol[symbol] = trans_list_for_setup + state_setup_dict[state.state_id] = (state.is_accepting, transitions_for_symbol) + + + # for state in sorted_states: + # state_setup_dict[state.state_id] = ( + # state.is_accepting, {symbol: trans_list + # for symbol, trans_list in state.transitions.items()}) return state_setup_dict @@ -192,12 +202,10 @@ def from_state_setup(state_setup: dict, init_state_id, input_alphabet: SevpaAlph # build states with state_id and output states = {key: SevpaState(key, val[0]) for key, val in state_setup.items()} - 
states[Sevpa.error_state.state_id] = Sevpa.error_state # PdaState(Pda.error_state,False) + # states[Sevpa.error_state.state_id] = Sevpa.error_state # PdaState(Pda.error_state,False) # add transitions to states for state_id, state in states.items(): - if state_id == Sevpa.error_state.state_id: - continue for _input, trans_spec in state_setup[state_id][1].items(): for (target_state_id, action, stack_guard) in trans_spec: if action == 'pop': diff --git a/aalpy/automata/Vpa.py b/aalpy/automata/Vpa.py index d543c9a1..94e97816 100644 --- a/aalpy/automata/Vpa.py +++ b/aalpy/automata/Vpa.py @@ -90,7 +90,7 @@ def __init__(self, initial_state: VpaState, states, input_alphabet: VpaAlphabet) self.states = states self.input_alphabet = input_alphabet self.current_state = None - self.call_balance = 0 + self.call_balance = 0 # TODO: we don't use that self.stack = [] # alphabet sets for faster inclusion checks (as in VpaAlphabet we have lists, for reproducibility) diff --git a/aalpy/utils/BenchmarkSevpaModels.py b/aalpy/utils/BenchmarkSevpaModels.py index 6efe96ad..8b83f1df 100644 --- a/aalpy/utils/BenchmarkSevpaModels.py +++ b/aalpy/utils/BenchmarkSevpaModels.py @@ -1,48 +1,422 @@ from aalpy.automata.Sevpa import Sevpa, SevpaAlphabet -def sevpa_for_L12_refined(): - # Like L12 Language (Balanced parathesis) but the state setup is different +def sevpa_for_L1(): + call_set = ['a'] + return_set = ['b'] + internal_set = [] + input_alphabet = SevpaAlphabet( + internal_alphabet=internal_set, + call_alphabet=call_set, + return_alphabet=return_set + ) + + state_setup = { + 'q0': (False, {'b': [('q1', 'pop', ('q0', 'a'))] + }), + 'q1': (True, {'b': [('q1', 'pop', ('q0', 'a'))] + }) + } + sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + return sevpa + + +def sevpa_for_L2(): + call_set = ['a', 'b'] + return_set = ['c', 'd'] + internal_set = [] + + input_alphabet = SevpaAlphabet( + internal_alphabet=internal_set, + call_alphabet=call_set, + return_alphabet=return_set + ) + + 
state_setup = { + 'q0': (False, {'d': [('q1', 'pop', ('q0', 'a')), ('q1', 'pop', ('q0', 'b'))], + 'c': [('q1', 'pop', ('q0', 'a')), ('q1', 'pop', ('q0', 'b'))] + }), + 'q1': (True, {'d': [('q1', 'pop', ('q0', 'a')), ('q1', 'pop', ('q0', 'b'))], + 'c': [('q1', 'pop', ('q0', 'a')), ('q1', 'pop', ('q0', 'b'))] + }) + } + sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + return sevpa + + +def sevpa_for_L3(): + call_set = ['a', 'c', 'b', 'd'] + return_set = ['e', 'g', 'f', 'h'] + internal_set = [] + + input_alphabet = SevpaAlphabet( + internal_alphabet=internal_set, + call_alphabet=call_set, + return_alphabet=return_set + ) + + state_setup = { + 'q0': (False, {'g': [('q6', 'pop', ('q0', 'd')), + ('q4', 'pop', ('q0', 'b'))], + 'e': [('q5', 'pop', ('q0', 'd')), + ('q2', 'pop', ('q0', 'b'))] + }), + 'q1': (True, {'g': [('q6', 'pop', ('q0', 'd')), + ('q4', 'pop', ('q0', 'b'))], + 'e': [('q5', 'pop', ('q0', 'd')), + ('q2', 'pop', ('q0', 'b'))] + }), + 'q2': (False, {'f': [('q1', 'pop', ('q0', 'a'))] + }), + 'q4': (False, {'h': [('q1', 'pop', ('q0', 'a'))] + }), + 'q5': (False, {'f': [('q1', 'pop', ('q0', 'c'))] + }), + 'q6': (False, {'h': [('q1', 'pop', ('q0', 'c'))] + }) + } + sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + return sevpa + + +def sevpa_for_L4(): + call_set = ['a', 'b'] + return_set = ['c', 'd'] + internal_set = [] + + input_alphabet = SevpaAlphabet( + internal_alphabet=internal_set, + call_alphabet=call_set, + return_alphabet=return_set + ) + + state_setup = { + 'q0': (False, {'c': [('q2', 'pop', ('q0', 'b'))] + }), + 'q1': (True, {'c': [('q2', 'pop', ('q0', 'b'))] + }), + 'q2': (False, {'d': [('q1', 'pop', ('q0', 'a'))] + }) + } + sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + return sevpa + + +def sevpa_for_L5(): + call_set = ['a', 'b', 'c'] + return_set = ['d', 'e', 'f'] + internal_set = [] + + input_alphabet = SevpaAlphabet( + internal_alphabet=internal_set, + call_alphabet=call_set, + 
return_alphabet=return_set + ) + + state_setup = { + 'q0': (False, {'d': [('q2', 'pop', ('q0', 'c'))] + }), + 'q1': (True, {'d': [('q2', 'pop', ('q0', 'c'))] + }), + 'q2': (False, {'e': [('q3', 'pop', ('q0', 'b'))] + }), + 'q3': (False, {'f': [('q1', 'pop', ('q0', 'a'))] + }) + } + sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + return sevpa + + +def sevpa_for_L7(): call_set = ['(', '['] return_set = [')', ']'] - internal_set = ['x'] + internal_set = [] - input_alphabet = SevpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) + input_alphabet = SevpaAlphabet( + internal_alphabet=internal_set, + call_alphabet=call_set, + return_alphabet=return_set + ) + + state_setup = { + 'q0': (False, {')': [('q1', 'pop', ('q0', '(')), + ('q1', 'pop', ('q1', '('))], + ']': [('q1', 'pop', ('q0', '[')), + ('q1', 'pop', ('q1', '['))] + }), + 'q1': (True, {')': [('q1', 'pop', ('q0', '(')), + ('q1', 'pop', ('q1', '('))], + ']': [('q1', 'pop', ('q0', '[')), + ('q1', 'pop', ('q1', '['))] + }) + + } + sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + return sevpa + + +def sevpa_for_L8(): + call_set = ['(', '[', '{'] + return_set = [')', ']', '}'] + internal_set = [] + + input_alphabet = SevpaAlphabet( + internal_alphabet=internal_set, + call_alphabet=call_set, + return_alphabet=return_set + ) state_setup = { - "q0": (False, {")": [("q1", 'pop', ("q0", "("))], - "]": [("q1", 'pop', ("q0", "["))], - "x": [("q1", None, None)] + 'q0': (False, {')': [('q1', 'pop', ('q0', '(')), + ('q1', 'pop', ('q1', '('))], + '}': [('q1', 'pop', ('q0', '{')), + ('q1', 'pop', ('q1', '{'))], + ']': [('q1', 'pop', ('q0', '[')), + ('q1', 'pop', ('q1', '['))] }), - "q1": (True, {")": [("q1", 'pop', ("q0", "("))], - "]": [("q1", 'pop', ("q0", "["))], - "x": [("q0", None, None)] + 'q1': (True, {')': [('q1', 'pop', ('q0', '(')), + ('q1', 'pop', ('q1', '('))], + '}': [('q1', 'pop', ('q0', '{')), + ('q1', 'pop', ('q1', '{'))], + ']': 
[('q1', 'pop', ('q0', '[')), + ('q1', 'pop', ('q1', '['))] + }) + } + sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + return sevpa + + +def sevpa_for_L9(): + call_set = ['(', '[', '{', '<'] + return_set = [')', ']', '}', '>'] + internal_set = [] + + input_alphabet = SevpaAlphabet( + internal_alphabet=internal_set, + call_alphabet=call_set, + return_alphabet=return_set + ) + + state_setup = { + 'q0': (False, {']': [('q1', 'pop', ('q0', '[')), + ('q1', 'pop', ('q1', '['))], + '}': [('q1', 'pop', ('q0', '{')), + ('q1', 'pop', ('q1', '{'))], + ')': [('q1', 'pop', ('q0', '(')), + ('q1', 'pop', ('q1', '('))], + '>': [('q1', 'pop', ('q0', '<')), + ('q1', 'pop', ('q1', '<'))] + }), + 'q1': (True, {']': [('q1', 'pop', ('q0', '[')), + ('q1', 'pop', ('q1', '['))], + '}': [('q1', 'pop', ('q0', '{')), + ('q1', 'pop', ('q1', '{'))], + ')': [('q1', 'pop', ('q0', '(')), + ('q1', 'pop', ('q1', '('))], + '>': [('q1', 'pop', ('q0', '<')), + ('q1', 'pop', ('q1', '<'))] + }) + } + + sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + return sevpa + + +def sevpa_for_L10(): + call_set = ['a'] + return_set = ['v'] + internal_set = ['b', 'c', 'd', 'e', 'w', 'x', 'y', 'z'] + + input_alphabet = SevpaAlphabet( + internal_alphabet=internal_set, + call_alphabet=call_set, + return_alphabet=return_set + ) + + state_setup = { + "q0": (False, {"b": [("qb", None, None)], + }), + "qb": (False, {"c": [("qc", None, None)], + }), + "qc": (False, {"d": [("qd", None, None)], + }), + "qd": (False, {"e": [("q1", None, None)], + }), + "q1": (False, {"v": [("qv", 'pop', ('q0', 'a')), + ("qv", 'pop', ('q1', 'a')), + ("qv", 'pop', ('q2', 'a'))] + }), + "qv": (False, {"w": [("qw", None, None)] + }), + "qw": (False, {"x": [("qx", None, None)] + }), + "qx": (False, {"y": [("qy", None, None)] + }), + "qy": (False, {"z": [("q2", None, None)] + }), + "q2": (True, {"v": [("qv", 'pop', ('q0', 'a')), + ("qv", 'pop', ('q1', 'a')), + ("qv", 'pop', ('q2', 'a'))] + }) + } + sevpa = 
Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + return sevpa + + +def sevpa_for_L11(): + call_set = ['c1', 'c2'] + return_set = ['r1', 'r2'] + internal_set = ['i1', 'i2'] + + input_alphabet = SevpaAlphabet( + internal_alphabet=internal_set, + call_alphabet=call_set, + return_alphabet=return_set + ) + + state_setup = { + 'q0': (False, {'i1': [('q2', None, None)], + 'r1': [('q3', 'pop', ('q0', 'c2')), + ('q3', 'pop', ('q1', 'c2')), + ('q5', 'pop', ('q2', 'c2'))], + 'r2': [('q1', 'pop', ('q0', 'c2')), + ('q1', 'pop', ('q1', 'c2')), + ('q2', 'pop', ('q2', 'c2'))] + }), + 'q1': (True, {'r1': [('q3', 'pop', ('q0', 'c2')), + ('q3', 'pop', ('q1', 'c2')), + ('q5', 'pop', ('q2', 'c2'))], + 'r2': [('q1', 'pop', ('q0', 'c2')), + ('q1', 'pop', ('q1', 'c2')), + ('q2', 'pop', ('q2', 'c2'))] }), + 'q2': (False, {'r1': [('q3', 'pop', ('q0', 'c1')), + ('q3', 'pop', ('q1', 'c1')), + ('q5', 'pop', ('q2', 'c1'))], + 'r2': [('q1', 'pop', ('q0', 'c1')), + ('q1', 'pop', ('q1', 'c1')), + ('q2', 'pop', ('q2', 'c1'))] + }), + 'q3': (False, {'i2': [('q1', None, None)] + }), + 'q5': (False, {'i2': [('q2', None, None)] + }) } - return Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + return sevpa -def sevpa_congruence_for_vpa_paper(): - # This is a 1-SEVPA which accepts the language L = c1L1r + c2L2r - # L1 is a regular language which has an even number of a's - # L2 is a regular language which has an even number of b's +def sevpa_for_L12(): call_set = ['(', '['] return_set = [')', ']'] - internal_set = ['x'] + internal_set = [] + + input_alphabet = SevpaAlphabet( + internal_alphabet=internal_set, + call_alphabet=call_set, + return_alphabet=return_set + ) + + state_setup = { + 'q0': (False, {']': [('q1', 'pop', ('q0', '['))], + ')': [('q1', 'pop', ('q0', '('))] + }), + 'q1': (True, {']': [('q1', 'pop', ('q0', '['))], + ')': [('q1', 'pop', ('q0', '('))] + }) + } + sevpa = Sevpa.from_state_setup(state_setup, 
"q0", input_alphabet) + return sevpa + + +def sevpa_for_L13(): + call_set = ['('] + return_set = [')'] + internal_set = ['a', 'b', 'c'] + + input_alphabet = SevpaAlphabet( + internal_alphabet=internal_set, + call_alphabet=call_set, + return_alphabet=return_set + ) + + state_setup = { + 'q0': (False, {'c': [('q1', None, None)], + 'b': [('q1', None, None)], + 'a': [('q1', None, None)], + ')': [('q1', 'pop', ('q0', '(')), + ('q1', 'pop', ('q1', '('))] + }), + 'q1': (True, {'c': [('q1', None, None)], + 'b': [('q1', None, None)], + 'a': [('q1', None, None)], + ')': [('q1', 'pop', ('q0', '(')), + ('q1', 'pop', ('q1', '('))] + }) + } + sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + return sevpa + + +def sevpa_for_L14(): + call_set = ['(', '['] + return_set = [')', ']'] + internal_set = ['a', 'b', 'c'] + + input_alphabet = SevpaAlphabet( + internal_alphabet=internal_set, + call_alphabet=call_set, + return_alphabet=return_set + ) + + state_setup = { + 'q0': (False, {'a': [('q1', None, None)], + 'b': [('q1', None, None)], + 'c': [('q1', None, None)], + ']': [('q1', 'pop', ('q0', '[')), + ('q1', 'pop', ('q1', '['))], + ')': [('q1', 'pop', ('q0', '(')), + ('q1', 'pop', ('q1', '('))] + }), + 'q1': (True, {'a': [('q1', None, None)], + 'b': [('q1', None, None)], + 'c': [('q1', None, None)], + ']': [('q1', 'pop', ('q0', '[')), + ('q1', 'pop', ('q1', '['))], + ')': [('q1', 'pop', ('q0', '(')), + ('q1', 'pop', ('q1', '('))] + }) + } + sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + return sevpa + + +def sevpa_for_L15(): + # Dyck order 1 + + call_set = ['('] + return_set = [')'] + internal_set = ['a', 'b', 'c', 'd'] input_alphabet = SevpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) state_setup = { - "q0": (False, {")": [("q1", 'pop', ("q0", "("))], - "]": [("q1", 'pop', ("q0", "["))], - "x": [("q1", None, None)] + 'q0': (False, {'d': [('q1', None, None)], + 'a': [('q2', None, None)], + ')': 
[('q1', 'pop', ('q0', '(')), + ('q1', 'pop', ('q1', '('))] }), - "q1": (True, {")": [("q1", 'pop', ("q0", "("))], - "]": [("q1", 'pop', ("q0", "["))], - "x": [("q0", None, None)] + 'q1': (True, {'d': [('q1', None, None)], + 'a': [('q2', None, None)], + ')': [('q1', 'pop', ('q0', '(')), + ('q1', 'pop', ('q1', '('))] }), + 'q2': (False, {'b': [('q3', None, None)] + }), + 'q3': (False, {'c': [('q1', None, None)] + }) } sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) return sevpa diff --git a/aalpy/utils/BenchmarkVpaModels.py b/aalpy/utils/BenchmarkVpaModels.py index 90163c4f..ed51c615 100644 --- a/aalpy/utils/BenchmarkVpaModels.py +++ b/aalpy/utils/BenchmarkVpaModels.py @@ -186,7 +186,7 @@ def vpa_for_L9(): ")": [("q1", 'pop', "(")], "]": [("q1", 'pop', "[")], "}": [("q1", 'pop', "{")], - ">": [("q1", 'pop', ">")], + ">": [("q1", 'pop', "<")], }), } vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) diff --git a/sevpa_eq_checks.py b/sevpa_eq_checks.py new file mode 100644 index 00000000..59473d0d --- /dev/null +++ b/sevpa_eq_checks.py @@ -0,0 +1,79 @@ +from aalpy.SULs.AutomataSUL import VpaSUL, SevpaSUL +import random +import aalpy.utils.BenchmarkSevpaModels as SEVPAs +import aalpy.utils.BenchmarkVpaModels as VPAs + +amount_languages = 15 + +missing_languages = {6} + +sevpa_suls = [] +vpa_suls = [] +alphabets = [] + +for l in range(1, amount_languages+1): + if l in missing_languages: + sevpa_suls.append(None) + vpa_suls.append(None) + alphabets.append(None) + continue + + language_sevpa = f'sevpa_for_L{l}' + language_vpa = f'vpa_for_L{l}' + + # Get SEVPAs + if hasattr(SEVPAs, language_sevpa): + sevpa = getattr(SEVPAs, language_sevpa)() + else: + print(f"Function {language_sevpa} not found") + continue + sevpa_input_alphabet = sevpa.get_input_alphabet() + sevpa_sul = SevpaSUL(sevpa, include_top=False, check_balance=False) + sevpa_suls.append(sevpa_sul) + alphabets.append(sevpa_input_alphabet) + + # Get VPA + if hasattr(VPAs, language_vpa): 
+ vpa = getattr(VPAs, language_vpa)() + else: + print(f"Function {language_vpa} not found") + continue + vpa_input_alphabet = vpa.input_alphabet.get_merged_alphabet + vpa_sul = VpaSUL(vpa, include_top=False, check_balance=False) + vpa_suls.append(vpa_sul) + +for l in range(0, amount_languages): + language_index = l+1 + print(f'Checking Language L{language_index}') + if language_index in missing_languages: + print(f'Skipping L{language_index}') + continue + tests_passed = True + for i in range(0, 100000): + word_length = random.randint(1, 100) + word = [] + for j in range(0, word_length): + word.append(random.choice(alphabets[l])) + + pda_out = sevpa_suls[l].query(tuple(word)) + vpa_out = vpa_suls[l].query(tuple(word)) + + if pda_out == vpa_out: + continue + else: + print(f'Language L{language_index} failed on following test:') + print(f'Input: {word}') + print(f'Pda out: {pda_out} \nVpa out: {vpa_out}') + tests_passed = False + break + + if tests_passed: + print(f'Language L{language_index} passed') + else: + print(f'Language L{language_index} failed') + + + + + + diff --git a/test_main.py b/test_main.py index c7ac0ef8..ac17127d 100644 --- a/test_main.py +++ b/test_main.py @@ -120,6 +120,28 @@ def test_random_word_gen(): print(f'All tests passed average word length: {total_len/100}') +def visual_test_to_state_setup_sevpa(): + vpa = vpa_for_L11() + + alphabet = SevpaAlphabet(list(vpa.internal_set), + list(vpa.call_set), + list(vpa.return_set)) + sul = VpaSUL(vpa, include_top=False, check_balance=False) + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=100000) + model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', + print_level=2, cex_processing='rs') + + error_states = model.find_error_states() + if error_states is not None: + model.delete_state(error_states[0]) + + # for visual comparison + model.visualize() + state_setup_dict = model.to_state_setup() + model_from_setup = 
Sevpa.from_state_setup(state_setup_dict, "q0", alphabet) + visualize_automaton(model_from_setup, 'ModelFromSetup') + + # test_arithmetic_expression() # import cProfile # pr = cProfile.Profile() From c71df5b76edd09568c2308d2de87c21dce5d53da Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Mon, 6 Nov 2023 19:25:17 +0100 Subject: [PATCH 42/62] Exponential Backward Cex Processing + test function for all cex processing strategies --- .../deterministic/CounterExampleProcessing.py | 39 ++++++++++---- test_main.py | 51 +++++++++++++++++++ 2 files changed, 79 insertions(+), 11 deletions(-) diff --git a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py index b3024daf..b4250f1c 100644 --- a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py +++ b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py @@ -163,16 +163,25 @@ def exponential_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedne direction='fwd', is_vpa=False): assert direction in {'fwd', 'bwd'} - direction = 'fwd' - cex_out = sul.query(cex) - bp = 1 - bp_recent = 0 + bwd_subtrahend = 1 + if direction == 'fwd': + bp_recent = 0 + bp = 1 + else: + bp_recent = len(cex) + bp = len(cex)-1 while True: - if bp > len(cex): - bp = len(cex) - break + if direction == 'fwd': + if bp > len(cex): + bp = len(cex) + break + else: + if bp < 1: + bp = 1 + break + prefix = cex[:bp] suffix = cex[bp:] assert cex == prefix + suffix @@ -187,16 +196,24 @@ def exponential_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedne sul_out = sul.query(s_bracket + suffix) - if sul_out[-1] != cex_out[-1]: + if sul_out[-1] != cex_out[-1] and direction == 'fwd': + break + elif sul_out[-1] == cex_out[-1] and direction == 'bwd': break bp_recent = bp - bp *= 2 + if direction == 'fwd': + bp *= 2 + else: + bp -= bwd_subtrahend + bwd_subtrahend *= 2 if (bp - bp_recent) == 1: return [suffix] else: - return rs_cex_processing(sul, cex, 
hypothesis, suffix_closedness, closedness, is_vpa, lower=bp_recent) - + if direction == 'fwd': + return rs_cex_processing(sul, cex, hypothesis, suffix_closedness, closedness, is_vpa, lower=bp_recent) + else: + return rs_cex_processing(sul, cex, hypothesis, suffix_closedness, closedness, is_vpa, upper=bp_recent) diff --git a/test_main.py b/test_main.py index ac17127d..f67612ed 100644 --- a/test_main.py +++ b/test_main.py @@ -1,4 +1,5 @@ import ast +import random from Examples import learning_context_free_grammar_example from aalpy.SULs.AutomataSUL import SevpaSUL, VpaSUL @@ -142,6 +143,56 @@ def visual_test_to_state_setup_sevpa(): visualize_automaton(model_from_setup, 'ModelFromSetup') +def test_cex_processing_strategies_vpa(): + + cex_processing_strategies = ['linear_fwd', 'linear_bwd', 'exponential_fwd', 'exponential_bwd', 'rs'] + + for i, vpa in enumerate( + [vpa_for_L1(), vpa_for_L2(), vpa_for_L3(), vpa_for_L4(), vpa_for_L5(), vpa_for_L7(), vpa_for_L8(), + vpa_for_L9(), vpa_for_L10(), vpa_for_L11(), vpa_for_L12(), vpa_for_L13(), vpa_for_L14(), vpa_for_L15()]): + + print(f'VPA {i + 1 if i < 6 else i + 2}') + + model_under_learning = vpa + + alphabet = SevpaAlphabet(list(model_under_learning.internal_set), + list(model_under_learning.call_set), + list(model_under_learning.return_set)) + + + for cex_processing in cex_processing_strategies: + sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=100000) + model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', + print_level=1, cex_processing=cex_processing) + + error_states = model.find_error_states() + if error_states: + model.delete_state(error_states[0]) + sul_learned_model = SevpaSUL(model, include_top=False, check_balance=False) + + print(f'Checking {cex_processing}') + for i in range(0, 500000): + word_length = random.randint(1, 100) + word = [] + for j in range(0, 
word_length): + word.append(random.choice(alphabet.get_merged_alphabet())) + + vpa_out = sul.query(tuple(word)) + learned_model_out = sul_learned_model.query(tuple(word)) + + if vpa_out == learned_model_out: + continue + else: + print(f'{cex_processing} failed on following test:') + print(f'Input: {word}') + print(f'Vpa out: {vpa_out} \nLearned vpa out: {learned_model_out}') + break + + +test_cex_processing_strategies_vpa() +exit() + # test_arithmetic_expression() # import cProfile # pr = cProfile.Profile() From fa5907d5de3560f8cc50a1052cfaf7b0cb5d9473 Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Mon, 6 Nov 2023 19:52:39 +0100 Subject: [PATCH 43/62] Adapt gen_random_accepting_word_bfs for SEVPA --- aalpy/automata/Sevpa.py | 7 +++++-- test_main.py | 10 +++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 1ce31f01..59ecfe07 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -420,7 +420,7 @@ def find_error_states_unfinished(self): new_word = word + [letter] queue.append(new_word) - def gen_random_accepting_word_bfs(self, min_word_length: int = 0): + def gen_random_accepting_word_bfs(self, min_word_length: int = 0, amount_words: int = 1): """ Create a random word that gets accepted by the automaton with the breadth-first search approach. 
@@ -435,6 +435,7 @@ def gen_random_accepting_word_bfs(self, min_word_length: int = 0): for letter in shuffled_alphabet: queue.append([letter]) + found_words = set() while queue: word = queue.popleft() self.reset_to_initial() @@ -443,7 +444,9 @@ def gen_random_accepting_word_bfs(self, min_word_length: int = 0): if self.error_state_reached: continue if self.current_state.is_accepting and self.stack[-1] == self.empty and len(word) >= min_word_length: - return word + found_words.add(tuple(word)) + if len(found_words) >= amount_words: + return found_words shuffled_alphabet = self.input_alphabet.get_merged_alphabet() random.shuffle(shuffled_alphabet) for letter in shuffled_alphabet: diff --git a/test_main.py b/test_main.py index f67612ed..e266ad96 100644 --- a/test_main.py +++ b/test_main.py @@ -90,6 +90,14 @@ def test_random_word_gen(): sul_model = SevpaSUL(model, include_top=False, check_balance=False) + random_word_list = model.gen_random_accepting_word_bfs(min_word_length=3, amount_words=10) + for random_word in random_word_list: + out_model = sul_model.query(random_word)[-1] + out_sul = sul.query(random_word)[-1] + assert out_model == out_sul and out_model + + print(f'All tests passed for gen_random_accepting_word_bfs') + total_len = 0 for i in range(0, 100): random_word = model_under_learning.gen_random_accepting_word(return_letter_prob=0.5) @@ -190,7 +198,7 @@ def test_cex_processing_strategies_vpa(): break -test_cex_processing_strategies_vpa() +test_random_word_gen() exit() # test_arithmetic_expression() From 4064d40864782e9661448a34d7228801d75a1f07 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Tue, 7 Nov 2023 10:32:06 +0100 Subject: [PATCH 44/62] add exponential to Lstar --- aalpy/learning_algs/deterministic/LStar.py | 13 +++++++++---- det_cex_processing_test.py | 6 +++--- test_main.py | 16 +++++++++------- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/aalpy/learning_algs/deterministic/LStar.py b/aalpy/learning_algs/deterministic/LStar.py 
index 3861a796..17fda72c 100644 --- a/aalpy/learning_algs/deterministic/LStar.py +++ b/aalpy/learning_algs/deterministic/LStar.py @@ -3,11 +3,12 @@ from aalpy.base import Oracle, SUL from aalpy.utils.HelperFunctions import extend_set, print_learning_info, print_observation_table, all_prefixes from .CounterExampleProcessing import longest_prefix_cex_processing, rs_cex_processing, \ - counterexample_successfully_processed, linear_cex_processing + counterexample_successfully_processed, linear_cex_processing, exponential_cex_processing from .ObservationTable import ObservationTable from ...base.SUL import CacheSUL -counterexample_processing_strategy = [None, 'rs', 'longest_prefix', 'linear_fwd', 'linear_bwd'] +counterexample_processing_strategy = [None, 'rs', 'longest_prefix', 'linear_fwd', 'linear_bwd', 'exponential_fwd', + 'exponential_bwd'] closedness_options = ['suffix_all', 'suffix_single'] print_options = [0, 1, 2, 3] @@ -151,8 +152,12 @@ def run_Lstar(alphabet: list, sul: SUL, eq_oracle: Oracle, automaton_type, sampl cex_suffixes = rs_cex_processing(sul, cex, hypothesis, e_set_suffix_closed, closedness='suffix') else: direction = cex_processing[-3:] - cex_suffixes = linear_cex_processing(sul, cex, hypothesis, e_set_suffix_closed, - direction=direction, closedness='suffix') + if 'linear' in cex_processing: + cex_suffixes = linear_cex_processing(sul, cex, hypothesis, e_set_suffix_closed, + direction=direction, closedness='suffix') + else: + cex_suffixes = exponential_cex_processing(sul, cex, hypothesis, e_set_suffix_closed, + direction=direction, closedness='suffix') added_suffixes = extend_set(observation_table.E, cex_suffixes) observation_table.update_obs_table(e_set=added_suffixes) diff --git a/det_cex_processing_test.py b/det_cex_processing_test.py index 0059c0ff..957908e7 100644 --- a/det_cex_processing_test.py +++ b/det_cex_processing_test.py @@ -3,8 +3,8 @@ from aalpy.oracles import RandomWMethodEqOracle from aalpy.learning_algs import run_KV, run_Lstar 
-for x in ['linear_fwd', 'linear_bwd']: - for i in range(100): +for x in ['exponential_fwd', 'exponential_bwd']: + for i in range(5): print(i) model_type = 'mealy' # or 'moore', 'dfa' @@ -18,5 +18,5 @@ # select any of the oracles eq_oracle = RandomWMethodEqOracle(input_alphabet, sul, walks_per_state=10, walk_len=20) - learned_model = run_Lstar(input_alphabet, sul, eq_oracle, model_type, cex_processing=x, print_level=0) + learned_model = run_KV(input_alphabet, sul, eq_oracle, model_type, cex_processing=x, print_level=0) assert bisimilar(random_model, learned_model) diff --git a/test_main.py b/test_main.py index e266ad96..f4a10eb6 100644 --- a/test_main.py +++ b/test_main.py @@ -106,7 +106,7 @@ def test_random_word_gen(): out_sul = sul.query(random_word)[-1] assert out_model == out_sul and out_model - print(f'All tests passed average word length: {total_len/100}') + print(f'All tests passed average word length: {total_len / 100}') total_len = 0 for i in range(0, 100): @@ -116,7 +116,7 @@ def test_random_word_gen(): out_sul = sul.query(random_word)[-1] assert out_model == out_sul and out_model - print(f'All tests passed average word length: {total_len/100}') + print(f'All tests passed average word length: {total_len / 100}') total_len = 0 for i in range(0, 100): @@ -126,7 +126,7 @@ def test_random_word_gen(): out_sul = sul.query(random_word)[-1] assert out_model == out_sul and out_model - print(f'All tests passed average word length: {total_len/100}') + print(f'All tests passed average word length: {total_len / 100}') def visual_test_to_state_setup_sevpa(): @@ -152,7 +152,6 @@ def visual_test_to_state_setup_sevpa(): def test_cex_processing_strategies_vpa(): - cex_processing_strategies = ['linear_fwd', 'linear_bwd', 'exponential_fwd', 'exponential_bwd', 'rs'] for i, vpa in enumerate( @@ -167,7 +166,6 @@ def test_cex_processing_strategies_vpa(): list(model_under_learning.call_set), list(model_under_learning.return_set)) - for cex_processing in cex_processing_strategies: 
sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=100000) @@ -226,7 +224,7 @@ def test_cex_processing_strategies_vpa(): list(model_under_learning.call_set), list(model_under_learning.return_set)) - #if i == 9: + # if i == 9: # alphabet.exclusive_call_return_pairs = {'(': ')', '[': ']', '{': '}', '<': '>'} sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) @@ -236,4 +234,8 @@ def test_cex_processing_strategies_vpa(): model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', print_level=2, cex_processing='linear_bwd') - # exit() + e = model.gen_random_accepting_word_bfs(min_word_length=5, amount_words=100) + print(e[:25]) + print('generate random acc') + for _ in range(10): + print(model.gen_random_accepting_word()) From 8e34aaed14e3c1c634ca205f0d5c6f841b947cc0 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Tue, 7 Nov 2023 10:52:41 +0100 Subject: [PATCH 45/62] add thesis structure --- test_main.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test_main.py b/test_main.py index f4a10eb6..a71326f0 100644 --- a/test_main.py +++ b/test_main.py @@ -21,6 +21,24 @@ # 4. Implement and test to_state_setup, test saving and loading to/from file # 5. Create an active interface to learn a grammar of some language, like simplified C or Java +# Thesis +# 1. Intro +# 2. Preliminaries (very important) +# 2.1 CFG, context pairs, well matched words +# 2.2 What are SEVPA and why we use those instead of VPAs +# 2.3 Example SEVPA and how to read/interpret it (Important on a small example) +# 2.4 Automata Learning and KV +# ... +# 3. 
KV for CFG inference (intuition behind everything and how it fits with preliminaries) +# 3.1 Explain alg in detail, like Maxi +# 3.2 Explain CEX processing/transform access string, also on example and intuition +# 3.3 Important: Run of the algorithm, visualize classification tree... +# 4. Evaluation +# - number of steps/queries for models of growing alphabet, state size, ... +# - on 15 languages +# - on random languages +# - on something cool + def test_arithmetic_expression(): import warnings warnings.filterwarnings("ignore") From 8841ed3c6cb81e3f64c28e6d7af5f56a1c68c89a Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Tue, 7 Nov 2023 14:37:58 +0100 Subject: [PATCH 46/62] fix find_error_state --- aalpy/automata/Sevpa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 59ecfe07..9bf19e7e 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -335,7 +335,7 @@ def find_error_states(self): if error_state: error_states.append(state.state_id) - return error_states + return error_states def delete_state(self, state_id): state = self.get_state_by_id(state_id) From 5206213998e784963988033d0b67f2fefb8db414 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Tue, 7 Nov 2023 14:45:15 +0100 Subject: [PATCH 47/62] add initial removal of error state in learning --- .../deterministic/ClassificationTree.py | 14 +++++++++++++- test_main.py | 10 +--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index 61a1bd2f..8fb4ed17 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -69,6 +69,9 @@ def __init__(self, alphabet: Union[list, SevpaAlphabet], sul: SUL, automaton_typ self.sifting_cache = {} + # prefix of identified error state in VPDA learning + self.error_state_prefix = None + if 
self.automaton_type != 'mealy': initial_output = sul.query(())[-1] cex_output = sul.query(cex)[-1] @@ -225,6 +228,8 @@ def gen_hypothesis(self): continue for other_state in states_for_transitions: + if other_state.prefix == self.error_state_prefix: # TODO WIP + continue transition_target_node = self._sift( other_state.prefix + (call_letter,) + state.prefix + (return_letter,)) transition_target_access_string = transition_target_node.access_string @@ -235,7 +240,14 @@ def gen_hypothesis(self): state.transitions[return_letter].append(trans) if self.automaton_type == 'vpa': - return Sevpa(initial_state=initial_state, states=list(states.values()), input_alphabet=self.alphabet) + hypothesis = Sevpa(initial_state=initial_state, states=list(states.values()), input_alphabet=self.alphabet) + # WIP + error_states = hypothesis.find_error_states() + if error_states: + self.error_state_prefix = next((state.prefix for state in hypothesis.states + if state.state_id == error_states[0]), None) + assert len(error_states) <= 1 + return hypothesis return automaton_class[self.automaton_type](initial_state=initial_state, states=list(states.values())) diff --git a/test_main.py b/test_main.py index a71326f0..964beec6 100644 --- a/test_main.py +++ b/test_main.py @@ -34,7 +34,7 @@ # 3.2 Explain CEX processing/transform access string, also on example and intuition # 3.3 Important: Run of the algorithm, visualize classification tree... # 4. Evaluation -# - number of steps/queries for models of growing alphabet, state size, ... +# - number of steps/queries for models of growing alphabet, state size, ...] 
# - on 15 languages # - on random languages # - on something cool @@ -214,9 +214,6 @@ def test_cex_processing_strategies_vpa(): break -test_random_word_gen() -exit() - # test_arithmetic_expression() # import cProfile # pr = cProfile.Profile() @@ -252,8 +249,3 @@ def test_cex_processing_strategies_vpa(): model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', print_level=2, cex_processing='linear_bwd') - e = model.gen_random_accepting_word_bfs(min_word_length=5, amount_words=100) - print(e[:25]) - print('generate random acc') - for _ in range(10): - print(model.gen_random_accepting_word()) From a35d24fe097ce70e3cce6073dd0c43760bf10728 Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Thu, 23 Nov 2023 12:34:09 +0100 Subject: [PATCH 48/62] New random_word_gen for SEVPA --- aalpy/automata/Sevpa.py | 202 ++++++++++++++++++++++++++++------------ 1 file changed, 140 insertions(+), 62 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 9bf19e7e..027dfffd 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -362,71 +362,16 @@ def delete_state(self, state_id): del state.transitions[letter] state.transitions[letter] = cleaned_transitions - - - - def find_error_states_unfinished(self): + def gen_random_accepting_word_bfs(self, min_word_length: int = 0, amount_words: int = 1) -> set: """ - TODO: This is an unfinished idea and might be finished or deleted - - - if the initial state with an empty stack can be reached the state is valid - - if an accepting state with an empty stack can be reached all states inbetween are valid - - store the valid states with the respective stack setting in a dict with a list of valid stack settings - - - new states we reach are getting added to the suspicious list (with the stack setting) - - if a valid state + stack setting is reached we don't care about it anymore (no more words are getting generated) - - if a state leads into another suspicious state we don't care 
about that state any more as well - - stop if all transitions lead to a non-valid state ? - - criteria to be an error state: - - jumping between the initial state and the supposed error state - - if all target states are the same for internal/return transitions (there are no call transitions anyway) - - """ - valid_states = defaultdict(set) - error_states = defaultdict(set) - valid_states[self.initial_state.state_id].add(self.stack) - self.reset_to_initial() - queue = deque() - for letter in self.input_alphabet.get_merged_alphabet(): - queue.append([letter]) - - while queue: - if len(valid_states) + len(error_states) == len(self.states): - return error_states - word = queue.popleft() - self.reset_to_initial() - self.execute_sequence(self.initial_state, word) - if self.error_state_reached: - continue - - for letter in word: - self.step(letter) - if self.current_state.state_id in valid_states: - if self.stack in valid_states[self.current_state.state_id]: - for letter_ in word: - self.step(letter_) - valid_states[self.current_state.state_id].add(self.stack) - continue - - # if an accepting state or the initial state + empty stack can be reached all states inbetween are valid - if (self.current_state.is_accepting or self.current_state == self.initial_state) and self.stack[-1] == self.empty: - self.reset_to_initial() - for letter in word: - self.step(letter) - valid_states[self.current_state.state_id].add(self.stack) - - for letter in self.input_alphabet.get_merged_alphabet(): - new_word = word + [letter] - queue.append(new_word) - - def gen_random_accepting_word_bfs(self, min_word_length: int = 0, amount_words: int = 1): - """ - Create a random word that gets accepted by the automaton with the breadth-first search approach. + Generate a list of random words that are accepted by the automaton using the breadth-first search approach. Args: + - min_word_length (int): Minimum length of the generated words. + - amount_words (int): Number of words to generate. 
Returns: + - set: A set of randomly generated words that are accepted by the automaton. """ self.reset_to_initial() queue = deque() @@ -453,8 +398,132 @@ def gen_random_accepting_word_bfs(self, min_word_length: int = 0, amount_words: new_word = word + [letter] queue.append(new_word) - def gen_random_accepting_word(self, return_letter_prob: float = 0.0, call_letter_prob: float = 0.0, - early_finish: bool = True): + def get_allowed_call_transitions(self): + """ + Returns a dict of states that are allowed to push a call letters on the stack. + + For all states that are connected via internal transitions from the initial state on, the state_id and + call_letter of the stack_guard from every return transition is used. + + States are not allowed to push something somthing on the stack if there is no possibility to pop the + stack guard, where their state_id is used, from the stack, which would lead into a dead-end otherwise. + + Returns: + - dict: A dictionary where keys are the state_id and values are sets of the call_letters. + """ + + # get all states that are connected via internal transitions by using BFS + connected_states = set() + queue = deque([self.initial_state]) + while queue: + current_state = queue.popleft() + connected_states.add(current_state) + + for internal_letter in self.input_alphabet.internal_alphabet: + for internal_trans in current_state.transitions[internal_letter]: + target_state = internal_trans.target + if target_state not in connected_states: + queue.append(target_state) + + allowed_call_transitions = defaultdict(set) + for state in connected_states: + for return_letter in self.input_alphabet.return_alphabet: + for trans in state.transitions[return_letter]: + allowed_call_transitions[trans.stack_guard[1]].add(trans.stack_guard[0]) + + return allowed_call_transitions + + def gen_random_accepting_word(self, return_letter_prob: float = 0.5, min_length: int = 0) -> list: + """ + Generate a random word that is accepted by the automaton. 
+ + Only internal letters and return letters will be chosen. If a return letter is randomly chosen a random + stack guard will be selected. Then the stack needed stack configuration will bne searched by using BFS + + Args: + - return_letter_prob (float): Probability for selecting a letter from the return alphabet. + - min_length (int): Minimum length of the generated word. + + Returns: + - list: A randomly generated word that gets accepted by the automaton. + """ + assert return_letter_prob <= 1.0 + word = [] + + internal_letter_prob = 0.0 + if len(self.input_alphabet.internal_alphabet) != 0: + internal_letter_prob = 1.0 - return_letter_prob + + assert (return_letter_prob + internal_letter_prob) == 1.0 + + return_letter_boarder = return_letter_prob + internal_letter_boarder = return_letter_boarder + internal_letter_prob + + self.reset_to_initial() + + while True: + letter_type = random.uniform(0.0, 1.0) + is_return_letter = False + if letter_type <= return_letter_boarder: + possible_letters = self.input_alphabet.return_alphabet + is_return_letter = True + elif return_letter_boarder < letter_type <= internal_letter_boarder: + possible_letters = self.input_alphabet.internal_alphabet + else: + assert False + + assert len(possible_letters) > 0 + + random_trans_letter_index = random.randint(0, len(possible_letters) - 1) + letter = possible_letters[random_trans_letter_index] + + # find the sub-word so the right stack guard + if is_return_letter: + # randomly select one of the return transitions with the respective return symbol + random_stack_guard_index = random.randint(0, len(self.current_state.transitions[letter]) - 1) + random_stack_guard = self.current_state.transitions[letter][random_stack_guard_index].stack_guard + + sub_word = [] + needed_stack = self.stack.copy() + needed_stack.append(random_stack_guard) + queue = deque() + for letter in self.input_alphabet.get_merged_alphabet(): + queue.append([letter]) + + while queue: + sub_word = queue.popleft() + 
self.reset_to_initial() + self.execute_sequence(self.initial_state, word + sub_word) + if self.error_state_reached: + continue + if self.stack == needed_stack: + break + + for letter in self.input_alphabet.get_merged_alphabet(): + new_word = sub_word + [letter] + queue.append(new_word) + + self.step(letter) + if not self.error_state_reached: + sub_word.append(letter) + word = word + sub_word + else: + self.execute_sequence(self.initial_state, word) + + else: + self.step(letter) + if not self.error_state_reached: + word.append(letter) + else: + self.execute_sequence(self.initial_state, word) + + if self.current_state.is_accepting and self.stack[-1] == self.empty and len(word) >= min_length: + break + + return word + + def gen_random_accepting_word_2(self, return_letter_prob: float = 0.0, call_letter_prob: float = 0.0, + early_finish: bool = True): """ Create a random word that gets accepted by the automaton. @@ -491,10 +560,13 @@ def gen_random_accepting_word(self, return_letter_prob: float = 0.0, call_letter internal_letter_boarder = return_letter_boarder + internal_letter_prob self.reset_to_initial() + allowed_call_transitions = self.get_allowed_call_transitions() while True: letter_type = random.uniform(0.0, 1.0) + is_call_letter = False if 0.0 <= letter_type <= call_letter_boarder: possible_letters = self.input_alphabet.call_alphabet + is_call_letter = True elif call_letter_boarder < letter_type <= return_letter_boarder: # skip return letters if stack is empty or if the word is empty if self.stack[-1] == self.empty or word == []: @@ -518,6 +590,12 @@ def gen_random_accepting_word(self, return_letter_prob: float = 0.0, call_letter if letter == '': random_trans_letter_index = random.randint(0, len(possible_letters) - 1) letter = possible_letters[random_trans_letter_index] + + # check if it is allowed to make push the selected letter on the stack from the current position + if is_call_letter: + if self.current_state.state_id not in allowed_call_transitions[letter]: 
+ continue + self.step(letter) if not self.error_state_reached: word.append(letter) From be5169b867dd245596a8409feaba3504501411cc Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Thu, 23 Nov 2023 17:07:27 +0100 Subject: [PATCH 49/62] Bug Fix + Performance improvement --- aalpy/automata/Sevpa.py | 203 ++++++++++++++-------------------------- test_main.py | 12 +++ 2 files changed, 82 insertions(+), 133 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 027dfffd..20e4b08f 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -362,6 +362,41 @@ def delete_state(self, state_id): del state.transitions[letter] state.transitions[letter] = cleaned_transitions + def get_allowed_call_transitions(self): + """ + Returns a dict of states that are allowed to push a call letters on the stack. + + For all states that are connected via internal transitions from the initial state on, the state_id and + call_letter of the stack_guard from every return transition is used. + + States are not allowed to push something somthing on the stack if there is no possibility to pop the + stack guard, where their state_id is used, from the stack, which would lead into a dead-end otherwise. + + Returns: + - dict: A dictionary where keys are the call_letters and values are sets of the allowed states. 
+ """ + + # get all states that are connected via internal transitions by using BFS + connected_states = set() + queue = deque([self.initial_state]) + while queue: + current_state = queue.popleft() + connected_states.add(current_state) + + for internal_letter in self.input_alphabet.internal_alphabet: + for internal_trans in current_state.transitions[internal_letter]: + target_state = internal_trans.target + if target_state not in connected_states: + queue.append(target_state) + + allowed_call_transitions = defaultdict(set) + for state in connected_states: + for return_letter in self.input_alphabet.return_alphabet: + for trans in state.transitions[return_letter]: + allowed_call_transitions[trans.stack_guard[1]].add(trans.stack_guard[0]) + + return allowed_call_transitions + def gen_random_accepting_word_bfs(self, min_word_length: int = 0, amount_words: int = 1) -> set: """ Generate a list of random words that are accepted by the automaton using the breadth-first search approach. @@ -373,6 +408,7 @@ def gen_random_accepting_word_bfs(self, min_word_length: int = 0, amount_words: Returns: - set: A set of randomly generated words that are accepted by the automaton. """ + allowed_call_trans = self.get_allowed_call_transitions() self.reset_to_initial() queue = deque() shuffled_alphabet = self.input_alphabet.get_merged_alphabet() @@ -395,44 +431,13 @@ def gen_random_accepting_word_bfs(self, min_word_length: int = 0, amount_words: shuffled_alphabet = self.input_alphabet.get_merged_alphabet() random.shuffle(shuffled_alphabet) for letter in shuffled_alphabet: + if letter in allowed_call_trans: + # skip words where it's not possible to pop the stack_guard + if self.current_state.state_id not in allowed_call_trans[letter]: + continue new_word = word + [letter] queue.append(new_word) - def get_allowed_call_transitions(self): - """ - Returns a dict of states that are allowed to push a call letters on the stack. 
- - For all states that are connected via internal transitions from the initial state on, the state_id and - call_letter of the stack_guard from every return transition is used. - - States are not allowed to push something somthing on the stack if there is no possibility to pop the - stack guard, where their state_id is used, from the stack, which would lead into a dead-end otherwise. - - Returns: - - dict: A dictionary where keys are the state_id and values are sets of the call_letters. - """ - - # get all states that are connected via internal transitions by using BFS - connected_states = set() - queue = deque([self.initial_state]) - while queue: - current_state = queue.popleft() - connected_states.add(current_state) - - for internal_letter in self.input_alphabet.internal_alphabet: - for internal_trans in current_state.transitions[internal_letter]: - target_state = internal_trans.target - if target_state not in connected_states: - queue.append(target_state) - - allowed_call_transitions = defaultdict(set) - for state in connected_states: - for return_letter in self.input_alphabet.return_alphabet: - for trans in state.transitions[return_letter]: - allowed_call_transitions[trans.stack_guard[1]].add(trans.stack_guard[0]) - - return allowed_call_transitions - def gen_random_accepting_word(self, return_letter_prob: float = 0.5, min_length: int = 0) -> list: """ Generate a random word that is accepted by the automaton. 
@@ -459,6 +464,8 @@ def gen_random_accepting_word(self, return_letter_prob: float = 0.5, min_length: return_letter_boarder = return_letter_prob internal_letter_boarder = return_letter_boarder + internal_letter_prob + allowed_call_trans = self.get_allowed_call_transitions() + self.reset_to_initial() while True: @@ -475,15 +482,24 @@ def gen_random_accepting_word(self, return_letter_prob: float = 0.5, min_length: assert len(possible_letters) > 0 random_trans_letter_index = random.randint(0, len(possible_letters) - 1) - letter = possible_letters[random_trans_letter_index] + letter_for_word = possible_letters[random_trans_letter_index] - # find the sub-word so the right stack guard + # find the sub-word for the needed stack guard beginning from the initial state + # the new word will be: letter_prefix + word + letter if is_return_letter: # randomly select one of the return transitions with the respective return symbol - random_stack_guard_index = random.randint(0, len(self.current_state.transitions[letter]) - 1) - random_stack_guard = self.current_state.transitions[letter][random_stack_guard_index].stack_guard + if len(self.current_state.transitions[letter_for_word]) == 0: + continue + elif len(self.current_state.transitions[letter_for_word]) == 1: + random_stack_guard = self.current_state.transitions[letter_for_word][0].stack_guard + else: + random_stack_guard_index = random.randint(0, len(self.current_state.transitions[letter_for_word]) - 1) + random_stack_guard = self.current_state.transitions[letter_for_word][random_stack_guard_index].stack_guard - sub_word = [] + # start from the initial state + self.reset_to_initial() + + letter_prefix = [] needed_stack = self.stack.copy() needed_stack.append(random_stack_guard) queue = deque() @@ -491,29 +507,35 @@ def gen_random_accepting_word(self, return_letter_prob: float = 0.5, min_length: queue.append([letter]) while queue: - sub_word = queue.popleft() + letter_prefix = queue.popleft() self.reset_to_initial() - 
self.execute_sequence(self.initial_state, word + sub_word) + self.execute_sequence(self.initial_state, letter_prefix) if self.error_state_reached: continue if self.stack == needed_stack: break for letter in self.input_alphabet.get_merged_alphabet(): - new_word = sub_word + [letter] + if letter in allowed_call_trans: + # skip words where it's not possible to pop the stack_guard + if self.current_state.state_id not in allowed_call_trans[letter]: + continue + new_word = letter_prefix + [letter] queue.append(new_word) - self.step(letter) + for letter in word: + self.step(letter) + self.step(letter_for_word) if not self.error_state_reached: - sub_word.append(letter) - word = word + sub_word + word = letter_prefix + word + word.append(letter_for_word) else: self.execute_sequence(self.initial_state, word) else: - self.step(letter) + self.step(letter_for_word) if not self.error_state_reached: - word.append(letter) + word.append(letter_for_word) else: self.execute_sequence(self.initial_state, word) @@ -521,88 +543,3 @@ def gen_random_accepting_word(self, return_letter_prob: float = 0.5, min_length: break return word - - def gen_random_accepting_word_2(self, return_letter_prob: float = 0.0, call_letter_prob: float = 0.0, - early_finish: bool = True): - """ - Create a random word that gets accepted by the automaton. 
- - Args: - - Returns: - """ - assert return_letter_prob + call_letter_prob <= 1.0 - word = [] - if return_letter_prob == 0.0 and call_letter_prob == 0.0: - return_letter_prob = 0.34 - call_letter_prob = 0.33 - elif return_letter_prob == 0.0 and call_letter_prob != 0.0: - return_letter_prob = (1.0 - call_letter_prob) / 2 - elif return_letter_prob != 0.0 and call_letter_prob == 0.0: - call_letter_prob = (1.0 - return_letter_prob) / 2 - - if len(self.input_alphabet.internal_alphabet) != 0: - internal_letter_prob = 1.0 - return_letter_prob - call_letter_prob - else: - internal_letter_prob = 0.0 - if return_letter_prob == 0.0 and call_letter_prob == 0.0: - return_letter_prob = 0.5 - call_letter_prob = 0.5 - elif return_letter_prob == 0.0 and call_letter_prob != 0.0: - return_letter_prob = (1.0 - call_letter_prob) - elif return_letter_prob != 0.0 and call_letter_prob == 0.0: - call_letter_prob = (1.0 - return_letter_prob) - - assert (call_letter_prob + return_letter_prob + internal_letter_prob) == 1.0 - - call_letter_boarder = call_letter_prob - return_letter_boarder = call_letter_boarder + return_letter_prob - internal_letter_boarder = return_letter_boarder + internal_letter_prob - - self.reset_to_initial() - allowed_call_transitions = self.get_allowed_call_transitions() - while True: - letter_type = random.uniform(0.0, 1.0) - is_call_letter = False - if 0.0 <= letter_type <= call_letter_boarder: - possible_letters = self.input_alphabet.call_alphabet - is_call_letter = True - elif call_letter_boarder < letter_type <= return_letter_boarder: - # skip return letters if stack is empty or if the word is empty - if self.stack[-1] == self.empty or word == []: - continue - possible_letters = self.input_alphabet.return_alphabet - elif return_letter_boarder < letter_type <= internal_letter_boarder: - possible_letters = self.input_alphabet.internal_alphabet - else: - assert False - - assert len(possible_letters) > 0 - - letter = '' - if early_finish: - for l in possible_letters: 
- for transition in self.current_state.transitions[l]: - if transition.target.is_accepting: - letter = l - break - break - if letter == '': - random_trans_letter_index = random.randint(0, len(possible_letters) - 1) - letter = possible_letters[random_trans_letter_index] - - # check if it is allowed to make push the selected letter on the stack from the current position - if is_call_letter: - if self.current_state.state_id not in allowed_call_transitions[letter]: - continue - - self.step(letter) - if not self.error_state_reached: - word.append(letter) - else: - self.execute_sequence(self.initial_state, word) - - if self.current_state.is_accepting and self.stack[-1] == self.empty: - break - - return word diff --git a/test_main.py b/test_main.py index 964beec6..f3753fed 100644 --- a/test_main.py +++ b/test_main.py @@ -108,6 +108,7 @@ def test_random_word_gen(): sul_model = SevpaSUL(model, include_top=False, check_balance=False) + # Test SEVPA random word gen random_word_list = model.gen_random_accepting_word_bfs(min_word_length=3, amount_words=10) for random_word in random_word_list: out_model = sul_model.query(random_word)[-1] @@ -116,6 +117,17 @@ def test_random_word_gen(): print(f'All tests passed for gen_random_accepting_word_bfs') + total_len = 0 + for i in range(0, 100): + random_word = model.gen_random_accepting_word() + total_len += len(random_word) + out_model = sul_model.query(random_word)[-1] + out_sul = sul.query(random_word)[-1] + assert out_model == out_sul and out_model + + print(f'All tests passed average word length: {total_len/100}') + + # Test VPA random word gen total_len = 0 for i in range(0, 100): random_word = model_under_learning.gen_random_accepting_word(return_letter_prob=0.5) From c4f6ba71c11f2802fd78585c98d8d96918a12380 Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Fri, 24 Nov 2023 17:29:44 +0100 Subject: [PATCH 50/62] Bug fix for exponential_bwd --- aalpy/automata/Sevpa.py | 6 ------ 
.../learning_algs/deterministic/CounterExampleProcessing.py | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 20e4b08f..f94fae12 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -189,12 +189,6 @@ def to_state_setup(self): transitions_for_symbol[symbol] = trans_list_for_setup state_setup_dict[state.state_id] = (state.is_accepting, transitions_for_symbol) - - # for state in sorted_states: - # state_setup_dict[state.state_id] = ( - # state.is_accepting, {symbol: trans_list - # for symbol, trans_list in state.transitions.items()}) - return state_setup_dict @staticmethod diff --git a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py index b4250f1c..43b02062 100644 --- a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py +++ b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py @@ -174,11 +174,11 @@ def exponential_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedne bp = len(cex)-1 while True: if direction == 'fwd': - if bp > len(cex): + if bp >= len(cex): bp = len(cex) break else: - if bp < 1: + if bp <= 1: bp = 1 break From ff0efceb757b88cdd67587719a5d636ada0b3e95 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Wed, 29 Nov 2023 11:00:45 +0100 Subject: [PATCH 51/62] fix error state processing in KV --- aalpy/SULs/AutomataSUL.py | 24 ++------ aalpy/automata/Sevpa.py | 51 ++++++++--------- .../deterministic/ClassificationTree.py | 13 ++--- .../deterministic/CounterExampleProcessing.py | 20 +++---- aalpy/learning_algs/deterministic/KV.py | 3 + det_cex_processing_test.py | 29 +++++----- test_main.py | 55 +++++-------------- 7 files changed, 80 insertions(+), 115 deletions(-) diff --git a/aalpy/SULs/AutomataSUL.py b/aalpy/SULs/AutomataSUL.py index a6d63329..220b6463 100644 --- a/aalpy/SULs/AutomataSUL.py +++ b/aalpy/SULs/AutomataSUL.py @@ -168,11 +168,9 @@ def 
step(self, letter): class VpaSUL(SUL): - def __init__(self, vpa: Vpa, include_top=True, check_balance=True): + def __init__(self, vpa: Vpa): super().__init__() self.vpa = vpa - self.include_top = include_top - self.check_balance = check_balance def pre(self): self.vpa.reset_to_initial() @@ -181,21 +179,13 @@ def post(self): pass def step(self, letter): - output = self.vpa.step(letter) - top = self.vpa.top() - if self.include_top: - if self.check_balance and self.vpa.call_balance < 0: - return output, '-' - return output, top - return output + return self.vpa.step(letter) class SevpaSUL(SUL): - def __init__(self, sevpa: Sevpa, include_top=True, check_balance=True): + def __init__(self, sevpa: Sevpa): super().__init__() self.sevpa = sevpa - self.include_top = include_top - self.check_balance = check_balance def pre(self): self.sevpa.reset_to_initial() @@ -204,10 +194,4 @@ def post(self): pass def step(self, letter): - output = self.sevpa.step(letter) - # top = self.sevpa.top() - if self.include_top: - if self.check_balance and self.sevpa.call_balance < 0: - return output, '-' - return output - return output \ No newline at end of file + return self.sevpa.step(letter) diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index f94fae12..8e87353a 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -281,19 +281,21 @@ def create_daisy_hypothesis(initial_state, alphabet): return Sevpa(initial_state, [initial_state], alphabet) - def find_error_states(self): + def get_error_state(self): """ - - if all transitions self loop to itself - - if the pop transitions from the corresponding stack symbol lead to the same state - - for example: - - all q2 transitions lead to q2 - - the pop transitions from the initial state which pop the q2+call-symbol from the stack lead to q2 as well - - - do not do if the state is the initial state or an accepting state + A state is an error state iff: + - if all transitions self loop to itself + - if the pop transitions 
from the corresponding stack symbol lead to the same state + - for example: + - all q2 transitions lead to q2 + - the pop transitions from the initial state which pop the q2+call-symbol from the stack lead to q2 as well + + - Not an error state if it is the initial state or an accepting state """ - error_states = [] + for state in self.states: - error_state = True + + is_error_state = True if state.is_accepting or state == self.initial_state: continue @@ -308,36 +310,35 @@ def find_error_states(self): state_target = transition.target else: if state_target != transition.target: - error_state = False + is_error_state = False break - if not error_state: + if not is_error_state: break # check return transitions from the initial state - if error_state: + if is_error_state: for return_letter in self.input_alphabet.return_alphabet: for transition in self.initial_state.transitions[return_letter]: if transition.stack_guard[0] == state_target.state_id: if transition.target != state_target: - error_state = False + is_error_state = False break - if not error_state: + if not is_error_state: break else: continue - if error_state: - error_states.append(state.state_id) + if is_error_state: + return state - return error_states + return None - def delete_state(self, state_id): - state = self.get_state_by_id(state_id) + def delete_state(self, state_to_remove): - if state is not None: - self.states.remove(state) + if state_to_remove is not None: + self.states.remove(state_to_remove) else: - assert False and f'State {state_id} does not exist' + return for state in self.states: ret_int_al = [] @@ -347,9 +348,9 @@ def delete_state(self, state_id): cleaned_transitions = [] for transition in state.transitions[letter]: if transition.stack_guard is not None: - if transition.stack_guard[0] == state_id: + if transition.stack_guard[0] == state_to_remove.state_id: continue - if transition.target.state_id == state_id: + if transition.target.state_id == state_to_remove.state_id: continue 
cleaned_transitions.append(transition) diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index 8fb4ed17..0418cfcb 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -228,7 +228,8 @@ def gen_hypothesis(self): continue for other_state in states_for_transitions: - if other_state.prefix == self.error_state_prefix: # TODO WIP + # ignore other state if other state is error state + if other_state.prefix == self.error_state_prefix: continue transition_target_node = self._sift( other_state.prefix + (call_letter,) + state.prefix + (return_letter,)) @@ -241,12 +242,10 @@ def gen_hypothesis(self): if self.automaton_type == 'vpa': hypothesis = Sevpa(initial_state=initial_state, states=list(states.values()), input_alphabet=self.alphabet) - # WIP - error_states = hypothesis.find_error_states() - if error_states: - self.error_state_prefix = next((state.prefix for state in hypothesis.states - if state.state_id == error_states[0]), None) - assert len(error_states) <= 1 + if not self.error_state_prefix: + error_state = hypothesis.get_error_state() + if error_state: + self.error_state_prefix = error_state.prefix return hypothesis return automaton_class[self.automaton_type](initial_state=initial_state, states=list(states.values())) diff --git a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py index 43b02062..5503946a 100644 --- a/aalpy/learning_algs/deterministic/CounterExampleProcessing.py +++ b/aalpy/learning_algs/deterministic/CounterExampleProcessing.py @@ -43,14 +43,13 @@ def longest_prefix_cex_processing(s_union_s_dot_a: list, cex: tuple, closedness= return suffixes -def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, closedness='suffix', is_vpa=False, - lower=0, upper=0): - """Riverst-Schapire counter example 
processing. +def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, closedness='suffix', + is_vpa=False, lower=None, upper=None): + """ + Riverst-Schapire counter example processing. Args: - upper: upper boarder for cex (from preprocessing) - lower: lower boarder for cex (from preprocessing) sul: system under learning cex: found counterexample hypothesis: hypothesis on which counterexample was found @@ -59,6 +58,8 @@ def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, sul: SUL: system under learning cex: tuple: counterexample is_vpa: system under learning behaves as a context free language + upper: upper boarder for cex (from preprocessing), None will set it to 1 + lower: lower boarder for cex (from preprocessing), None will set it to len(cex_input) - 2 Returns: @@ -68,11 +69,8 @@ def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, cex_out = sul.query(cex) cex_input = list(cex) - if lower == 0: - lower = 1 - - if upper == 0: - upper = len(cex_input) - 2 + lower = 1 if lower is None else lower + upper = len(cex_input) - 2 if upper is None else upper while True: hypothesis.reset_to_initial() @@ -172,6 +170,8 @@ def exponential_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedne else: bp_recent = len(cex) bp = len(cex)-1 + + suffix = None while True: if direction == 'fwd': if bp >= len(cex): diff --git a/aalpy/learning_algs/deterministic/KV.py b/aalpy/learning_algs/deterministic/KV.py index 67dd3b74..9ae944eb 100644 --- a/aalpy/learning_algs/deterministic/KV.py +++ b/aalpy/learning_algs/deterministic/KV.py @@ -135,6 +135,9 @@ def run_KV(alphabet: Union[list, SevpaAlphabet], sul: SUL, eq_oracle: Oracle, au classification_tree.process_counterexample(cex, hypothesis, cex_processing) + if automaton_type == 'vpa': + hypothesis.delete_state(hypothesis.get_error_state()) + total_time = round(time.time() - start_time, 2) eq_query_time = round(eq_query_time, 2) learning_time = 
round(total_time - eq_query_time, 2) diff --git a/det_cex_processing_test.py b/det_cex_processing_test.py index 957908e7..432bf602 100644 --- a/det_cex_processing_test.py +++ b/det_cex_processing_test.py @@ -3,20 +3,23 @@ from aalpy.oracles import RandomWMethodEqOracle from aalpy.learning_algs import run_KV, run_Lstar -for x in ['exponential_fwd', 'exponential_bwd']: - for i in range(5): - print(i) - model_type = 'mealy' # or 'moore', 'dfa' +for x in ['rs', 'linear_fwd', 'linear_bwd', 'exponential_fwd', 'exponential_bwd']: + for at in ['moore', 'dfa', 'mealy']: + for i in range(50): + print(x, at, i) + model_type = at # or 'moore', 'dfa' - # for random dfa's you can also define num_accepting_states - random_model = generate_random_deterministic_automata(automaton_type=model_type, num_states=100, - input_alphabet_size=3, output_alphabet_size=4) + # for random dfa's you can also define num_accepting_states + random_model = generate_random_deterministic_automata(automaton_type=model_type, num_states=75, + input_alphabet_size=4, output_alphabet_size=5) - sul = MealySUL(random_model) - input_alphabet = random_model.get_input_alphabet() + sul = MealySUL(random_model) + input_alphabet = random_model.get_input_alphabet() - # select any of the oracles - eq_oracle = RandomWMethodEqOracle(input_alphabet, sul, walks_per_state=10, walk_len=20) + # select any of the oracles + eq_oracle = RandomWMethodEqOracle(input_alphabet, sul, walks_per_state=10, walk_len=15) - learned_model = run_KV(input_alphabet, sul, eq_oracle, model_type, cex_processing=x, print_level=0) - assert bisimilar(random_model, learned_model) + learned_model = run_Lstar(input_alphabet, sul, eq_oracle, model_type, cex_processing=x, print_level=0) + if not bisimilar(random_model, learned_model): + print(x, at) + print(bisimilar(random_model, learned_model, return_cex=True)) diff --git a/test_main.py b/test_main.py index f3753fed..01d049ba 100644 --- a/test_main.py +++ b/test_main.py @@ -83,7 +83,7 @@ def 
test_on_random_svepa(): alphabet = random_svepa.input_alphabet - sul = SevpaSUL(random_svepa, include_top=False, check_balance=False) + sul = SevpaSUL(random_svepa) eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000, min_walk_len=10, max_walk_len=30) @@ -100,13 +100,13 @@ def test_random_word_gen(): list(model_under_learning.call_set), list(model_under_learning.return_set)) - sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) + sul = VpaSUL(model_under_learning) eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=100000) model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', print_level=2, cex_processing='exponential_fwd') - sul_model = SevpaSUL(model, include_top=False, check_balance=False) + sul_model = SevpaSUL(model) # Test SEVPA random word gen random_word_list = model.gen_random_accepting_word_bfs(min_word_length=3, amount_words=10) @@ -125,7 +125,7 @@ def test_random_word_gen(): out_sul = sul.query(random_word)[-1] assert out_model == out_sul and out_model - print(f'All tests passed average word length: {total_len/100}') + print(f'All tests passed average word length: {total_len / 100}') # Test VPA random word gen total_len = 0 @@ -159,30 +159,8 @@ def test_random_word_gen(): print(f'All tests passed average word length: {total_len / 100}') -def visual_test_to_state_setup_sevpa(): - vpa = vpa_for_L11() - - alphabet = SevpaAlphabet(list(vpa.internal_set), - list(vpa.call_set), - list(vpa.return_set)) - sul = VpaSUL(vpa, include_top=False, check_balance=False) - eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=100000) - model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', - print_level=2, cex_processing='rs') - - error_states = model.find_error_states() - if error_states is not None: - model.delete_state(error_states[0]) - - # for visual comparison 
- model.visualize() - state_setup_dict = model.to_state_setup() - model_from_setup = Sevpa.from_state_setup(state_setup_dict, "q0", alphabet) - visualize_automaton(model_from_setup, 'ModelFromSetup') - - def test_cex_processing_strategies_vpa(): - cex_processing_strategies = ['linear_fwd', 'linear_bwd', 'exponential_fwd', 'exponential_bwd', 'rs'] + cex_processing_strategies = ['rs', 'linear_fwd', 'linear_bwd', 'exponential_fwd', 'exponential_bwd', ] for i, vpa in enumerate( [vpa_for_L1(), vpa_for_L2(), vpa_for_L3(), vpa_for_L4(), vpa_for_L5(), vpa_for_L7(), vpa_for_L8(), @@ -197,18 +175,15 @@ def test_cex_processing_strategies_vpa(): list(model_under_learning.return_set)) for cex_processing in cex_processing_strategies: - sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) - eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=100000) + sul = VpaSUL(model_under_learning) + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=20000) model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', print_level=1, cex_processing=cex_processing) - error_states = model.find_error_states() - if error_states: - model.delete_state(error_states[0]) - sul_learned_model = SevpaSUL(model, include_top=False, check_balance=False) + sul_learned_model = SevpaSUL(model) print(f'Checking {cex_processing}') - for i in range(0, 500000): + for i in range(0, 10000): word_length = random.randint(1, 100) word = [] for j in range(0, word_length): @@ -223,10 +198,11 @@ def test_cex_processing_strategies_vpa(): print(f'{cex_processing} failed on following test:') print(f'Input: {word}') print(f'Vpa out: {vpa_out} \nLearned vpa out: {learned_model_out}') - break + assert False - -# test_arithmetic_expression() +# test_cex_processing_strategies_vpa() +test_arithmetic_expression() +# test_on_random_svepa() # import cProfile # pr = cProfile.Profile() # pr.enable() @@ -254,10 
+230,9 @@ def test_cex_processing_strategies_vpa(): # if i == 9: # alphabet.exclusive_call_return_pairs = {'(': ')', '[': ']', '{': '}', '<': '>'} - sul = VpaSUL(model_under_learning, include_top=False, check_balance=False) + sul = VpaSUL(model_under_learning) eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000) # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', - print_level=2, cex_processing='linear_bwd') - + print_level=2, cex_processing='exponential_fwd') From a91490e5800d072b55090698050dd8606ae89f58 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Wed, 29 Nov 2023 11:07:44 +0100 Subject: [PATCH 52/62] update signature of to_state_setup --- aalpy/automata/Dfa.py | 2 +- aalpy/automata/MarkovChain.py | 2 +- aalpy/automata/Mdp.py | 20 +++------------- aalpy/automata/MealyMachine.py | 2 +- aalpy/automata/MooreMachine.py | 2 +- aalpy/automata/Onfsm.py | 2 +- aalpy/automata/Sevpa.py | 5 +++- aalpy/automata/StochasticMealyMachine.py | 2 +- aalpy/automata/Vpa.py | 5 +++- aalpy/base/Automaton.py | 2 +- aalpy/utils/BenchmarkVpaModels.py | 30 ++++++++++++------------ test_main.py | 3 +-- 12 files changed, 34 insertions(+), 43 deletions(-) diff --git a/aalpy/automata/Dfa.py b/aalpy/automata/Dfa.py index 7dd8c3f0..aea2953c 100644 --- a/aalpy/automata/Dfa.py +++ b/aalpy/automata/Dfa.py @@ -61,7 +61,7 @@ def to_state_setup(self): return state_setup_dict @staticmethod - def from_state_setup(state_setup : dict): + def from_state_setup(state_setup : dict, **kwargs): """ First state in the state setup is the initial state. 
Example state setup: diff --git a/aalpy/automata/MarkovChain.py b/aalpy/automata/MarkovChain.py index 7fd77388..ef94a9f2 100644 --- a/aalpy/automata/MarkovChain.py +++ b/aalpy/automata/MarkovChain.py @@ -62,7 +62,7 @@ def step_to(self, input): return None @staticmethod - def from_state_setup(state_setup : dict): + def from_state_setup(state_setup : dict, **kwargs): raise NotImplementedError() # TODO implement def to_state_setup(self): diff --git a/aalpy/automata/Mdp.py b/aalpy/automata/Mdp.py index 6a57f967..dc8c4b87 100644 --- a/aalpy/automata/Mdp.py +++ b/aalpy/automata/Mdp.py @@ -3,29 +3,15 @@ from typing import Dict, Generic, List, Tuple from aalpy.base import Automaton, AutomatonState -# TODO edit setup to include this in base? from aalpy.base.Automaton import OutputType, InputType -def from_state_setup(state_setup: dict): - states_map = {key: MdpState(key, output=value[0]) for key, value in state_setup.items()} - - for key, values in state_setup.items(): - source = states_map[key] - for i, transitions in values[1].items(): - for node, prob in transitions: - source.transitions[i].append((states_map[node], prob)) - - initial_state = states_map[list(state_setup.keys())[0]] - return Mdp(initial_state, list(states_map.values())) - - class MdpState(AutomatonState, Generic[InputType, OutputType]): def __init__(self, state_id, output=None): super().__init__(state_id) - self.output : OutputType = output + self.output: OutputType = output # each child is a tuple (Node(output), probability) - self.transitions : Dict[InputType, List[Tuple[MdpState, float]]] = defaultdict(list) + self.transitions: Dict[InputType, List[Tuple[MdpState, float]]] = defaultdict(list) class Mdp(Automaton[MdpState[InputType, OutputType]]): @@ -92,7 +78,7 @@ def to_state_setup(self): return state_setup_dict @staticmethod - def from_state_setup(state_setup : dict): + def from_state_setup(state_setup: dict, **kwargs): states_map = {key: MdpState(key, output=value[0]) for key, value in 
state_setup.items()} for key, values in state_setup.items(): diff --git a/aalpy/automata/MealyMachine.py b/aalpy/automata/MealyMachine.py index 9f0b97a8..7ab95502 100644 --- a/aalpy/automata/MealyMachine.py +++ b/aalpy/automata/MealyMachine.py @@ -49,7 +49,7 @@ def to_state_setup(self): return state_setup_dict @staticmethod - def from_state_setup(state_setup : dict): + def from_state_setup(state_setup : dict, **kwargs): """ First state in the state setup is the initial state. state_setup = { diff --git a/aalpy/automata/MooreMachine.py b/aalpy/automata/MooreMachine.py index 52c06a3e..ac9a3adb 100644 --- a/aalpy/automata/MooreMachine.py +++ b/aalpy/automata/MooreMachine.py @@ -61,7 +61,7 @@ def to_state_setup(self): return state_setup_dict @staticmethod - def from_state_setup(state_setup : dict): + def from_state_setup(state_setup : dict, **kwargs): """ First state in the state setup is the initial state. Example state setup: diff --git a/aalpy/automata/Onfsm.py b/aalpy/automata/Onfsm.py index da92664b..fb0bc9a3 100644 --- a/aalpy/automata/Onfsm.py +++ b/aalpy/automata/Onfsm.py @@ -102,7 +102,7 @@ def step_to(self, inp, out): return None @staticmethod - def from_state_setup(state_setup : dict): + def from_state_setup(state_setup : dict, **kwargs): raise NotImplementedError() # TODO implement def to_state_setup(self): diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 8e87353a..97188e53 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -192,7 +192,10 @@ def to_state_setup(self): return state_setup_dict @staticmethod - def from_state_setup(state_setup: dict, init_state_id, input_alphabet: SevpaAlphabet): + def from_state_setup(state_setup: dict, **kwargs): + + init_state_id = kwargs['init_state_id'] + input_alphabet = kwargs['input_alphabet'] # build states with state_id and output states = {key: SevpaState(key, val[0]) for key, val in state_setup.items()} diff --git a/aalpy/automata/StochasticMealyMachine.py 
b/aalpy/automata/StochasticMealyMachine.py index 2b164eec..beee32da 100644 --- a/aalpy/automata/StochasticMealyMachine.py +++ b/aalpy/automata/StochasticMealyMachine.py @@ -86,7 +86,7 @@ def to_state_setup(self): return state_setup_dict @staticmethod - def from_state_setup(state_setup: dict): + def from_state_setup(state_setup : dict, **kwargs): states_map = {key: StochasticMealyState(key) for key in state_setup.keys()} for key, values in state_setup.items(): diff --git a/aalpy/automata/Vpa.py b/aalpy/automata/Vpa.py index 94e97816..8805d8a8 100644 --- a/aalpy/automata/Vpa.py +++ b/aalpy/automata/Vpa.py @@ -198,7 +198,7 @@ def to_state_setup(self): return state_setup_dict @staticmethod - def from_state_setup(state_setup: dict, init_state_id: str, input_alphabet: VpaAlphabet): + def from_state_setup(state_setup: dict, **kwargs): """ Create a VPA from a state setup. @@ -228,6 +228,9 @@ def from_state_setup(state_setup: dict, init_state_id: str, input_alphabet: VpaA """ # state_setup should map from state_id to tuple(is_accepting and transitions_dict) + init_state_id = kwargs['init_state_id'] + input_alphabet = kwargs['input_alphabet'] + # build states with state_id and output states = {key: VpaState(key, val[0]) for key, val in state_setup.items()} states[Vpa.error_state.state_id] = Vpa.error_state # PdaState(Pda.error_state,False) diff --git a/aalpy/base/Automaton.py b/aalpy/base/Automaton.py index a0bd23ef..02e47e42 100644 --- a/aalpy/base/Automaton.py +++ b/aalpy/base/Automaton.py @@ -155,7 +155,7 @@ def visualize(self, path='LearnedModel', file_type='pdf', display_same_state_tra @staticmethod @abstractmethod - def from_state_setup(state_setup: dict): + def from_state_setup(state_setup: dict, **kwargs): pass @abstractmethod diff --git a/aalpy/utils/BenchmarkVpaModels.py b/aalpy/utils/BenchmarkVpaModels.py index ed51c615..26fc54af 100644 --- a/aalpy/utils/BenchmarkVpaModels.py +++ b/aalpy/utils/BenchmarkVpaModels.py @@ -15,7 +15,7 @@ def vpa_for_L1(): "q1": (False, 
{"a": [("q1", 'push', "a")], "b": [("q2", 'pop', "a")]}), "q2": (True, {"a": [(Vpa.error_state.state_id, None, None)], "b": [("q2", 'pop', "a")]}), } - vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) + vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return vpa @@ -38,7 +38,7 @@ def vpa_for_L2(): "c": [("q2", 'pop', "a"), ("q2", 'pop', "b")], "d": [("q2", 'pop', "a"), ("q2", 'pop', "b")]}), } - vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) + vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return vpa @@ -67,7 +67,7 @@ def vpa_for_L3(): "q2": (True, {"e": [("q2e", 'pop', "b"), ("q2e", 'pop', "d")], "g": [("q2g", 'pop', "b"), ("q2g", 'pop', "d")]}) } - vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) + vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return vpa @@ -88,7 +88,7 @@ def vpa_for_L4(): "q21": (False, {"d": [("q2", 'pop', "a")]}), "q2": (True, {"c": [("q21", 'pop', "b")]}), } - vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) + vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return vpa @@ -111,7 +111,7 @@ def vpa_for_L5(): "q22": (False, {"f": [("q2", 'pop', "a")]}), "q2": (True, {"d": [("q21", 'pop', "c")]}), } - vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) + vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return vpa @@ -134,7 +134,7 @@ def vpa_for_L7(): "]": [("q1", 'pop', "[")] }), } - vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) + vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return vpa @@ -160,7 +160,7 @@ def vpa_for_L8(): "}": [("q1", 'pop', "{")], }), } - vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) + vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) 
return vpa @@ -189,7 +189,7 @@ def vpa_for_L9(): ">": [("q1", 'pop', "<")], }), } - vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) + vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return vpa @@ -220,7 +220,7 @@ def vpa_for_L10(): "qx": (False, {"y": [("qy", None, None)]}), "qy": (False, {"z": [("q1", None, None)]}) } - vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) + vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return vpa @@ -245,7 +245,7 @@ def vpa_for_L11(): "r2": [("q1", 'pop', "c1"), ("q1", 'pop', "c2")]}), "qd": (False, {"i2": [("q1", None, None)]}) } - vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) + vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return vpa @@ -271,7 +271,7 @@ def vpa_for_L12(): "]": [("q2", 'pop', "[")] }), } - vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) + vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return vpa @@ -297,7 +297,7 @@ def vpa_for_L13(): "c": [("q1", None, None)] }), } - vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) + vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return vpa @@ -326,7 +326,7 @@ def vpa_for_L14(): "c": [("q1", None, None)] }), } - vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) + vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return vpa @@ -354,7 +354,7 @@ def vpa_for_L15(): "qb": (False, {"c": [("q1", None, None)], }) } - vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) + vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return vpa @@ -374,5 +374,5 @@ def vpa_for_L16(): }), "q2": (True, {}) } - vpa = Vpa.from_state_setup(state_setup, "q0", input_alphabet) + vpa = Vpa.from_state_setup(state_setup, 
init_state_id="q0", input_alphabet=input_alphabet) return vpa diff --git a/test_main.py b/test_main.py index 01d049ba..ae4521bc 100644 --- a/test_main.py +++ b/test_main.py @@ -71,7 +71,6 @@ def step(self, letter): learned_model = run_KV(alphabet, sul, eq_oracle, automaton_type='vpa') learned_model.visualize() - exit() def test_on_random_svepa(): @@ -201,7 +200,7 @@ def test_cex_processing_strategies_vpa(): assert False # test_cex_processing_strategies_vpa() -test_arithmetic_expression() +# test_arithmetic_expression() # test_on_random_svepa() # import cProfile # pr = cProfile.Profile() From c5edd2ca0761641db029b5c29859ccf187421ace Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Wed, 29 Nov 2023 11:20:58 +0100 Subject: [PATCH 53/62] merge with upstram master --- aalpy/base/Automaton.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aalpy/base/Automaton.py b/aalpy/base/Automaton.py index af40d675..2e114ee2 100644 --- a/aalpy/base/Automaton.py +++ b/aalpy/base/Automaton.py @@ -155,7 +155,7 @@ def visualize(self, path='LearnedModel', file_type='pdf', display_same_state_tra @staticmethod @abstractmethod - def from_state_setup(state_setup: dict, **kwargs): + def from_state_setup(state_setup: dict, **kwargs) -> 'Automaton': pass @abstractmethod From 00933e7cc774c2371597834843fee72db0f6b824 Mon Sep 17 00:00:00 2001 From: Moritz Pistauer Date: Sun, 3 Dec 2023 20:56:15 +0100 Subject: [PATCH 54/62] Benchmark vpa learning --- .../vpa_benchmarking/benchmark_vpa.py | 260 ++++++++++++++++++ aalpy/utils/AutomatonGenerators.py | 22 +- 2 files changed, 275 insertions(+), 7 deletions(-) create mode 100644 Benchmarking/vpa_benchmarking/benchmark_vpa.py diff --git a/Benchmarking/vpa_benchmarking/benchmark_vpa.py b/Benchmarking/vpa_benchmarking/benchmark_vpa.py new file mode 100644 index 00000000..54355f8f --- /dev/null +++ b/Benchmarking/vpa_benchmarking/benchmark_vpa.py @@ -0,0 +1,260 @@ +from collections import defaultdict + +import matplotlib.pyplot as plt 
+import numpy as np +import pickle + +from aalpy.SULs.AutomataSUL import SevpaSUL, VpaSUL, DfaSUL +from aalpy.automata import SevpaAlphabet +from aalpy.learning_algs import run_KV +from aalpy.oracles import RandomWordEqOracle +from aalpy.utils import generate_random_sevpa, visualize_automaton +from aalpy.utils.BenchmarkVpaModels import * + + +def state_increasing(): + print("Benchmarking for increasing state size") + max_number_states = 100 + step_size = 10 + repeats = 10 + + cex_processing = ['rs', 'linear_fwd', 'linear_bwd', 'exponential_fwd', 'exponential_bwd'] + # cex_processing = ['rs'] + data_dict = defaultdict(tuple) + + for cex in cex_processing: + states_data_median = [] + query_data_median = [] + for number_states in range(10, max_number_states + 1, step_size): + print(number_states) + states_data = [] + query_data = [] + for x in range(repeats): + random_svepa = generate_random_sevpa(num_states=number_states, internal_alphabet_size=3, + call_alphabet_size=3, + return_alphabet_size=3, + acceptance_prob=0.4, + return_transition_prob=0.5) + + alphabet = random_svepa.input_alphabet + + sul = SevpaSUL(random_svepa) + + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000, + min_walk_len=10, max_walk_len=30) + + model, data = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', + print_level=0, cex_processing=cex, return_data=True) + states_data.append(number_states) + query_data.append(data['queries_learning']) + + states_data_median.append(np.median(states_data)) + query_data_median.append(np.median(query_data)) + + data_dict[cex] = (states_data_median, query_data_median) + + # Save data_dict to a pickle file + with open('state_increasing.pickle', 'wb') as file: + pickle.dump(data_dict, file) + + # plot + plt.figure() + plt.xlabel('Number of states') + plt.ylabel('Number of membership queries') + plt.title('Query growth of a random SEVPA with increasing state size') + for key in data_dict: 
+ plt.plot(data_dict[key][0], data_dict[key][1], label=key) + plt.legend() + plt.savefig('state_increasing.png') + + +def alphabet_increasing(): + print("Benchmarking for increasing alphabet size") + repeats = 10 + max_alphabet_size = 15 + + cex_processing = ['rs', 'linear_fwd', 'linear_bwd', 'exponential_fwd', 'exponential_bwd'] + # cex_processing = ['rs'] + data_dict = defaultdict(tuple) + + for cex in cex_processing: + states_data_median = [] + query_data_median = [] + for alphabet_size in range(1, max_alphabet_size): + print(alphabet_size) + for x in range(repeats): + random_svepa = generate_random_sevpa(num_states=100, internal_alphabet_size=alphabet_size, + call_alphabet_size=alphabet_size, + return_alphabet_size=alphabet_size, + acceptance_prob=0.4, + return_transition_prob=0.5) + + alphabet = random_svepa.input_alphabet + + sul = SevpaSUL(random_svepa) + + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000, + min_walk_len=10, max_walk_len=30) + + states_data = [] + query_data = [] + model, data = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', + print_level=0, cex_processing=cex, return_data=True) + states_data.append(alphabet_size * 3) + query_data.append(data['queries_learning']) + + states_data_median.append(np.median(states_data)) + query_data_median.append(np.median(query_data)) + + data_dict[cex] = (states_data_median, query_data_median) + + # Save data_dict to a pickle file + with open('alphabet_increasing.pickle', 'wb') as file: + pickle.dump(data_dict, file) + + # plot + plt.figure() + plt.xlabel('Size of the input alphabet') + plt.ylabel('Number of membership queries') + plt.title('Query growth of a random SEVPA with increasing alphabet size') + for key in data_dict: + plt.plot(data_dict[key][0], data_dict[key][1], label=key) + plt.legend() + plt.savefig('alphabet_increasing.png') + + +def alphabet_increasing_variable(): + print("Benchmarking for variably increasing 
alphabet size") + repeats = 10 + max_alphabet_size = 15 + + data_dict = defaultdict(tuple) + alphabet_types = ['int', 'call', 'ret'] + + for alphabet_type in alphabet_types: + states_data_median = [] + query_data_median = [] + for alphabet_size in range(1, max_alphabet_size): + print(alphabet_size) + for x in range(repeats): + if alphabet_type == 'int': + random_svepa = generate_random_sevpa(num_states=100, internal_alphabet_size=alphabet_size, + call_alphabet_size=1, + return_alphabet_size=1, + acceptance_prob=0.4, + return_transition_prob=0.5) + elif alphabet_type == 'call': + random_svepa = generate_random_sevpa(num_states=100, internal_alphabet_size=alphabet_size, + call_alphabet_size=1, + return_alphabet_size=1, + acceptance_prob=0.4, + return_transition_prob=0.5) + elif alphabet_type == 'ret': + random_svepa = generate_random_sevpa(num_states=100, internal_alphabet_size=alphabet_size, + call_alphabet_size=1, + return_alphabet_size=1, + acceptance_prob=0.4, + return_transition_prob=0.5) + + alphabet = random_svepa.input_alphabet + + sul = SevpaSUL(random_svepa) + + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000, + min_walk_len=10, max_walk_len=30) + + states_data = [] + query_data = [] + model, data = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', + print_level=0, cex_processing='rs', return_data=True) + states_data.append(alphabet_size) + query_data.append(data['queries_learning']) + + states_data_median.append(np.median(states_data)) + query_data_median.append(np.median(query_data)) + + data_dict[alphabet_type] = (states_data_median, query_data_median) + + # Save data_dict to a pickle file + with open('alphabet_increasing_variable.pickle', 'wb') as file: + pickle.dump(data_dict, file) + + # plot + plt.figure() + plt.xlabel('Size of the input alphabet') + plt.ylabel('Number of membership queries') + plt.title('Query growth of a random SEVPA with increasing alphabet size') + for 
key in data_dict: + plt.plot(data_dict[key][0], data_dict[key][1], label=key) + plt.legend() + plt.savefig('alphabet_increasing_variable.png') + + +def benchmark_vpa_dfa(): + max_learning_rounds = 100 + data_dict = defaultdict(tuple) + label_data = [] + + for i, vpa in enumerate( + [vpa_for_L1(), vpa_for_L2(), vpa_for_L3(), vpa_for_L4(), vpa_for_L5(), vpa_for_L7(), vpa_for_L8(), + vpa_for_L9(), vpa_for_L10(), vpa_for_L11(), vpa_for_L12(), vpa_for_L13(), vpa_for_L14(), vpa_for_L15()]): + print(f'VPA {i + 1 if i < 6 else i + 2}') + label_data.append(f'VPA {i + 1 if i < 6 else i + 2}') + + model_under_learning = vpa + + alphabet_sevpa = SevpaAlphabet(list(model_under_learning.internal_set), + list(model_under_learning.call_set), + list(model_under_learning.return_set)) + + alphabet_dfa = model_under_learning.input_alphabet.get_merged_alphabet() + + sul_vpa = VpaSUL(vpa) + sul_dfa = DfaSUL(vpa) + + eq_oracle_vpa = RandomWordEqOracle(alphabet=alphabet_sevpa.get_merged_alphabet(), sul=sul_vpa, num_walks=10000, + min_walk_len=10, max_walk_len=30) + eq_oracle_dfa = RandomWordEqOracle(alphabet=alphabet_sevpa.get_merged_alphabet(), sul=sul_vpa, num_walks=10000, + min_walk_len=10, max_walk_len=30) + + model_vpa, data_vpa = run_KV(alphabet=alphabet_sevpa, sul=sul_vpa, eq_oracle=eq_oracle_vpa, automaton_type='vpa', + print_level=0, cex_processing='rs', return_data=True, + max_learning_rounds=max_learning_rounds) + + model_dfa, data_dfa = run_KV(alphabet=alphabet_dfa, sul=sul_dfa, eq_oracle=eq_oracle_dfa, automaton_type='dfa', + print_level=0, cex_processing='rs', return_data=True, + max_learning_rounds=max_learning_rounds) + + print(data_dfa['queries_learning']) + + data_dict[vpa] = (data_vpa['queries_learning'], data_dfa['queries_learning']) + + + # Save data_dict to a pickle file + with open('benchmark_vpa_dfa.pickle', 'wb') as file: + pickle.dump(data_dict, file) + + #plotting + keys = list(data_dict.keys()) + values = list(data_dict.values()) + data1, data2 = zip(*values) + 
+ # Creating bar graph + bar_width = 0.35 + index = np.arange(len(keys)) + plt.bar(index, data1, bar_width, label='Data VPA', align='center') + plt.bar(index + bar_width, data2, bar_width, label='Data DFA', align='center') + + plt.xlabel('VPA Instances') + plt.ylabel('Number of Queries') + plt.title('Bar Graph of Queries for VPA and DFA') + plt.xticks(index + bar_width / 2, label_data) + plt.legend() + plt.show() + + +# choose which benchmark to execute +state_increasing() +alphabet_increasing() +alphabet_increasing_variable() +benchmark_vpa_dfa() diff --git a/aalpy/utils/AutomatonGenerators.py b/aalpy/utils/AutomatonGenerators.py index 4c687ade..9dedc49b 100644 --- a/aalpy/utils/AutomatonGenerators.py +++ b/aalpy/utils/AutomatonGenerators.py @@ -451,6 +451,20 @@ def _has_transition(state: SevpaState, transition_letter, stack_guard) -> bool: def generate_random_sevpa(num_states, internal_alphabet_size, call_alphabet_size, return_alphabet_size , acceptance_prob, return_transition_prob): + """ + Generate a random Single Entry Visibly Pushdown Automaton (SEVPA). + + Args: + num_states (int): The number of states in the SEVPA. + internal_alphabet_size (int): The size of the internal alphabet. + call_alphabet_size (int): The size of the call alphabet. + return_alphabet_size (int): The size of the return alphabet. + acceptance_prob (float): The probability of a state being an accepting state. + return_transition_prob (float): The probability of generating a return transition. + + Returns: + Sevpa: A randomly generated SEVPA. 
+ """ internal_alphabet = [f'i{i}' for i in range(internal_alphabet_size)] call_alphabet = [f'c{i}' for i in range(call_alphabet_size)] @@ -463,6 +477,7 @@ def generate_random_sevpa(num_states, internal_alphabet_size, call_alphabet_size for state in states: if not internal_alphabet or random.uniform(0.0, 1.0) < return_transition_prob: + # add return transition while True: return_letter = random.choice(return_alphabet) stack_state = random.choice(states) if not state_buffer else random.choice(state_buffer) @@ -488,7 +503,6 @@ def generate_random_sevpa(num_states, internal_alphabet_size, call_alphabet_size target_state = random.choice(states) if not state_buffer else random.choice(state_buffer) if target_state in state_buffer: state_buffer.remove(target_state) - state.transitions[internal_letter].append( SevpaTransition(state, target_state, internal_letter, None, None)) @@ -511,10 +525,4 @@ def generate_random_sevpa(num_states, internal_alphabet_size, call_alphabet_size state.transitions[return_letter].append( SevpaTransition(state, target_state, return_letter, 'pop', stack_guard)) - # add call transitions - for call_letter in call_alphabet: - trans = SevpaTransition(start=state, target=initial_state, symbol=call_letter, action='push', - stack_guard=f'{state.state_id}{call_letter}') - state.transitions[call_letter].append(trans) - return Sevpa(initial_state, states, sevpa_alphabet) From a8f905214125087fab779ed016aaf4cb52b48cc2 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Tue, 12 Dec 2023 14:19:02 +0100 Subject: [PATCH 55/62] WIP on refactoring of VPA learning to simplfy it before merging - remove sevpa alphabet from constrctor, it can be deduced from states! 
--- Examples.py | 68 ++++- VPA.md | 66 ----- aalpy/SULs/AutomataSUL.py | 18 +- aalpy/SULs/__init__.py | 2 +- aalpy/automata/Sevpa.py | 23 +- aalpy/automata/Vpa.py | 329 ------------------------ aalpy/automata/__init__.py | 1 - aalpy/base/Automaton.py | 3 +- aalpy/utils/BenchmarkSevpaModels.py | 40 +-- aalpy/utils/BenchmarkVpaModels.py | 378 ---------------------------- aalpy/utils/FileHandler.py | 28 +-- det_cex_processing_test.py | 25 -- sevpa_eq_checks.py | 79 ------ test_main.py | 237 ----------------- vpa_eq_checks.py | 79 ------ 15 files changed, 122 insertions(+), 1254 deletions(-) delete mode 100644 VPA.md delete mode 100644 aalpy/automata/Vpa.py delete mode 100644 aalpy/utils/BenchmarkVpaModels.py delete mode 100644 det_cex_processing_test.py delete mode 100644 sevpa_eq_checks.py delete mode 100644 test_main.py delete mode 100644 vpa_eq_checks.py diff --git a/Examples.py b/Examples.py index 2d1fa5e6..079f4ac1 100644 --- a/Examples.py +++ b/Examples.py @@ -961,4 +961,70 @@ def learning_context_free_grammar_example(): eq_oracle = RandomWordEqOracle(sevpa_alphabet.get_merged_alphabet(), balanced_string_sul, num_walks=1000, min_walk_len=5, max_walk_len=30) learned_model = run_KV(sevpa_alphabet, balanced_string_sul, eq_oracle, automaton_type='vpa') - learned_model.visualize() \ No newline at end of file + learned_model.visualize() + + +def test_arithmetic_expression(): + from aalpy.base import SUL + from aalpy.automata import SevpaAlphabet + from aalpy.oracles import RandomWordEqOracle + from aalpy.learning_algs import run_KV + import warnings + warnings.filterwarnings("ignore") + + class ArithmeticSUL(SUL): + def __init__(self): + super().__init__() + self.string_under_test = '' + + def pre(self): + self.string_under_test = '' + + def post(self): + pass + + def step(self, letter): + if letter: + self.string_under_test += ' ' + letter + + try: + eval(self.string_under_test) + return True + except (SyntaxError, TypeError): + return False + + sul = ArithmeticSUL() 
+ + alphabet = SevpaAlphabet(internal_alphabet=['1', '+'], call_alphabet=['('], return_alphabet=[')']) + + eq_oracle = RandomWordEqOracle(alphabet.get_merged_alphabet(), sul, min_walk_len=5, + max_walk_len=20, num_walks=20000) + + learned_model = run_KV(alphabet, sul, eq_oracle, automaton_type='vpa') + learned_model.visualize() + + +def test_on_random_svepa(): + from aalpy.SULs import SevpaSUL + from aalpy.oracles import RandomWordEqOracle + from aalpy.learning_algs import run_KV + from aalpy.utils import generate_random_sevpa + + random_svepa = generate_random_sevpa(num_states=50, internal_alphabet_size=3, + call_alphabet_size=3, + return_alphabet_size=3, + acceptance_prob=0.4, + return_transition_prob=0.5) + + # from aalpy.utils.BenchmarkVpaModels import vpa_for_L11 + # balanced_parentheses = vpa_for_L11() + + alphabet = random_svepa.input_alphabet + + sul = SevpaSUL(random_svepa) + + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000, + min_walk_len=10, max_walk_len=30) + + model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', + print_level=2, cex_processing='rs') diff --git a/VPA.md b/VPA.md deleted file mode 100644 index 6dfb1518..00000000 --- a/VPA.md +++ /dev/null @@ -1,66 +0,0 @@ -# Theory of VPA -A VPA (Visible Pushdown Automata) is similar to a regular Pushdown Automata with the difference of a split-up alphabet. - -The alphabet of a VPA is a triple with: -- Σcall: call set (set of all call letters --> letters that are used for push actions on the automata) -- Σret: return set (set of all return letters --> letters that are used for pop actions on the automata) -- Σint: internal set (set of all internal letters --> letters that are used for internal transitions that don't alter the stack) - -The unification of all three sets is the alphabet. -The symmetric difference is the empty set. - -### The call/return balance -The call/return balance is a function β. 
The function maps a word of the language to an integer based on the composition of call and return letters. Here is how it works: -- Call letters add "1" to the balance -- Return letters subtract "1" from the balance -- Internal letters have no impact on the balance - -#### Example: -Imagine a language with: -- Σcall = {a, b} -- Σret = {c, d} -- Σint = {e, f} - -We would have the following balances for the following words: -- aabbccdd = 0 (1+1+1+1-1-1-1-1) -- abab = 4 (1+1+1+1) -- cccc = -4 (-1-1-1-1) -- cdeabef = 0 (-1-1+0+1+1+0+0) - -### Call-matched, return-matched, well-matched -By defining the call/return balance we can introduce the definition of call-matched, return-matched and well-matched words. The definition is as follows: -- **Return-matched** words have a β >= 0 (They have more call-letters and therefore the balance is greater-equal than 0) -The set of return matched words is called MR(Σ) -- **Call-matched** words have a β =< 0 (They have more return-letters and therefore the balance is greater-equal than 0) -The set of call matched words is called MC(Σ) -- **Well-matched** words have a β = 0 (The composition of call and return letters in well-matched words equalizes to zero) -The set of well matched words is called MW(Σ) - -### Context pairs (CP) -Furthermore we want to introduce context pairs, the set of context pairs is CP(Σ). Context pairs are well matched words having the form of u*v. -- u has the form of MR(Σ) * Σcall or is the empty word ε -This means if u is of the form MR(Σ) * Σcall it has at least a β >= 1 -- v is of the form MC(Σ) -- β(u) = -β(v). 
-- v is the matching word for u to be u*v ∈ WM(Σ) -#### Example: -If we get back at our last language with the following split: -- Σcall = {a, b} -- Σret = {c, d} -- Σint = {e, f} - -We have could have this types of context pairs: -- u = aa | v = cc -- u = aca | v = d - -### Output functions -As Malte Isberner described in his paper we want to introduce the definition of output functions as they are used for several relations and congruences. -The intuitive meaning of the output function is if the word is part of the language respectively if the automata accepts or rejects the input. -The output function is of the form: -λ: Σ* --> B -Generally it will be assumed that the output function is a well-matched output function, which is of the form: -**λ: WM(Σ*) --> B** - -### Important Congruences - -#### Nerode-congruence diff --git a/aalpy/SULs/AutomataSUL.py b/aalpy/SULs/AutomataSUL.py index 220b6463..bd89cc92 100644 --- a/aalpy/SULs/AutomataSUL.py +++ b/aalpy/SULs/AutomataSUL.py @@ -1,5 +1,5 @@ from aalpy.base import SUL -from aalpy.automata import Dfa, MealyMachine, MooreMachine, Onfsm, Mdp, StochasticMealyMachine, MarkovChain, Vpa, Sevpa +from aalpy.automata import Dfa, MealyMachine, MooreMachine, Onfsm, Mdp, StochasticMealyMachine, MarkovChain, Sevpa class DfaSUL(SUL): @@ -166,22 +166,6 @@ def step(self, letter): return self.smm.step(letter) - -class VpaSUL(SUL): - def __init__(self, vpa: Vpa): - super().__init__() - self.vpa = vpa - - def pre(self): - self.vpa.reset_to_initial() - - def post(self): - pass - - def step(self, letter): - return self.vpa.step(letter) - - class SevpaSUL(SUL): def __init__(self, sevpa: Sevpa): super().__init__() diff --git a/aalpy/SULs/__init__.py b/aalpy/SULs/__init__.py index 03fc2df7..bd58e3ea 100644 --- a/aalpy/SULs/__init__.py +++ b/aalpy/SULs/__init__.py @@ -1,4 +1,4 @@ -from .AutomataSUL import DfaSUL, MealySUL, MooreSUL, MdpSUL, OnfsmSUL, StochasticMealySUL, McSUL +from .AutomataSUL import DfaSUL, MealySUL, MooreSUL, MdpSUL, 
OnfsmSUL, StochasticMealySUL, McSUL, SevpaSUL from .PyMethodSUL import FunctionDecorator, PyClassSUL from .RegexSUL import RegexSUL from .TomitaSUL import TomitaSUL \ No newline at end of file diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 97188e53..11e34758 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -66,7 +66,7 @@ class SevpaTransition: start (SevpaState): The starting state of the transition. target (SevpaState): The target state of the transition. symbol: The symbol associated with the transition. - action: The action performed during the transition (push | pop | None). + action: The action performed during the transition (pop | None). stack_guard: The stack symbol to be pushed/popped. """ def __init__(self, start: SevpaState, target: SevpaState, symbol, action, stack_guard=None): @@ -90,11 +90,11 @@ class Sevpa(Automaton): """ empty = "_" - def __init__(self, initial_state: SevpaState, states: list[SevpaState], input_alphabet: SevpaAlphabet): + def __init__(self, initial_state: SevpaState, states: list[SevpaState]): super().__init__(initial_state, states) self.initial_state = initial_state self.states = states - self.input_alphabet = input_alphabet + self.input_alphabet = self.get_input_alphabet() self.current_state = None self.stack = [] self.error_state_reached = False @@ -284,6 +284,23 @@ def create_daisy_hypothesis(initial_state, alphabet): return Sevpa(initial_state, [initial_state], alphabet) + def get_input_alphabet(self): + if self.input_alphabet: + return self.input_alphabet + + internal, ret, call = [], [], [] + for state in self.states: + for transition in state.transitions: + if transition.action == 'pop': + if transition.symbol not in ret: + ret.append(transition.symbol) + if transition.stack_guard[1] not in call: + call.append(transition.stack_guard[1]) + else: + internal.append(transition.symbol) + + return SevpaAlphabet(internal, call, ret) + def get_error_state(self): """ A state is an error 
state iff: diff --git a/aalpy/automata/Vpa.py b/aalpy/automata/Vpa.py deleted file mode 100644 index 8805d8a8..00000000 --- a/aalpy/automata/Vpa.py +++ /dev/null @@ -1,329 +0,0 @@ -import random -from collections import defaultdict -from typing import List, Dict - -from aalpy.base import Automaton, AutomatonState - - -class VpaAlphabet: - """ - The Alphabet of a VPA. - - Attributes: - internal_alphabet (List[str]): Letters for internal transitions. - call_alphabet (List[str]): Letters for push transitions. - return_alphabet (List[str]): Letters for pop transitions. - exclusive_call_return_pairs (Dict[str, str]): A dictionary representing exclusive pairs - of call and return symbols. - """ - - def __init__(self, internal_alphabet: List[str], call_alphabet: List[str], return_alphabet: List[str], - exclusive_call_return_pairs: Dict[str, str] = None): - self.internal_alphabet = internal_alphabet - self.call_alphabet = call_alphabet - self.return_alphabet = return_alphabet - self.exclusive_call_return_pairs = exclusive_call_return_pairs - - def get_merged_alphabet(self) -> List[str]: - """ - Get the merged alphabet, including internal, call, and return symbols. - - Returns: - List[str]: A list of all symbols in the alphabet. - """ - alphabet = list() - alphabet.extend(self.internal_alphabet) - alphabet.extend(self.call_alphabet) - alphabet.extend(self.return_alphabet) - return alphabet - - def __str__(self) -> str: - """ - Returns: - str: A string representation of the alphabet. - """ - return f'Internal: {self.internal_alphabet} Call: {self.call_alphabet} Return: {self.return_alphabet}' - - -class VpaState(AutomatonState): - """ - Single state of a VPA. - """ - def __init__(self, state_id, is_accepting=False): - super().__init__(state_id) - self.transitions = defaultdict(list) - self.is_accepting = is_accepting - - -class VpaTransition: - """ - Represents a transition in a VPA. - - Attributes: - start (VpaState): The starting state of the transition. 
- target (VpaState): The target state of the transition. - symbol: The symbol associated with the transition. - action: The action performed during the transition (push | pop | None). - stack_guard: The stack symbol to be pushed/popped. - """ - def __init__(self, start: VpaState, target: VpaState, symbol, action, stack_guard=None): - self.start = start - self.target = target - self.symbol = symbol - self.action = action - self.stack_guard = stack_guard - - def __str__(self): - return f"{self.symbol}: {self.start.state_id} --> {self.target.state_id} | {self.action}: {self.stack_guard}" - - -class Vpa(Automaton): - """ - Visibly Pushdown Automaton. - """ - empty = "_" - error_state = VpaState("ErrorSinkState", False) - - def __init__(self, initial_state: VpaState, states, input_alphabet: VpaAlphabet): - super().__init__(initial_state, states) - self.initial_state = initial_state - self.states = states - self.input_alphabet = input_alphabet - self.current_state = None - self.call_balance = 0 # TODO: we don't use that - self.stack = [] - - # alphabet sets for faster inclusion checks (as in VpaAlphabet we have lists, for reproducibility) - self.internal_set = set(self.input_alphabet.internal_alphabet) - self.call_set = set(self.input_alphabet.call_alphabet) - self.return_set = set(self.input_alphabet.return_alphabet) - - def reset_to_initial(self): - super().reset_to_initial() - self.reset() - - def reset(self): - self.current_state = self.initial_state - self.stack = [self.empty] - self.call_balance = 0 - return self.current_state.is_accepting and self.top() == self.empty - - def top(self): - return self.stack[-1] - - def pop(self): - return self.stack.pop() - - def possible(self, letter): - """ - Checks if a certain step on the automaton is possible - """ - if self.current_state == Vpa.error_state: - return True - if letter is not None: - transitions = self.current_state.transitions[letter] - possible_trans = [] - for t in transitions: - if t.symbol in self.call_set: 
- possible_trans.append(t) - elif t.symbol in self.return_set: - if t.stack_guard == self.top(): - possible_trans.append(t) - elif t.symbol in self.internal_set: - possible_trans.append(t) - else: - assert False and print(f'Letter {letter} is not part of any alphabet') - assert len(possible_trans) < 2 - if len(possible_trans) == 0: - return False - else: - return True - return False - - def step(self, letter): - """ - Perform a single step on the VPA by transitioning with the given input letter. - - Args: - letter: A single input that is looked up in the transition table of the VpaState. - - Returns: - bool: True if the reached state is an accepting state and the stack is empty, False otherwise. - """ - if self.current_state == Vpa.error_state: - return False - if not self.possible(letter): - self.current_state = Vpa.error_state - return False - if letter is not None: - transitions = self.current_state.transitions[letter] - possible_trans = [] - for t in transitions: - if t.symbol in self.call_set: - possible_trans.append(t) - elif t.symbol in self.return_set: - if t.stack_guard == self.top(): - possible_trans.append(t) - elif t.symbol in self.internal_set: - possible_trans.append(t) - else: - assert False - - assert len(possible_trans) < 2 - trans = possible_trans[0] - self.current_state = trans.target - if trans.action == 'push': - assert(letter in self.call_set) # push letters must be in call set - self.stack.append(trans.stack_guard) - elif trans.action == 'pop': - assert(letter in self.return_set) # pop letters must be in return set - if len(self.stack) <= 1: # empty stack elem should always be there - self.current_state = Vpa.error_state - return False - self.stack.pop() - - return self.current_state.is_accepting and self.top() == self.empty - - def to_state_setup(self): - state_setup_dict = {} - - # ensure prefixes are computed - # self.compute_prefixes() - - sorted_states = sorted(self.states, key=lambda x: len(x.prefix)) - for s in sorted_states: - 
state_setup_dict[s.state_id] = ( - s.is_accepting, {k: (v.target.state_id, v.action) for k, v in s.transitions.items()}) - - return state_setup_dict - - @staticmethod - def from_state_setup(state_setup: dict, **kwargs): - """ - Create a VPA from a state setup. - - Example state setup: - state_setup = { - "q0": (False, {"(": [("q1", 'push', "(")], - "[": [("q1", 'push', "[")], # exclude empty seq - }), - "q1": (False, {"(": [("q1", 'push', "(")], - "[": [("q1", 'push', "[")], - ")": [("q2", 'pop', "(")], - "]": [("q2", 'pop', "[")]}), - "q2": (True, { - ")": [("q2", 'pop', "(")], - "]": [("q2", 'pop', "[")] - }), - - Args: - state_setup (dict): A dictionary mapping from state IDs to tuples containing - (is_accepting: bool, transitions_dict: dict), where transitions_dict maps input symbols to - lists of tuples (target_state_id, action, stack_guard). - init_state_id (str): The state ID for the initial state of the VPA. - input_alphabet (VpaAlphabet): The alphabet for the VPA. - - Returns: - Vpa: The constructed Variable Pushdown Automaton. 
- """ - # state_setup should map from state_id to tuple(is_accepting and transitions_dict) - - init_state_id = kwargs['init_state_id'] - input_alphabet = kwargs['input_alphabet'] - - # build states with state_id and output - states = {key: VpaState(key, val[0]) for key, val in state_setup.items()} - states[Vpa.error_state.state_id] = Vpa.error_state # PdaState(Pda.error_state,False) - # add transitions to states - for state_id, state in states.items(): - if state_id == Vpa.error_state.state_id: - continue - for _input, trans_spec in state_setup[state_id][1].items(): - for (target_state_id, action, stack_guard) in trans_spec: - trans = VpaTransition(start=state, target=states[target_state_id], symbol=_input, action=action, - stack_guard=stack_guard) - state.transitions[_input].append(trans) - - init_state = states[init_state_id] - # states to list - states = [state for state in states.values()] - - vpa = Vpa(init_state, states, input_alphabet) - return vpa - - def gen_random_accepting_word(self, return_letter_prob: float = 0.0, call_letter_prob: float = 0.0, - early_finish: bool = True): - """ - Create a random word that gets accepted by the automaton. 
- - Args: - - Returns: - """ - assert return_letter_prob + call_letter_prob <= 1.0 - word = [] - if return_letter_prob == 0.0 and call_letter_prob == 0.0: - return_letter_prob = 0.34 - call_letter_prob = 0.33 - elif return_letter_prob == 0.0 and call_letter_prob != 0.0: - return_letter_prob = (1.0 - call_letter_prob) / 2 - elif return_letter_prob != 0.0 and call_letter_prob == 0.0: - call_letter_prob = (1.0 - return_letter_prob) / 2 - - if len(self.input_alphabet.internal_alphabet) != 0: - internal_letter_prob = 1.0 - return_letter_prob - call_letter_prob - else: - internal_letter_prob = 0.0 - if return_letter_prob == 0.0 and call_letter_prob == 0.0: - return_letter_prob = 0.5 - call_letter_prob = 0.5 - elif return_letter_prob == 0.0 and call_letter_prob != 0.0: - return_letter_prob = (1.0 - call_letter_prob) - elif return_letter_prob != 0.0 and call_letter_prob == 0.0: - call_letter_prob = (1.0 - return_letter_prob) - - assert (call_letter_prob + return_letter_prob + internal_letter_prob) == 1.0 - - call_letter_boarder = call_letter_prob - return_letter_boarder = call_letter_boarder + return_letter_prob - internal_letter_boarder = return_letter_boarder + internal_letter_prob - - self.reset_to_initial() - while True: - letter_type = random.uniform(0.0, 1.0) - if 0.0 <= letter_type <= call_letter_boarder: - possible_letters = self.input_alphabet.call_alphabet - elif call_letter_boarder < letter_type <= return_letter_boarder: - # skip return letters if stack is empty or if the word is empty - if self.stack[-1] == self.empty or word == []: - continue - possible_letters = self.input_alphabet.return_alphabet - elif return_letter_boarder < letter_type <= internal_letter_boarder: - possible_letters = self.input_alphabet.internal_alphabet - else: - assert False - - assert len(possible_letters) > 0 - - letter = '' - if early_finish: - for l in possible_letters: - for transition in self.current_state.transitions[l]: - if transition.target.is_accepting: - letter = l - break - 
break - if letter == '': - random_trans_letter_index = random.randint(0, len(possible_letters) - 1) - letter = possible_letters[random_trans_letter_index] - self.step(letter) - if not self.current_state == self.error_state: - word.append(letter) - else: - self.reset_to_initial() - self.execute_sequence(self.initial_state, word) - - if self.current_state.is_accepting and self.stack[-1] == self.empty: - break - - return word diff --git a/aalpy/automata/__init__.py b/aalpy/automata/__init__.py index 509c16aa..40fd9dcb 100644 --- a/aalpy/automata/__init__.py +++ b/aalpy/automata/__init__.py @@ -5,5 +5,4 @@ from .Onfsm import Onfsm, OnfsmState from .StochasticMealyMachine import StochasticMealyMachine, StochasticMealyState from .MarkovChain import MarkovChain, McState -from .Vpa import Vpa, VpaState from .Sevpa import Sevpa, SevpaState, SevpaAlphabet, SevpaTransition diff --git a/aalpy/base/Automaton.py b/aalpy/base/Automaton.py index 2e114ee2..a05c4a7a 100644 --- a/aalpy/base/Automaton.py +++ b/aalpy/base/Automaton.py @@ -107,7 +107,8 @@ def is_input_complete(self) -> bool: return False return True - def get_input_alphabet(self) -> list: + # returns a list which is input alphabet, or a sevpa alphabet in case of VPAs + def get_input_alphabet(self): """ Returns the input alphabet """ diff --git a/aalpy/utils/BenchmarkSevpaModels.py b/aalpy/utils/BenchmarkSevpaModels.py index 8b83f1df..5ac2a42a 100644 --- a/aalpy/utils/BenchmarkSevpaModels.py +++ b/aalpy/utils/BenchmarkSevpaModels.py @@ -1,4 +1,5 @@ from aalpy.automata.Sevpa import Sevpa, SevpaAlphabet +from aalpy.utils import load_automaton_from_file def sevpa_for_L1(): @@ -18,7 +19,7 @@ def sevpa_for_L1(): 'q1': (True, {'b': [('q1', 'pop', ('q0', 'a'))] }) } - sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return sevpa @@ -41,7 +42,7 @@ def sevpa_for_L2(): 'c': [('q1', 'pop', ('q0', 'a')), ('q1', 'pop', 
('q0', 'b'))] }) } - sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return sevpa @@ -76,7 +77,7 @@ def sevpa_for_L3(): 'q6': (False, {'h': [('q1', 'pop', ('q0', 'c'))] }) } - sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return sevpa @@ -99,7 +100,7 @@ def sevpa_for_L4(): 'q2': (False, {'d': [('q1', 'pop', ('q0', 'a'))] }) } - sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return sevpa @@ -124,7 +125,7 @@ def sevpa_for_L5(): 'q3': (False, {'f': [('q1', 'pop', ('q0', 'a'))] }) } - sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return sevpa @@ -152,7 +153,7 @@ def sevpa_for_L7(): }) } - sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return sevpa @@ -183,7 +184,7 @@ def sevpa_for_L8(): ('q1', 'pop', ('q1', '['))] }) } - sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return sevpa @@ -219,7 +220,7 @@ def sevpa_for_L9(): }) } - sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return sevpa @@ -260,7 +261,7 @@ def sevpa_for_L10(): ("qv", 'pop', ('q2', 'a'))] }) } - sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return sevpa @@ -304,7 +305,7 @@ def sevpa_for_L11(): 
}) } - sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return sevpa @@ -327,7 +328,7 @@ def sevpa_for_L12(): ')': [('q1', 'pop', ('q0', '('))] }) } - sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return sevpa @@ -356,7 +357,7 @@ def sevpa_for_L13(): ('q1', 'pop', ('q1', '('))] }) } - sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return sevpa @@ -389,7 +390,7 @@ def sevpa_for_L14(): ('q1', 'pop', ('q1', '('))] }) } - sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return sevpa @@ -418,5 +419,16 @@ def sevpa_for_L15(): 'q3': (False, {'c': [('q1', None, None)] }) } - sevpa = Sevpa.from_state_setup(state_setup, "q0", input_alphabet) + sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) return sevpa + + +if __name__ == '__main__': + e = sevpa_for_L13() + print(e) + + e.save('test') + m = load_automaton_from_file('test.dot', automaton_type='vpa') + m.visualize() + + diff --git a/aalpy/utils/BenchmarkVpaModels.py b/aalpy/utils/BenchmarkVpaModels.py deleted file mode 100644 index 26fc54af..00000000 --- a/aalpy/utils/BenchmarkVpaModels.py +++ /dev/null @@ -1,378 +0,0 @@ -from aalpy.automata.Vpa import Vpa, VpaAlphabet - - -def vpa_for_L1(): - # we always ensure that n >= 1 - - call_set = ['a'] - return_set = ['b'] - internal_set = [] - - input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) - - state_setup = { - "q0": (False, {"a": [("q1", 'push', "a")], "b": [(Vpa.error_state.state_id, None, None)]}), - "q1": (False, 
{"a": [("q1", 'push', "a")], "b": [("q2", 'pop', "a")]}), - "q2": (True, {"a": [(Vpa.error_state.state_id, None, None)], "b": [("q2", 'pop', "a")]}), - } - vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return vpa - - -def vpa_for_L2(): - call_set = ['a', 'b'] - return_set = ['c', 'd'] - internal_set = [] - - input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) - - state_setup = { - "q0": (False, {"a": [("q1", 'push', "a")], "b": [("q1", 'push', "b")], - "c": [(Vpa.error_state.state_id, None, None)], - "d": [(Vpa.error_state.state_id, None, None)]}), - "q1": (False, {"a": [("q1", 'push', "a")], "b": [("q1", 'push', "b")], - "c": [("q2", 'pop', "a"), ("q2", 'pop', "b")], - "d": [("q2", 'pop', "a"), ("q2", 'pop', "b")]}), - "q2": (True, {"a": [(Vpa.error_state.state_id, None, None)], - "b": [(Vpa.error_state.state_id, None, None)], - "c": [("q2", 'pop', "a"), ("q2", 'pop', "b")], - "d": [("q2", 'pop', "a"), ("q2", 'pop', "b")]}), - } - vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return vpa - - -def vpa_for_L3(): - call_set = ['a', 'c', 'b', 'd'] - return_set = ['e', 'g', 'f', 'h'] - internal_set = [] - - input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) - - state_setup = { - "q0": (False, {"a": [("q0a", 'push', "a")], - "c": [("q0c", 'push', "c")], - }), - "q0a": (False, {"b": [("q1", 'push', "b")]}), - "q0c": (False, {"d": [("q1", 'push', "d")]}), - "q1": (False, {"a": [("q1a", 'push', "a")], - "c": [("q1c", 'push', "c")], - "e": [("q2e", 'pop', "b"), ("q2e", 'pop', "d")], - "g": [("q2g", 'pop', "b"), ("q2g", 'pop', "d")], # stack should actually be redundant - }), - "q1a": (False, {"b": [("q1", 'push', "b")]}), - "q1c": (False, {"d": [("q1", 'push', "d")]}), - "q2e": (False, {"f": [("q2", 'pop', "a"), ("q2", 'pop', "c")]}), - "q2g": (False, {"h": 
[("q2", 'pop', "a"), ("q2", 'pop', "c")]}), - "q2": (True, {"e": [("q2e", 'pop', "b"), ("q2e", 'pop', "d")], - "g": [("q2g", 'pop', "b"), ("q2g", 'pop', "d")]}) - } - vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return vpa - - -def vpa_for_L4(): - call_set = ['a', 'b'] - return_set = ['c', 'd'] - internal_set = [] - - input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) - - state_setup = { - "q0": (False, {"a": [("q01", 'push', "a")], "b": [(Vpa.error_state.state_id, None, None)]}), - "q01": (False, {"b": [("q1", 'push', "b")], "a": [(Vpa.error_state.state_id, None, None)]}), - - "q1": (False, {"a": [("q11", 'push', "a")], "b": [(Vpa.error_state.state_id, None, None)], - "c": [("q21", 'pop', "b")]}), - "q11": (False, {"b": [("q1", 'push', "b")], "a": [(Vpa.error_state.state_id, None, None)]}), - "q21": (False, {"d": [("q2", 'pop', "a")]}), - "q2": (True, {"c": [("q21", 'pop', "b")]}), - } - vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return vpa - - -def vpa_for_L5(): - call_set = ['a', 'b', 'c'] - return_set = ['d', 'e', 'f'] - internal_set = [] - - input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) - - state_setup = { - "q0": (False, {"a": [("q01", 'push', "a")]}), - "q01": (False, {"b": [("q02", 'push', "b")]}), - "q02": (False, {"c": [("q1", 'push', "c")]}), - "q1": (False, {"a": [("q11", 'push', "a")], - "d": [("q21", 'pop', "c")]}), - "q11": (False, {"b": [("q12", 'push', "b")]}), - "q12": (False, {"c": [("q1", 'push', "c")]}), - "q21": (False, {"e": [("q22", 'pop', "b")]}), - "q22": (False, {"f": [("q2", 'pop', "a")]}), - "q2": (True, {"d": [("q21", 'pop', "c")]}), - } - vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return vpa - - -def vpa_for_L7(): - # Dyck order 2 - - call_set = ['(', '['] - 
return_set = [')', ']'] - internal_set = [] - - input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) - - state_setup = { - "q0": (False, {"(": [("q1", 'push', '(')], - "[": [("q1", 'push', '[')], # exclude empty seq - }), - "q1": (True, {"(": [("q1", 'push', '(')], - "[": [("q1", 'push', '[')], - ")": [("q1", 'pop', "(")], - "]": [("q1", 'pop', "[")] - }), - } - vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return vpa - - -def vpa_for_L8(): - # Dyck order 3 - - call_set = ['(', '[', '{'] - return_set = [')', ']', '}'] - internal_set = [] - - input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) - - state_setup = { - "q0": (False, {"(": [("q1", 'push', '(')], - "[": [("q1", 'push', '[')], - "{": [("q1", 'push', '{')], - }), - "q1": (True, {"(": [("q1", 'push', '(')], - "[": [("q1", 'push', '[')], - "{": [("q1", 'push', '{')], - ")": [("q1", 'pop', "(")], - "]": [("q1", 'pop', "[")], - "}": [("q1", 'pop', "{")], - }), - } - vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return vpa - - -def vpa_for_L9(): - # Dyck order 4 - - call_set = ['(', '[', '{', '<'] - return_set = [')', ']', '}', '>'] - internal_set = [] - - input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) - - state_setup = { - "q0": (False, {"(": [("q1", 'push', '(')], - "[": [("q1", 'push', '[')], - "{": [("q1", 'push', '{')], - "<": [("q1", 'push', '<')], - }), - "q1": (True, {"(": [("q1", 'push', '(')], - "[": [("q1", 'push', '[')], - "{": [("q1", 'push', '{')], - "<": [("q1", 'push', '<')], - ")": [("q1", 'pop', "(")], - "]": [("q1", 'pop', "[")], - "}": [("q1", 'pop', "{")], - ">": [("q1", 'pop', "<")], - }), - } - vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return vpa - - -def 
vpa_for_L10(): - # RE Dyck order 1 - - call_set = ['a'] - return_set = ['v'] - internal_set = ['b', 'c', 'd', 'e', 'w', 'x', 'y', 'z'] - - input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) - - state_setup = { - "q0": (False, {"a": [("qa", 'push', "a")], - }), - "qa": (False, {"b": [("qb", None, None)], - }), - "qb": (False, {"c": [("qc", None, None)], - }), - "qc": (False, {"d": [("qd", None, None)], - }), - "qd": (False, {"e": [("q1", None, None)], - }), - "q1": (True, {"a": [("qa", 'push', "a")], - "v": [("qv", 'pop', "a")]}), - "qv": (False, {"w": [("qw", None, None)]}), - "qw": (False, {"x": [("qx", None, None)]}), - "qx": (False, {"y": [("qy", None, None)]}), - "qy": (False, {"z": [("q1", None, None)]}) - } - vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return vpa - - -def vpa_for_L11(): - # RE Dyck order 1 - - call_set = ['c1', 'c2'] - return_set = ['r1', 'r2'] - internal_set = ['i1', 'i2'] - - input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) - - state_setup = { - "q0": (False, {"c1": [("qa", 'push', "c1")], - "c2": [("q1", 'push', "c2")], - }), - "qa": (False, {"i1": [("q1", None, None)], - }), - "q1": (True, {"c1": [("qa", 'push', "c1")], - "c2": [("q1", 'push', "c2")], - "r1": [("qd", 'pop', "c1"), ("qd", 'pop', "c2")], - "r2": [("q1", 'pop', "c1"), ("q1", 'pop', "c2")]}), - "qd": (False, {"i2": [("q1", None, None)]}) - } - vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return vpa - - -def vpa_for_L12(): - # Dyck order 2 (single-nested) - - call_set = ['(', '['] - return_set = [')', ']'] - internal_set = [] - - input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) - - state_setup = { - "q0": (False, {"(": [("q1", 'push', "(")], - "[": [("q1", 'push', "[")], # exclude empty seq - 
}), - "q1": (False, {"(": [("q1", 'push', "(")], - "[": [("q1", 'push', "[")], - ")": [("q2", 'pop', "(")], - "]": [("q2", 'pop', "[")]}), - "q2": (True, { - ")": [("q2", 'pop', "(")], - "]": [("q2", 'pop', "[")] - }), - } - vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return vpa - - -def vpa_for_L13(): - # Dyck order 1 - - call_set = ['('] - return_set = [')'] - internal_set = ['a', 'b', 'c'] - - input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) - - state_setup = { - "q0": (False, {"(": [("q1", 'push', "(")], - "a": [("q1", None, None)], - "b": [("q1", None, None)], - "c": [("q1", None, None)], # exclude empty seq - }), - "q1": (True, {"(": [("q1", 'push', "(")], - ")": [("q1", 'pop', "(")], - "a": [("q1", None, None)], - "b": [("q1", None, None)], - "c": [("q1", None, None)] - }), - } - vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return vpa - - -def vpa_for_L14(): - # Dyck order 2 - - call_set = ['(', '['] - return_set = [')', ']'] - internal_set = ['a', 'b', 'c'] - - input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) - - state_setup = { - "q0": (False, {"(": [("q1", 'push', "(")], - "[": [("q1", 'push', "[")], - "a": [("q1", None, None)], - "b": [("q1", None, None)], - "c": [("q1", None, None)], # exclude empty seq - }), - "q1": (True, {"(": [("q1", 'push', "(")], - "[": [("q1", 'push', "[")], - ")": [("q1", 'pop', "(")], - "]": [("q1", 'pop', "[")], - "a": [("q1", None, None)], - "b": [("q1", None, None)], - "c": [("q1", None, None)] - }), - } - vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return vpa - - -def vpa_for_L15(): - # Dyck order 1 - - call_set = ['('] - return_set = [')'] - internal_set = ['a', 'b', 'c', 'd'] - - input_alphabet = VpaAlphabet(internal_alphabet=internal_set, 
call_alphabet=call_set, return_alphabet=return_set) - - state_setup = { - "q0": (False, {"(": [("q1", 'push', "(")], - "a": [("qa", None, None)], - "d": [("q1", None, None)], # exclude empty seq - }), - "q1": (True, {"(": [("q1", 'push', "(")], - ")": [("q1", 'pop', "(")], - "a": [("qa", None, None)], - "d": [("q1", None, None)], - }), - "qa": (False, {"b": [("qb", None, None)], - }), - "qb": (False, {"c": [("q1", None, None)], - }) - } - vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return vpa - - -def vpa_for_L16(): - # just a testing language - - call_set = ['a'] - return_set = ['b'] - internal_set = [] - - input_alphabet = VpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) - - state_setup = { - "q0": (False, {"a": [("q1", 'push', "$")]}), - "q1": (False, {"a": [("q1", 'push', "x")], - "b": [("q1", 'pop', "x"), ("q2", 'pop', "$")], - }), - "q2": (True, {}) - } - vpa = Vpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return vpa diff --git a/aalpy/utils/FileHandler.py b/aalpy/utils/FileHandler.py index dd052abd..803bc62f 100644 --- a/aalpy/utils/FileHandler.py +++ b/aalpy/utils/FileHandler.py @@ -6,11 +6,11 @@ from pydot import Dot, Node, Edge, graph_from_dot_file from aalpy.automata import Dfa, MooreMachine, Mdp, Onfsm, MealyState, DfaState, MooreState, MealyMachine, \ - MdpState, StochasticMealyMachine, StochasticMealyState, OnfsmState, MarkovChain, McState, Vpa, Sevpa + MdpState, StochasticMealyMachine, StochasticMealyState, OnfsmState, MarkovChain, McState, Sevpa, SevpaState file_types = ['dot', 'png', 'svg', 'pdf', 'string'] automaton_types = {Dfa: 'dfa', MealyMachine: 'mealy', MooreMachine: 'moore', Mdp: 'mdp', - StochasticMealyMachine: 'smm', Onfsm: 'onfsm', MarkovChain: 'mc', Vpa: 'vpa', Sevpa: 'sevpa'} + StochasticMealyMachine: 'smm', Onfsm: 'onfsm', MarkovChain: 'mc', Sevpa: 'vpa'} def _wrap_label(label): @@ -44,10 +44,6 @@ 
def _get_node(state, automaton_type): if state.is_accepting: return Node(state.state_id, label=_wrap_label(state.state_id), shape='doublecircle') return Node(state.state_id, label=_wrap_label(state.state_id)) - if automaton_type == 'sevpa': - if state.is_accepting: - return Node(state.state_id, label=_wrap_label(state.state_id), shape='doublecircle') - return Node(state.state_id, label=_wrap_label(state.state_id)) def _add_transition_to_graph(graph, state, automaton_type, display_same_state_trans, round_floats): @@ -91,20 +87,6 @@ def _add_transition_to_graph(graph, state, automaton_type, display_same_state_tr prob = round(s[2], round_floats) if round_floats else s[2] graph.add_edge(Edge(state.state_id, s[0].state_id, label=_wrap_label(f'{i}/{s[1]}:{prob}'))) if automaton_type == 'vpa': - for i in state.transitions.keys(): - transitions_list = state.transitions[i] - for transition in transitions_list: - if transition.action == 'push': - edge = Edge(transition.start.state_id, transition.target.state_id, label=_wrap_label(f'{transition.symbol} | push({transition.stack_guard})')) - elif transition.action == 'pop': - edge = Edge(transition.start.state_id, transition.target.state_id, label=_wrap_label(f'{transition.symbol} | pop({transition.stack_guard})')) - else: - edge = Edge(transition.start.state_id, transition.target.state_id, label=_wrap_label(f'{transition.symbol}')) - - if transition.target == Vpa.error_state: - edge.set_style('dashed') - graph.add_edge(edge) - if automaton_type == 'sevpa': for i in state.transitions.keys(): transitions_list = state.transitions[i] for transition in transitions_list: @@ -184,8 +166,6 @@ def save_automaton_to_file(automaton, path="LearnedModel", file_type="dot", for state in automaton.states: if automaton_type == 'pda' and state.state_id == 'ErrorSinkState': continue - # elif automaton_type == 'vpa' and state.state_id == 'ErrorSinkState': - # continue graph.add_node(_get_node(state, automaton_type)) for state in automaton.states: 
@@ -242,6 +222,8 @@ def _process_label(label, source, destination, automaton_type): inp = int(inp) if inp.isdigit() else inp out = int(out) if out.isdigit() else out source.transitions[inp].append((destination, out, float(prob))) + if automaton_type == 'vpa': + pass def _process_node_label(node, label, node_label_dict, node_type, automaton_type): @@ -298,7 +280,7 @@ def load_automaton_from_file(path, automaton_type, compute_prefixes=False): id_node_aut_map = {'dfa': (DfaState, Dfa), 'mealy': (MealyState, MealyMachine), 'moore': (MooreState, MooreMachine), 'onfsm': (OnfsmState, Onfsm), 'mdp': (MdpState, Mdp), 'mc': (McState, MarkovChain), - 'smm': (StochasticMealyState, StochasticMealyMachine)} + 'smm': (StochasticMealyState, StochasticMealyMachine), 'vpa': (SevpaState, Sevpa)} nodeType, aut_type = id_node_aut_map[automaton_type] diff --git a/det_cex_processing_test.py b/det_cex_processing_test.py deleted file mode 100644 index 432bf602..00000000 --- a/det_cex_processing_test.py +++ /dev/null @@ -1,25 +0,0 @@ -from aalpy.utils import generate_random_deterministic_automata, bisimilar -from aalpy.SULs import MealySUL -from aalpy.oracles import RandomWMethodEqOracle -from aalpy.learning_algs import run_KV, run_Lstar - -for x in ['rs', 'linear_fwd', 'linear_bwd', 'exponential_fwd', 'exponential_bwd']: - for at in ['moore', 'dfa', 'mealy']: - for i in range(50): - print(x, at, i) - model_type = at # or 'moore', 'dfa' - - # for random dfa's you can also define num_accepting_states - random_model = generate_random_deterministic_automata(automaton_type=model_type, num_states=75, - input_alphabet_size=4, output_alphabet_size=5) - - sul = MealySUL(random_model) - input_alphabet = random_model.get_input_alphabet() - - # select any of the oracles - eq_oracle = RandomWMethodEqOracle(input_alphabet, sul, walks_per_state=10, walk_len=15) - - learned_model = run_Lstar(input_alphabet, sul, eq_oracle, model_type, cex_processing=x, print_level=0) - if not bisimilar(random_model, 
learned_model): - print(x, at) - print(bisimilar(random_model, learned_model, return_cex=True)) diff --git a/sevpa_eq_checks.py b/sevpa_eq_checks.py deleted file mode 100644 index 59473d0d..00000000 --- a/sevpa_eq_checks.py +++ /dev/null @@ -1,79 +0,0 @@ -from aalpy.SULs.AutomataSUL import VpaSUL, SevpaSUL -import random -import aalpy.utils.BenchmarkSevpaModels as SEVPAs -import aalpy.utils.BenchmarkVpaModels as VPAs - -amount_languages = 15 - -missing_languages = {6} - -sevpa_suls = [] -vpa_suls = [] -alphabets = [] - -for l in range(1, amount_languages+1): - if l in missing_languages: - sevpa_suls.append(None) - vpa_suls.append(None) - alphabets.append(None) - continue - - language_sevpa = f'sevpa_for_L{l}' - language_vpa = f'vpa_for_L{l}' - - # Get SEVPAs - if hasattr(SEVPAs, language_sevpa): - sevpa = getattr(SEVPAs, language_sevpa)() - else: - print(f"Function {language_sevpa} not found") - continue - sevpa_input_alphabet = sevpa.get_input_alphabet() - sevpa_sul = SevpaSUL(sevpa, include_top=False, check_balance=False) - sevpa_suls.append(sevpa_sul) - alphabets.append(sevpa_input_alphabet) - - # Get VPA - if hasattr(VPAs, language_vpa): - vpa = getattr(VPAs, language_vpa)() - else: - print(f"Function {language_vpa} not found") - continue - vpa_input_alphabet = vpa.input_alphabet.get_merged_alphabet - vpa_sul = VpaSUL(vpa, include_top=False, check_balance=False) - vpa_suls.append(vpa_sul) - -for l in range(0, amount_languages): - language_index = l+1 - print(f'Checking Language L{language_index}') - if language_index in missing_languages: - print(f'Skipping L{language_index}') - continue - tests_passed = True - for i in range(0, 100000): - word_length = random.randint(1, 100) - word = [] - for j in range(0, word_length): - word.append(random.choice(alphabets[l])) - - pda_out = sevpa_suls[l].query(tuple(word)) - vpa_out = vpa_suls[l].query(tuple(word)) - - if pda_out == vpa_out: - continue - else: - print(f'Language L{language_index} failed on following test:') 
- print(f'Input: {word}') - print(f'Pda out: {pda_out} \nVpa out: {vpa_out}') - tests_passed = False - break - - if tests_passed: - print(f'Language L{language_index} passed') - else: - print(f'Language L{language_index} failed') - - - - - - diff --git a/test_main.py b/test_main.py deleted file mode 100644 index ae4521bc..00000000 --- a/test_main.py +++ /dev/null @@ -1,237 +0,0 @@ -import ast -import random - -from Examples import learning_context_free_grammar_example -from aalpy.SULs.AutomataSUL import SevpaSUL, VpaSUL -from aalpy.base import SUL -from aalpy.learning_algs import run_KV -from aalpy.oracles import RandomWordEqOracle, RandomWalkEqOracle, StatePrefixEqOracle -from aalpy.utils import visualize_automaton, get_Angluin_dfa, generate_random_sevpa -from aalpy.utils.BenchmarkVpaModels import * -from aalpy.utils.BenchmarkSevpaModels import * -from random import seed - - -# learning_context_free_grammar_example() - -# TODOs -# 1. exponential cex processing in CounterExampleProcessing.py -# 2. Create a SEVPA function that generates random positive strings - model.generate_random_positive_string() -# 2. Add all 15 langs as SVEPA -# 4. Implement and test to_state_setup, test saving and loading to/from file -# 5. Create an active interface to learn a grammar of some language, like simplified C or Java - -# Thesis -# 1. Intro -# 2. Preliminaries (very important) -# 2.1 CFG, context pairs, well matched words -# 2.2 What are SEVPA and why we use those instead of VPAs -# 2.3 Example SEVPA and how to read/interpret it (Important on a small example) -# 2.4 Automata Learning and KV -# ... -# 3. KV for CFG inference (intuition behind everything and how it fits with preliminaries) -# 3.1 Explain alg in detail, like Maxi -# 3.2 Explain CEX processing/transform access string, also on example and intuition -# 3.3 Important: Run of the algorithm, visualize classification tree... -# 4. Evaluation -# - number of steps/queries for models of growing alphabet, state size, ...] 
-# - on 15 languages -# - on random languages -# - on something cool - -def test_arithmetic_expression(): - import warnings - warnings.filterwarnings("ignore") - - class ArithmeticSUL(SUL): - def __init__(self): - super().__init__() - self.string_under_test = '' - - def pre(self): - self.string_under_test = '' - - def post(self): - pass - - def step(self, letter): - if letter: - self.string_under_test += ' ' + letter - - try: - eval(self.string_under_test) - return True - except (SyntaxError, TypeError): - return False - - sul = ArithmeticSUL() - alphabet = SevpaAlphabet(internal_alphabet=['1', '+'], call_alphabet=['('], return_alphabet=[')']) - eq_oracle = RandomWordEqOracle(alphabet.get_merged_alphabet(), sul, min_walk_len=5, - max_walk_len=20, num_walks=20000) - - learned_model = run_KV(alphabet, sul, eq_oracle, automaton_type='vpa') - learned_model.visualize() - - -def test_on_random_svepa(): - random_svepa = generate_random_sevpa(num_states=50, internal_alphabet_size=3, - call_alphabet_size=3, - return_alphabet_size=3, - acceptance_prob=0.4, - return_transition_prob=0.5) - - alphabet = random_svepa.input_alphabet - - sul = SevpaSUL(random_svepa) - - eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000, - min_walk_len=10, max_walk_len=30) - # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) - model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', - print_level=2, cex_processing='rs') - - -def test_random_word_gen(): - model_under_learning = vpa_for_L11() - - # Learn Model - alphabet = SevpaAlphabet(list(model_under_learning.internal_set), - list(model_under_learning.call_set), - list(model_under_learning.return_set)) - - sul = VpaSUL(model_under_learning) - - eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=100000) - model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', - 
print_level=2, cex_processing='exponential_fwd') - - sul_model = SevpaSUL(model) - - # Test SEVPA random word gen - random_word_list = model.gen_random_accepting_word_bfs(min_word_length=3, amount_words=10) - for random_word in random_word_list: - out_model = sul_model.query(random_word)[-1] - out_sul = sul.query(random_word)[-1] - assert out_model == out_sul and out_model - - print(f'All tests passed for gen_random_accepting_word_bfs') - - total_len = 0 - for i in range(0, 100): - random_word = model.gen_random_accepting_word() - total_len += len(random_word) - out_model = sul_model.query(random_word)[-1] - out_sul = sul.query(random_word)[-1] - assert out_model == out_sul and out_model - - print(f'All tests passed average word length: {total_len / 100}') - - # Test VPA random word gen - total_len = 0 - for i in range(0, 100): - random_word = model_under_learning.gen_random_accepting_word(return_letter_prob=0.5) - total_len += len(random_word) - out_model = sul_model.query(random_word)[-1] - out_sul = sul.query(random_word)[-1] - assert out_model == out_sul and out_model - - print(f'All tests passed average word length: {total_len / 100}') - - total_len = 0 - for i in range(0, 100): - random_word = model_under_learning.gen_random_accepting_word(return_letter_prob=0.5, early_finish=False) - total_len += len(random_word) - out_model = sul_model.query(random_word)[-1] - out_sul = sul.query(random_word)[-1] - assert out_model == out_sul and out_model - - print(f'All tests passed average word length: {total_len / 100}') - - total_len = 0 - for i in range(0, 100): - random_word = model_under_learning.gen_random_accepting_word(early_finish=False) - total_len += len(random_word) - out_model = sul_model.query(random_word)[-1] - out_sul = sul.query(random_word)[-1] - assert out_model == out_sul and out_model - - print(f'All tests passed average word length: {total_len / 100}') - - -def test_cex_processing_strategies_vpa(): - cex_processing_strategies = ['rs', 'linear_fwd', 
'linear_bwd', 'exponential_fwd', 'exponential_bwd', ] - - for i, vpa in enumerate( - [vpa_for_L1(), vpa_for_L2(), vpa_for_L3(), vpa_for_L4(), vpa_for_L5(), vpa_for_L7(), vpa_for_L8(), - vpa_for_L9(), vpa_for_L10(), vpa_for_L11(), vpa_for_L12(), vpa_for_L13(), vpa_for_L14(), vpa_for_L15()]): - - print(f'VPA {i + 1 if i < 6 else i + 2}') - - model_under_learning = vpa - - alphabet = SevpaAlphabet(list(model_under_learning.internal_set), - list(model_under_learning.call_set), - list(model_under_learning.return_set)) - - for cex_processing in cex_processing_strategies: - sul = VpaSUL(model_under_learning) - eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=20000) - model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', - print_level=1, cex_processing=cex_processing) - - sul_learned_model = SevpaSUL(model) - - print(f'Checking {cex_processing}') - for i in range(0, 10000): - word_length = random.randint(1, 100) - word = [] - for j in range(0, word_length): - word.append(random.choice(alphabet.get_merged_alphabet())) - - vpa_out = sul.query(tuple(word)) - learned_model_out = sul_learned_model.query(tuple(word)) - - if vpa_out == learned_model_out: - continue - else: - print(f'{cex_processing} failed on following test:') - print(f'Input: {word}') - print(f'Vpa out: {vpa_out} \nLearned vpa out: {learned_model_out}') - assert False - -# test_cex_processing_strategies_vpa() -# test_arithmetic_expression() -# test_on_random_svepa() -# import cProfile -# pr = cProfile.Profile() -# pr.enable() -# test_on_random_svepa() -# pr.disable() -# pr.print_stats(sort='tottime') -# exit() - - -for i, vpa in enumerate( - [vpa_for_L1(), vpa_for_L2(), vpa_for_L3(), vpa_for_L4(), vpa_for_L5(), vpa_for_L7(), vpa_for_L8(), - vpa_for_L9(), vpa_for_L10(), vpa_for_L11(), vpa_for_L12(), vpa_for_L13(), vpa_for_L14(), vpa_for_L15()]): - - print(f'VPA {i + 1 if i < 6 else i + 2}') - # 16 works - for s in range(10): - print(s) - 
seed(s) - model_under_learning = vpa - - alphabet = SevpaAlphabet(list(model_under_learning.internal_set), - list(model_under_learning.call_set), - list(model_under_learning.return_set)) - - # if i == 9: - # alphabet.exclusive_call_return_pairs = {'(': ')', '[': ']', '{': '}', '<': '>'} - - sul = VpaSUL(model_under_learning) - - eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000) - # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) - model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', - print_level=2, cex_processing='exponential_fwd') diff --git a/vpa_eq_checks.py b/vpa_eq_checks.py deleted file mode 100644 index b032ea04..00000000 --- a/vpa_eq_checks.py +++ /dev/null @@ -1,79 +0,0 @@ -from aalpy.SULs.AutomataSUL import VpaSUL, PdaSUL -import random -import aalpy.utils.BenchmarkPdaModels as PDAs -import aalpy.utils.BenchmarkVpaModels as VPAs - -amount_languages = 15 - -missing_languages = {6} - -pda_suls = [] -vpa_suls = [] -alphabets = [] - -for l in range(1, amount_languages+1): - if l in missing_languages: - pda_suls.append(None) - vpa_suls.append(None) - alphabets.append(None) - continue - language_pda = f'pda_for_L{l}' - language_vpa = f'vpa_for_L{l}' - - # Get PDAs - if hasattr(PDAs, language_pda): - pda = getattr(PDAs, language_pda)() - else: - print(f"Function {language_pda} not found") - continue - pda_input_alphabet = pda.get_input_alphabet() - pda_sul = PdaSUL(pda, include_top=True, check_balance=True) - pda_suls.append(pda_sul) - alphabets.append(pda_input_alphabet) - - # Get VPA - if hasattr(VPAs, language_vpa): - vpa = getattr(VPAs, language_vpa)() - else: - print(f"Function {language_vpa} not found") - continue - vpa_input_alphabet = vpa.get_input_alphabet() - merged_input_alphabet = vpa.get_input_alphabet_merged() - vpa_sul = VpaSUL(vpa, include_top=True, check_balance=True) - vpa_suls.append(vpa_sul) - -for l in range(0, 
amount_languages): - language_index = l+1 - print(f'Checking Language L{language_index}') - if language_index in missing_languages: - print(f'Skipping L{language_index}') - continue - tests_passed = True - for i in range(0, 50000): - word_length = random.randint(1, 100) - word = [] - for j in range(0, word_length): - word.append(random.choice(alphabets[l])) - - pda_out = pda_suls[l].query(tuple(word)) - vpa_out = vpa_suls[l].query(tuple(word)) - - if pda_out == vpa_out: - continue - else: - print(f'Language L{language_index} failed on following test:') - print(f'Input: {word}') - print(f'Pda out: {pda_out} \nVpa out: {vpa_out}') - tests_passed = False - break - - if tests_passed: - print(f'Language L{language_index} passed') - else: - print(f'Language L{language_index} failed') - - - - - - From 7bbf4c36f4422f2b017fc425776128b6ba6cbae2 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Tue, 12 Dec 2023 17:19:52 +0100 Subject: [PATCH 56/62] WIP on SEVPA refactoring and loading --- aalpy/automata/Sevpa.py | 40 ++-- .../deterministic/ClassificationTree.py | 6 +- aalpy/utils/AutomatonGenerators.py | 10 +- aalpy/utils/BenchmarkSevpaModels.py | 178 +++--------------- aalpy/utils/FileHandler.py | 22 ++- test_main.py | 170 +++++++++++++++++ 6 files changed, 239 insertions(+), 187 deletions(-) create mode 100644 test_main.py diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 11e34758..22d1c958 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -69,8 +69,7 @@ class SevpaTransition: action: The action performed during the transition (pop | None). stack_guard: The stack symbol to be pushed/popped. 
""" - def __init__(self, start: SevpaState, target: SevpaState, symbol, action, stack_guard=None): - self.start = start + def __init__(self, target: SevpaState, symbol, action, stack_guard=None): self.target = target self.symbol = symbol self.action = action @@ -81,7 +80,7 @@ def __str__(self): Returns: str: A string representation of the transition. """ - return f"{self.symbol}: {self.start.state_id} --> {self.target.state_id} | {self.action}: {self.stack_guard}" + return f"{self.symbol} --> {self.target.state_id} | {self.action}: {self.stack_guard}" class Sevpa(Automaton): @@ -195,7 +194,6 @@ def to_state_setup(self): def from_state_setup(state_setup: dict, **kwargs): init_state_id = kwargs['init_state_id'] - input_alphabet = kwargs['input_alphabet'] # build states with state_id and output states = {key: SevpaState(key, val[0]) for key, val in state_setup.items()} @@ -206,13 +204,11 @@ def from_state_setup(state_setup: dict, **kwargs): for _input, trans_spec in state_setup[state_id][1].items(): for (target_state_id, action, stack_guard) in trans_spec: if action == 'pop': - assert stack_guard[0] in states - assert stack_guard[1] in input_alphabet.call_alphabet stack_guard = (stack_guard[0], stack_guard[1]) - trans = SevpaTransition(start=state, target=states[target_state_id], symbol=_input, + trans = SevpaTransition(target=states[target_state_id], symbol=_input, action=action, stack_guard=stack_guard) elif action is None: - trans = SevpaTransition(start=state, target=states[target_state_id], symbol=_input, + trans = SevpaTransition(target=states[target_state_id], symbol=_input, action=None, stack_guard=None) else: assert False, 'Action must either be "pop" or None, note that there are no push actions ' \ @@ -221,7 +217,7 @@ def from_state_setup(state_setup: dict, **kwargs): state.transitions[_input].append(trans) init_state = states[init_state_id] - return Sevpa(init_state, [state for state in states.values()], input_alphabet) + return Sevpa(init_state, [state for 
state in states.values()]) def transform_access_string(self, state=None, stack_content=None) -> List[str]: """ @@ -273,31 +269,31 @@ def create_daisy_hypothesis(initial_state, alphabet): Sevpa: The created 1-SEVPA with the specified initial state and alphabet. """ for i in alphabet.internal_alphabet: - trans = SevpaTransition(start=initial_state, target=initial_state, symbol=i, action=None) + trans = SevpaTransition(target=initial_state, symbol=i, action=None) initial_state.transitions[i].append(trans) for c in alphabet.call_alphabet: for r in alphabet.return_alphabet: - trans = SevpaTransition(start=initial_state, target=initial_state, symbol=r, action='pop', + trans = SevpaTransition(target=initial_state, symbol=r, action='pop', stack_guard=(initial_state.state_id, c)) initial_state.transitions[r].append(trans) - return Sevpa(initial_state, [initial_state], alphabet) + return Sevpa(initial_state, [initial_state]) def get_input_alphabet(self): - if self.input_alphabet: - return self.input_alphabet internal, ret, call = [], [], [] for state in self.states: - for transition in state.transitions: - if transition.action == 'pop': - if transition.symbol not in ret: - ret.append(transition.symbol) - if transition.stack_guard[1] not in call: - call.append(transition.stack_guard[1]) - else: - internal.append(transition.symbol) + for transition_list in state.transitions.values(): + for transition in transition_list: + if transition.action == 'pop': + if transition.symbol not in ret: + ret.append(transition.symbol) + if transition.stack_guard[1] not in call: + call.append(transition.stack_guard[1]) + else: + if transition.symbol not in internal: + internal.append(transition.symbol) return SevpaAlphabet(internal, call, ret) diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index 0418cfcb..133b278a 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ 
b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -214,7 +214,7 @@ def gen_hypothesis(self): transition_target_access_string = transition_target_node.access_string assert transition_target_access_string in states - trans = SevpaTransition(start=state, target=states[transition_target_access_string], + trans = SevpaTransition(target=states[transition_target_access_string], symbol=internal_letter, action=None) state.transitions[internal_letter].append(trans) @@ -235,13 +235,13 @@ def gen_hypothesis(self): other_state.prefix + (call_letter,) + state.prefix + (return_letter,)) transition_target_access_string = transition_target_node.access_string - trans = SevpaTransition(start=state, target=states[transition_target_access_string], + trans = SevpaTransition(target=states[transition_target_access_string], symbol=return_letter, action='pop', stack_guard=(other_state.state_id, call_letter)) state.transitions[return_letter].append(trans) if self.automaton_type == 'vpa': - hypothesis = Sevpa(initial_state=initial_state, states=list(states.values()), input_alphabet=self.alphabet) + hypothesis = Sevpa(initial_state=initial_state, states=list(states.values())) if not self.error_state_prefix: error_state = hypothesis.get_error_state() if error_state: diff --git a/aalpy/utils/AutomatonGenerators.py b/aalpy/utils/AutomatonGenerators.py index 9dedc49b..6c6b6389 100644 --- a/aalpy/utils/AutomatonGenerators.py +++ b/aalpy/utils/AutomatonGenerators.py @@ -492,7 +492,7 @@ def generate_random_sevpa(num_states, internal_alphabet_size, call_alphabet_size target_state = random.choice(states) state.transitions[return_letter].append( - SevpaTransition(state, target_state, return_letter, 'pop', stack_guard)) + SevpaTransition(target_state, return_letter, 'pop', stack_guard)) else: # add an internal transition while True: @@ -504,7 +504,7 @@ def generate_random_sevpa(num_states, internal_alphabet_size, call_alphabet_size if target_state in state_buffer: 
state_buffer.remove(target_state) state.transitions[internal_letter].append( - SevpaTransition(state, target_state, internal_letter, None, None)) + SevpaTransition(target_state, internal_letter, None, None)) assert len(states) == num_states initial_state = random.choice(states) @@ -514,7 +514,7 @@ def generate_random_sevpa(num_states, internal_alphabet_size, call_alphabet_size if state.transitions[internal_letter] is None: target_state = random.choice(states) state.transitions[internal_letter].append( - SevpaTransition(state, target_state, internal_letter, None, None)) + SevpaTransition(target_state, internal_letter, None, None)) for call_letter in call_alphabet: for stack_state in states: @@ -523,6 +523,6 @@ def generate_random_sevpa(num_states, internal_alphabet_size, call_alphabet_size if not _has_transition(state, return_letter, stack_guard): target_state = states[random.randint(0, len(states) - 1)] state.transitions[return_letter].append( - SevpaTransition(state, target_state, return_letter, 'pop', stack_guard)) + SevpaTransition(target_state, return_letter, 'pop', stack_guard)) - return Sevpa(initial_state, states, sevpa_alphabet) + return Sevpa(initial_state, states) diff --git a/aalpy/utils/BenchmarkSevpaModels.py b/aalpy/utils/BenchmarkSevpaModels.py index 5ac2a42a..99d2e248 100644 --- a/aalpy/utils/BenchmarkSevpaModels.py +++ b/aalpy/utils/BenchmarkSevpaModels.py @@ -3,15 +3,6 @@ def sevpa_for_L1(): - call_set = ['a'] - return_set = ['b'] - internal_set = [] - - input_alphabet = SevpaAlphabet( - internal_alphabet=internal_set, - call_alphabet=call_set, - return_alphabet=return_set - ) state_setup = { 'q0': (False, {'b': [('q1', 'pop', ('q0', 'a'))] @@ -19,20 +10,10 @@ def sevpa_for_L1(): 'q1': (True, {'b': [('q1', 'pop', ('q0', 'a'))] }) } - sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return sevpa + return Sevpa.from_state_setup(state_setup, init_state_id="q0") def sevpa_for_L2(): - call_set = ['a', 
'b'] - return_set = ['c', 'd'] - internal_set = [] - - input_alphabet = SevpaAlphabet( - internal_alphabet=internal_set, - call_alphabet=call_set, - return_alphabet=return_set - ) state_setup = { 'q0': (False, {'d': [('q1', 'pop', ('q0', 'a')), ('q1', 'pop', ('q0', 'b'))], @@ -42,20 +23,11 @@ def sevpa_for_L2(): 'c': [('q1', 'pop', ('q0', 'a')), ('q1', 'pop', ('q0', 'b'))] }) } - sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return sevpa + return Sevpa.from_state_setup(state_setup, init_state_id="q0") -def sevpa_for_L3(): - call_set = ['a', 'c', 'b', 'd'] - return_set = ['e', 'g', 'f', 'h'] - internal_set = [] - input_alphabet = SevpaAlphabet( - internal_alphabet=internal_set, - call_alphabet=call_set, - return_alphabet=return_set - ) +def sevpa_for_L3(): state_setup = { 'q0': (False, {'g': [('q6', 'pop', ('q0', 'd')), @@ -77,20 +49,11 @@ def sevpa_for_L3(): 'q6': (False, {'h': [('q1', 'pop', ('q0', 'c'))] }) } - sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return sevpa + return Sevpa.from_state_setup(state_setup, init_state_id="q0") -def sevpa_for_L4(): - call_set = ['a', 'b'] - return_set = ['c', 'd'] - internal_set = [] - input_alphabet = SevpaAlphabet( - internal_alphabet=internal_set, - call_alphabet=call_set, - return_alphabet=return_set - ) +def sevpa_for_L4(): state_setup = { 'q0': (False, {'c': [('q2', 'pop', ('q0', 'b'))] @@ -100,20 +63,10 @@ def sevpa_for_L4(): 'q2': (False, {'d': [('q1', 'pop', ('q0', 'a'))] }) } - sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return sevpa + return Sevpa.from_state_setup(state_setup, init_state_id="q0") def sevpa_for_L5(): - call_set = ['a', 'b', 'c'] - return_set = ['d', 'e', 'f'] - internal_set = [] - - input_alphabet = SevpaAlphabet( - internal_alphabet=internal_set, - call_alphabet=call_set, - return_alphabet=return_set - ) state_setup = { 'q0': (False, {'d': 
[('q2', 'pop', ('q0', 'c'))] @@ -125,20 +78,11 @@ def sevpa_for_L5(): 'q3': (False, {'f': [('q1', 'pop', ('q0', 'a'))] }) } - sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return sevpa + return Sevpa.from_state_setup(state_setup, init_state_id="q0") -def sevpa_for_L7(): - call_set = ['(', '['] - return_set = [')', ']'] - internal_set = [] - input_alphabet = SevpaAlphabet( - internal_alphabet=internal_set, - call_alphabet=call_set, - return_alphabet=return_set - ) +def sevpa_for_L7(): state_setup = { 'q0': (False, {')': [('q1', 'pop', ('q0', '(')), @@ -153,20 +97,12 @@ def sevpa_for_L7(): }) } - sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return sevpa + return Sevpa.from_state_setup(state_setup, init_state_id="q0") -def sevpa_for_L8(): - call_set = ['(', '[', '{'] - return_set = [')', ']', '}'] - internal_set = [] - input_alphabet = SevpaAlphabet( - internal_alphabet=internal_set, - call_alphabet=call_set, - return_alphabet=return_set - ) + +def sevpa_for_L8(): state_setup = { 'q0': (False, {')': [('q1', 'pop', ('q0', '(')), @@ -184,20 +120,10 @@ def sevpa_for_L8(): ('q1', 'pop', ('q1', '['))] }) } - sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return sevpa + return Sevpa.from_state_setup(state_setup, init_state_id="q0") def sevpa_for_L9(): - call_set = ['(', '[', '{', '<'] - return_set = [')', ']', '}', '>'] - internal_set = [] - - input_alphabet = SevpaAlphabet( - internal_alphabet=internal_set, - call_alphabet=call_set, - return_alphabet=return_set - ) state_setup = { 'q0': (False, {']': [('q1', 'pop', ('q0', '[')), @@ -220,20 +146,10 @@ def sevpa_for_L9(): }) } - sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return sevpa + return Sevpa.from_state_setup(state_setup, init_state_id="q0") def sevpa_for_L10(): - call_set = ['a'] - return_set = ['v'] - internal_set 
= ['b', 'c', 'd', 'e', 'w', 'x', 'y', 'z'] - - input_alphabet = SevpaAlphabet( - internal_alphabet=internal_set, - call_alphabet=call_set, - return_alphabet=return_set - ) state_setup = { "q0": (False, {"b": [("qb", None, None)], @@ -261,20 +177,11 @@ def sevpa_for_L10(): ("qv", 'pop', ('q2', 'a'))] }) } - sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return sevpa + return Sevpa.from_state_setup(state_setup, init_state_id="q0") -def sevpa_for_L11(): - call_set = ['c1', 'c2'] - return_set = ['r1', 'r2'] - internal_set = ['i1', 'i2'] - input_alphabet = SevpaAlphabet( - internal_alphabet=internal_set, - call_alphabet=call_set, - return_alphabet=return_set - ) +def sevpa_for_L11(): state_setup = { 'q0': (False, {'i1': [('q2', None, None)], @@ -305,21 +212,10 @@ def sevpa_for_L11(): }) } - sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return sevpa + return Sevpa.from_state_setup(state_setup, init_state_id="q0") def sevpa_for_L12(): - call_set = ['(', '['] - return_set = [')', ']'] - internal_set = [] - - input_alphabet = SevpaAlphabet( - internal_alphabet=internal_set, - call_alphabet=call_set, - return_alphabet=return_set - ) - state_setup = { 'q0': (False, {']': [('q1', 'pop', ('q0', '['))], ')': [('q1', 'pop', ('q0', '('))] @@ -328,20 +224,11 @@ def sevpa_for_L12(): ')': [('q1', 'pop', ('q0', '('))] }) } - sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return sevpa + return Sevpa.from_state_setup(state_setup, init_state_id="q0",) -def sevpa_for_L13(): - call_set = ['('] - return_set = [')'] - internal_set = ['a', 'b', 'c'] - input_alphabet = SevpaAlphabet( - internal_alphabet=internal_set, - call_alphabet=call_set, - return_alphabet=return_set - ) +def sevpa_for_L13(): state_setup = { 'q0': (False, {'c': [('q1', None, None)], @@ -357,20 +244,11 @@ def sevpa_for_L13(): ('q1', 'pop', ('q1', '('))] }) } - sevpa = 
Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return sevpa + return Sevpa.from_state_setup(state_setup, init_state_id="q0") -def sevpa_for_L14(): - call_set = ['(', '['] - return_set = [')', ']'] - internal_set = ['a', 'b', 'c'] - input_alphabet = SevpaAlphabet( - internal_alphabet=internal_set, - call_alphabet=call_set, - return_alphabet=return_set - ) +def sevpa_for_L14(): state_setup = { 'q0': (False, {'a': [('q1', None, None)], @@ -390,19 +268,13 @@ def sevpa_for_L14(): ('q1', 'pop', ('q1', '('))] }) } - sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return sevpa + + return Sevpa.from_state_setup(state_setup, init_state_id="q0") def sevpa_for_L15(): # Dyck order 1 - call_set = ['('] - return_set = [')'] - internal_set = ['a', 'b', 'c', 'd'] - - input_alphabet = SevpaAlphabet(internal_alphabet=internal_set, call_alphabet=call_set, return_alphabet=return_set) - state_setup = { 'q0': (False, {'d': [('q1', None, None)], 'a': [('q2', None, None)], @@ -419,14 +291,14 @@ def sevpa_for_L15(): 'q3': (False, {'c': [('q1', None, None)] }) } - sevpa = Sevpa.from_state_setup(state_setup, init_state_id="q0", input_alphabet=input_alphabet) - return sevpa + + return Sevpa.from_state_setup(state_setup, init_state_id="q0") if __name__ == '__main__': e = sevpa_for_L13() print(e) - + print(e.get_input_alphabet()) e.save('test') m = load_automaton_from_file('test.dot', automaton_type='vpa') m.visualize() diff --git a/aalpy/utils/FileHandler.py b/aalpy/utils/FileHandler.py index 803bc62f..992d3861 100644 --- a/aalpy/utils/FileHandler.py +++ b/aalpy/utils/FileHandler.py @@ -1,3 +1,4 @@ +import re import sys import sys import traceback @@ -6,7 +7,8 @@ from pydot import Dot, Node, Edge, graph_from_dot_file from aalpy.automata import Dfa, MooreMachine, Mdp, Onfsm, MealyState, DfaState, MooreState, MealyMachine, \ - MdpState, StochasticMealyMachine, StochasticMealyState, OnfsmState, MarkovChain, 
McState, Sevpa, SevpaState + MdpState, StochasticMealyMachine, StochasticMealyState, OnfsmState, MarkovChain, McState, Sevpa, SevpaState, \ + SevpaTransition file_types = ['dot', 'png', 'svg', 'pdf', 'string'] automaton_types = {Dfa: 'dfa', MealyMachine: 'mealy', MooreMachine: 'moore', Mdp: 'mdp', @@ -91,10 +93,10 @@ def _add_transition_to_graph(graph, state, automaton_type, display_same_state_tr transitions_list = state.transitions[i] for transition in transitions_list: if transition.action == 'pop': - edge = Edge(transition.start.state_id, transition.target.state_id, + edge = Edge(state.state_id, transition.target.state_id, label=_wrap_label(f'{transition.symbol} / {transition.stack_guard}')) elif transition.action is None: - edge = Edge(transition.start.state_id, transition.target.state_id, + edge = Edge(state.state_id, transition.target.state_id, label=_wrap_label(f'{transition.symbol}')) else: assert False @@ -223,6 +225,14 @@ def _process_label(label, source, destination, automaton_type): out = int(out) if out.isdigit() else out source.transitions[inp].append((destination, out, float(prob))) if automaton_type == 'vpa': + # TODO work with string representations in transitions + match = re.match(r"(\S+)\s*/\s*\(\s*'(\S+)'\s*,\s*'(\S+)'\s*\)", label) + if match: + print(match.group()) + else: + internal_transition = SevpaTransition(label, destination, None, None) + source.transitions[label].append(internal_transition) + print(internal_transition) pass @@ -302,7 +312,11 @@ def load_automaton_from_file(path, automaton_type, compute_prefixes=False): continue source = node_label_dict[edge.get_source()] - destination = node_label_dict[edge.get_destination()] + if automaton_type != 'vpa': + destination = node_label_dict[edge.get_destination()] + else: + # TODO + exit() label = edge.get_attributes()['label'] label = _strip_label(label) diff --git a/test_main.py b/test_main.py new file mode 100644 index 00000000..b1a9cd96 --- /dev/null +++ b/test_main.py @@ -0,0 +1,170 
@@ +import ast +import random + +from Examples import learning_context_free_grammar_example +from aalpy.SULs.AutomataSUL import SevpaSUL +from aalpy.base import SUL +from aalpy.learning_algs import run_KV +from aalpy.oracles import RandomWordEqOracle, RandomWalkEqOracle, StatePrefixEqOracle +from aalpy.utils import visualize_automaton, get_Angluin_dfa, generate_random_sevpa +from aalpy.utils.BenchmarkSevpaModels import * +from random import seed + + +# learning_context_free_grammar_example() + +# TODOs +# 1. exponential cex processing in CounterExampleProcessing.py +# 2. Create a SEVPA function that generates random positive strings - model.generate_random_positive_string() +# 2. Add all 15 langs as SVEPA +# 4. Implement and test to_state_setup, test saving and loading to/from file +# 5. Create an active interface to learn a grammar of some language, like simplified C or Java + +# Thesis +# 1. Intro +# 2. Preliminaries (very important) +# 2.1 CFG, context pairs, well matched words +# 2.2 What are SEVPA and why we use those instead of VPAs +# 2.3 Example SEVPA and how to read/interpret it (Important on a small example) +# 2.4 Automata Learning and KV +# ... +# 3. KV for CFG inference (intuition behind everything and how it fits with preliminaries) +# 3.1 Explain alg in detail, like Maxi +# 3.2 Explain CEX processing/transform access string, also on example and intuition +# 3.3 Important: Run of the algorithm, visualize classification tree... +# 4. Evaluation +# - number of steps/queries for models of growing alphabet, state size, ...] 
+# - on 15 languages +# - on random languages +# - on something cool + +def test_arithmetic_expression(): + import warnings + warnings.filterwarnings("ignore") + + class ArithmeticSUL(SUL): + def __init__(self): + super().__init__() + self.string_under_test = '' + + def pre(self): + self.string_under_test = '' + + def post(self): + pass + + def step(self, letter): + if letter: + self.string_under_test += ' ' + letter + + try: + eval(self.string_under_test) + return True + except (SyntaxError, TypeError): + return False + + sul = ArithmeticSUL() + alphabet = SevpaAlphabet(internal_alphabet=['1', '+'], call_alphabet=['('], return_alphabet=[')']) + eq_oracle = RandomWordEqOracle(alphabet.get_merged_alphabet(), sul, min_walk_len=5, + max_walk_len=20, num_walks=20000) + + learned_model = run_KV(alphabet, sul, eq_oracle, automaton_type='vpa') + learned_model.visualize() + + +def test_on_random_svepa(): + random_svepa = generate_random_sevpa(num_states=50, internal_alphabet_size=3, + call_alphabet_size=3, + return_alphabet_size=3, + acceptance_prob=0.4, + return_transition_prob=0.5) + + alphabet = random_svepa.input_alphabet + + sul = SevpaSUL(random_svepa) + + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000, + min_walk_len=10, max_walk_len=30) + # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) + model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', + print_level=2, cex_processing='rs') + + +def test_cex_processing_strategies_vpa(): + cex_processing_strategies = ['rs', 'linear_fwd', 'linear_bwd', 'exponential_fwd', 'exponential_bwd', ] + + for i, vpa in enumerate( + [sevpa_for_L1(), sevpa_for_L2(), sevpa_for_L3(), sevpa_for_L4(), sevpa_for_L5(), sevpa_for_L7(), sevpa_for_L8(), + sevpa_for_L9(), sevpa_for_L10(), sevpa_for_L11(), sevpa_for_L12(), sevpa_for_L13(), sevpa_for_L14(), sevpa_for_L15()]): + + print(f'VPA {i + 1 if i < 6 else i + 2}') + + 
model_under_learning = vpa + + alphabet = SevpaAlphabet(list(model_under_learning.internal_set), + list(model_under_learning.call_set), + list(model_under_learning.return_set)) + + for cex_processing in cex_processing_strategies: + sul = SevpaSUL(model_under_learning) + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=20000) + model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', + print_level=1, cex_processing=cex_processing) + + sul_learned_model = SevpaSUL(model) + + print(f'Checking {cex_processing}') + for i in range(0, 10000): + word_length = random.randint(1, 100) + word = [] + for j in range(0, word_length): + word.append(random.choice(alphabet.get_merged_alphabet())) + + vpa_out = sul.query(tuple(word)) + learned_model_out = sul_learned_model.query(tuple(word)) + + if vpa_out == learned_model_out: + continue + else: + print(f'{cex_processing} failed on following test:') + print(f'Input: {word}') + print(f'Vpa out: {vpa_out} \nLearned vpa out: {learned_model_out}') + assert False + +# test_cex_processing_strategies_vpa() +# test_arithmetic_expression() +# test_on_random_svepa() +# import cProfile +# pr = cProfile.Profile() +# pr.enable() +# test_on_random_svepa() +# pr.disable() +# pr.print_stats(sort='tottime') +# exit() + + +for i, vpa in enumerate( + [sevpa_for_L1(), sevpa_for_L2(), sevpa_for_L3(), sevpa_for_L4(), sevpa_for_L5(), sevpa_for_L7(), sevpa_for_L8(), + sevpa_for_L9(), sevpa_for_L10(), sevpa_for_L11(), sevpa_for_L12(), sevpa_for_L13(), sevpa_for_L14(), + sevpa_for_L15()]): + + print(f'VPA {i + 1 if i < 6 else i + 2}') + # 16 works + for s in range(10): + print(s) + seed(s) + model_under_learning = vpa + + alphabet = SevpaAlphabet(list(model_under_learning.internal_set), + list(model_under_learning.call_set), + list(model_under_learning.return_set)) + + # if i == 9: + # alphabet.exclusive_call_return_pairs = {'(': ')', '[': ']', '{': '}', '<': '>'} + + sul = 
SevpaSUL(model_under_learning) + + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000) + # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) + model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', + print_level=2, cex_processing='exponential_fwd') From 74fa677b9df09cc85d0554cda0172045e4e9b6cf Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Tue, 12 Dec 2023 17:23:36 +0100 Subject: [PATCH 57/62] WIP on SEVPA refactoring and loading --- aalpy/utils/FileHandler.py | 14 ++++++-------- test_main.py | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/aalpy/utils/FileHandler.py b/aalpy/utils/FileHandler.py index 992d3861..df7f3138 100644 --- a/aalpy/utils/FileHandler.py +++ b/aalpy/utils/FileHandler.py @@ -14,6 +14,7 @@ automaton_types = {Dfa: 'dfa', MealyMachine: 'mealy', MooreMachine: 'moore', Mdp: 'mdp', StochasticMealyMachine: 'smm', Onfsm: 'onfsm', MarkovChain: 'mc', Sevpa: 'vpa'} +sevpa_transition_regex = r"(\S+)\s*/\s*\(\s*'(\S+)'\s*,\s*'(\S+)'\s*\)" def _wrap_label(label): """ @@ -226,9 +227,10 @@ def _process_label(label, source, destination, automaton_type): source.transitions[inp].append((destination, out, float(prob))) if automaton_type == 'vpa': # TODO work with string representations in transitions - match = re.match(r"(\S+)\s*/\s*\(\s*'(\S+)'\s*,\s*'(\S+)'\s*\)", label) + match = re.match(sevpa_transition_regex, label) if match: - print(match.group()) + a,b,c, = re.match() + print(a,b,c) else: internal_transition = SevpaTransition(label, destination, None, None) source.transitions[label].append(internal_transition) @@ -312,12 +314,8 @@ def load_automaton_from_file(path, automaton_type, compute_prefixes=False): continue source = node_label_dict[edge.get_source()] - if automaton_type != 'vpa': - destination = node_label_dict[edge.get_destination()] - else: - # TODO - exit() - + destination = 
node_label_dict[edge.get_destination()] + #WIP label = edge.get_attributes()['label'] label = _strip_label(label) _process_label(label, source, destination, automaton_type) diff --git a/test_main.py b/test_main.py index b1a9cd96..f324efc5 100644 --- a/test_main.py +++ b/test_main.py @@ -143,6 +143,27 @@ def test_cex_processing_strategies_vpa(): # exit() +import re + +expression = "baadsf / ('q1', '(fasfas')" + +# Define the regex pattern +pattern = r"(\S+)\s*/\s*\(\s*'(\S+)'\s*,\s*'(\S+)'\s*\)" + +# Match the pattern in the expression +match = re.match(r"(\S+)\s*/\s*\(\s*'(\S+)'\s*,\s*'(\S+)'\s*\)", expression) + +# Extract groups if there is a match +if match: + a, b, c = match.groups() + print("a:", a) + print("b:", b) + print("c:", c) +else: + print("No match found.") + +exit() + for i, vpa in enumerate( [sevpa_for_L1(), sevpa_for_L2(), sevpa_for_L3(), sevpa_for_L4(), sevpa_for_L5(), sevpa_for_L7(), sevpa_for_L8(), sevpa_for_L9(), sevpa_for_L10(), sevpa_for_L11(), sevpa_for_L12(), sevpa_for_L13(), sevpa_for_L14(), From d8420271cae14ecf680562755fee2307229299f8 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Tue, 12 Dec 2023 23:23:20 +0100 Subject: [PATCH 58/62] add loading of sevpa --- aalpy/utils/BenchmarkSevpaModels.py | 4 ++-- aalpy/utils/FileHandler.py | 12 ++++++------ test_main.py | 21 --------------------- 3 files changed, 8 insertions(+), 29 deletions(-) diff --git a/aalpy/utils/BenchmarkSevpaModels.py b/aalpy/utils/BenchmarkSevpaModels.py index 99d2e248..fe9351ff 100644 --- a/aalpy/utils/BenchmarkSevpaModels.py +++ b/aalpy/utils/BenchmarkSevpaModels.py @@ -301,6 +301,6 @@ def sevpa_for_L15(): print(e.get_input_alphabet()) e.save('test') m = load_automaton_from_file('test.dot', automaton_type='vpa') - m.visualize() - + print('Loaded') + print(m) diff --git a/aalpy/utils/FileHandler.py b/aalpy/utils/FileHandler.py index df7f3138..034cfad1 100644 --- a/aalpy/utils/FileHandler.py +++ b/aalpy/utils/FileHandler.py @@ -229,12 +229,12 @@ def 
_process_label(label, source, destination, automaton_type): # TODO work with string representations in transitions match = re.match(sevpa_transition_regex, label) if match: - a,b,c, = re.match() - print(a,b,c) + ret, stack_guard, top_of_stack = match.groups() + return_transition = SevpaTransition(destination, ret, 'pop', (stack_guard, top_of_stack)) + source.transitions[label].append(return_transition) else: - internal_transition = SevpaTransition(label, destination, None, None) + internal_transition = SevpaTransition(destination, label, None, None) source.transitions[label].append(internal_transition) - print(internal_transition) pass @@ -327,9 +327,9 @@ def load_automaton_from_file(path, automaton_type, compute_prefixes=False): assert False automaton = aut_type(initial_node, list(node_label_dict.values())) - if automaton_type != 'mc' and not automaton.is_input_complete(): + if automaton_type not in {'mc', 'vpa'} and not automaton.is_input_complete(): print('Warning: Loaded automaton is not input complete.') - if compute_prefixes and not automaton_type == 'mc': + if compute_prefixes and not automaton_type not in {'mc', 'vpa'}: for state in automaton.states: state.prefix = automaton.get_shortest_path(automaton.initial_state, state) return automaton diff --git a/test_main.py b/test_main.py index f324efc5..b1a9cd96 100644 --- a/test_main.py +++ b/test_main.py @@ -143,27 +143,6 @@ def test_cex_processing_strategies_vpa(): # exit() -import re - -expression = "baadsf / ('q1', '(fasfas')" - -# Define the regex pattern -pattern = r"(\S+)\s*/\s*\(\s*'(\S+)'\s*,\s*'(\S+)'\s*\)" - -# Match the pattern in the expression -match = re.match(r"(\S+)\s*/\s*\(\s*'(\S+)'\s*,\s*'(\S+)'\s*\)", expression) - -# Extract groups if there is a match -if match: - a, b, c = match.groups() - print("a:", a) - print("b:", b) - print("c:", c) -else: - print("No match found.") - -exit() - for i, vpa in enumerate( [sevpa_for_L1(), sevpa_for_L2(), sevpa_for_L3(), sevpa_for_L4(), sevpa_for_L5(), 
sevpa_for_L7(), sevpa_for_L8(), sevpa_for_L9(), sevpa_for_L10(), sevpa_for_L11(), sevpa_for_L12(), sevpa_for_L13(), sevpa_for_L14(), From ac891b7b7ac9728379e90f67365a952609fc1f72 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Tue, 12 Dec 2023 23:23:48 +0100 Subject: [PATCH 59/62] add loading of sevpa --- aalpy/utils/FileHandler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aalpy/utils/FileHandler.py b/aalpy/utils/FileHandler.py index 034cfad1..e8aaf057 100644 --- a/aalpy/utils/FileHandler.py +++ b/aalpy/utils/FileHandler.py @@ -226,7 +226,6 @@ def _process_label(label, source, destination, automaton_type): out = int(out) if out.isdigit() else out source.transitions[inp].append((destination, out, float(prob))) if automaton_type == 'vpa': - # TODO work with string representations in transitions match = re.match(sevpa_transition_regex, label) if match: ret, stack_guard, top_of_stack = match.groups() From e46746f9133e9fad06e25d3a1a9387ca59ab037e Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Wed, 13 Dec 2023 10:20:41 +0100 Subject: [PATCH 60/62] refactor sevpa learning - bug in gen_random_accepting_word for L1 --- Examples.py | 25 +++++++ README.md | 7 +- aalpy/automata/Sevpa.py | 71 +++++++++++-------- .../deterministic/ClassificationTree.py | 6 +- aalpy/utils/AutomatonGenerators.py | 4 +- aalpy/utils/BenchmarkSevpaModels.py | 17 +---- aalpy/utils/FileHandler.py | 14 ++-- test_main.py | 4 +- 8 files changed, 88 insertions(+), 60 deletions(-) diff --git a/Examples.py b/Examples.py index 079f4ac1..8aa28648 100644 --- a/Examples.py +++ b/Examples.py @@ -1004,6 +1004,31 @@ def step(self, letter): learned_model.visualize() +def test_on_benchmark_svepa(): + from aalpy.SULs import SevpaSUL + from aalpy.oracles import RandomWordEqOracle + from aalpy.learning_algs import run_KV + from aalpy.utils.BenchmarkSevpaModels import sevpa_for_L1, sevpa_for_L2, sevpa_for_L11, sevpa_for_L12, sevpa_for_L14 + + models = [sevpa_for_L1(), sevpa_for_L2(), sevpa_for_L11(), 
sevpa_for_L12(), sevpa_for_L14()] + + for inx, model in enumerate(models): + + alphabet = model.get_input_alphabet() + + sul = SevpaSUL(model) + + if inx == 4: + alphabet.exclusive_call_return_pairs = {'(': ')', '[': ']'} + + eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000, + min_walk_len=10, max_walk_len=30) + + learned_model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', + print_level=2, cex_processing='rs') + + print(learned_model.gen_random_accepting_word()) + def test_on_random_svepa(): from aalpy.SULs import SevpaSUL from aalpy.oracles import RandomWordEqOracle diff --git a/README.md b/README.md index 89731720..9b575bcd 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,8 @@ You can start learning automata in just a few lines of code. Whether you work with regular languages or you would like to learn models of (black-box) reactive systems, AALpy supports a wide range of modeling formalisms, including -**deterministic**, **non-deterministic**, and **stochastic automata**. +**deterministic**, **non-deterministic**, and **stochastic automata**, +as well as **deterministic context-free grammars/pushdown automata**.
    @@ -29,14 +30,14 @@ Whether you work with regular languages or you would like to learn models of | Deterministic | DFAs
    Mealy Machines
    Moore Machines | L*
    KV
    RPNI | Seamless Caching
    Counterexample Processing
    13 Equivalence Oracles | | Non-Deterministic | ONFSM
    Abstracted ONFSM | L*ONFSM | Size Reduction Trough Abstraction | | Stochastic | Markov Decision Processes
    Stochastic Mealy Machines
    Markov Chains | L*MDP
    L*SMM
    ALERGIA | Counterexample Processing
    Exportable to PRISM format
    Bindings to jALERGIA| - +| Context-Free | VPDA/SEVPA | KVVPA | Specification of exclusive
    call-return pairs
    AALpy enables efficient learning by providing a large set of equivalence oracles, implementing various conformance testing strategies. Active learning is mostly based on Angluin's [L* algorithm](https://people.eecs.berkeley.edu/~dawnsong/teaching/s10/papers/angluin87.pdf), for which AALpy supports a selection of optimizations, including efficient counterexample processing caching. However, the recent addition of efficiently implemented [KV](https://mitpress.mit.edu/9780262111935/an-introduction-to-computational-learning-theory/) algorithm -requires (on average) much less interaction with the system under learning than L*. +requires (on average) much less interaction with the system under learning than L*. In addition, KV can be used to learn Visibly Deterministic Pushdown Automata (VPDA). AALpy also includes **passive automata learning algorithms**, namely RPNI for deterministic and ALERGIA for stochastic models. Unlike active algorithms which learn by interaction with the system, passive learning algorithms construct a model based on provided data. diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 22d1c958..3105fcf7 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -4,7 +4,6 @@ from aalpy.base import Automaton, AutomatonState - from typing import List, Dict @@ -52,6 +51,7 @@ class SevpaState(AutomatonState): """ Single state of a 1-SEVPA. """ + def __init__(self, state_id, is_accepting=False): super().__init__(state_id) self.transitions = defaultdict(list[SevpaTransition]) @@ -65,13 +65,14 @@ class SevpaTransition: Attributes: start (SevpaState): The starting state of the transition. target (SevpaState): The target state of the transition. - symbol: The symbol associated with the transition. + letter: The symbol associated with the transition. action: The action performed during the transition (pop | None). - stack_guard: The stack symbol to be pushed/popped. 
+ stack_guard: Pair of (automaton_state_id, call_letter) """ - def __init__(self, target: SevpaState, symbol, action, stack_guard=None): - self.target = target - self.symbol = symbol + + def __init__(self, target: SevpaState, letter, action, stack_guard=None): + self.target_state = target + self.letter = letter self.action = action self.stack_guard = stack_guard @@ -80,7 +81,10 @@ def __str__(self): Returns: str: A string representation of the transition. """ - return f"{self.symbol} --> {self.target.state_id} | {self.action}: {self.stack_guard}" + if self.stack_guard: + return f'{self.letter} --> {self.target_state.state_id} | {self.action}: {self.stack_guard}' + else: + return f'{self.letter} --> {self.target_state.state_id}' class Sevpa(Automaton): @@ -135,11 +139,11 @@ def step(self, letter): transitions = self.current_state.transitions[letter] taken_transition = None for t in transitions: - if t.symbol in self.return_set: + if t.letter in self.return_set: if t.stack_guard == self.stack[-1]: taken_transition = t break - elif t.symbol in self.internal_set: + elif t.letter in self.internal_set: taken_transition = t break else: @@ -150,7 +154,7 @@ def step(self, letter): self.error_state_reached = True return False - self.current_state = taken_transition.target + self.current_state = taken_transition.target_state if taken_transition.action == 'pop': # empty stack elem should always be on the stack @@ -167,6 +171,9 @@ def get_state_by_id(self, state_id) -> Union[SevpaState, None]: return state return None + def is_input_complete(self) -> bool: + pass + def execute_sequence(self, origin_state, seq): if origin_state.prefix != self.initial_state.prefix: assert False, 'execute_sequence for Sevpa only is only supported from the initial state.' 
@@ -177,13 +184,14 @@ def execute_sequence(self, origin_state, seq): def to_state_setup(self): state_setup_dict = {} - sorted_states = sorted(self.states, key=lambda x: len(x.state_id)) + sorted_states = sorted(self.states, key=lambda x: x.state_id) for state in sorted_states: transitions_for_symbol = {} - for symbol, trans_list in state.transitions.items(): + for symbol, transition_list in state.transitions.items(): trans_list_for_setup = [] - for trans in trans_list: - trans_list_for_setup.append((trans.target.state_id, trans.action, trans.stack_guard)) + for transition in transition_list: + trans_list_for_setup.append( + (transition.target_state.state_id, transition.action, transition.stack_guard)) if trans_list_for_setup: transitions_for_symbol[symbol] = trans_list_for_setup state_setup_dict[state.state_id] = (state.is_accepting, transitions_for_symbol) @@ -205,10 +213,10 @@ def from_state_setup(state_setup: dict, **kwargs): for (target_state_id, action, stack_guard) in trans_spec: if action == 'pop': stack_guard = (stack_guard[0], stack_guard[1]) - trans = SevpaTransition(target=states[target_state_id], symbol=_input, + trans = SevpaTransition(target=states[target_state_id], letter=_input, action=action, stack_guard=stack_guard) elif action is None: - trans = SevpaTransition(target=states[target_state_id], symbol=_input, + trans = SevpaTransition(target=states[target_state_id], letter=_input, action=None, stack_guard=None) else: assert False, 'Action must either be "pop" or None, note that there are no push actions ' \ @@ -269,12 +277,12 @@ def create_daisy_hypothesis(initial_state, alphabet): Sevpa: The created 1-SEVPA with the specified initial state and alphabet. 
""" for i in alphabet.internal_alphabet: - trans = SevpaTransition(target=initial_state, symbol=i, action=None) + trans = SevpaTransition(target=initial_state, letter=i, action=None) initial_state.transitions[i].append(trans) for c in alphabet.call_alphabet: for r in alphabet.return_alphabet: - trans = SevpaTransition(target=initial_state, symbol=r, action='pop', + trans = SevpaTransition(target=initial_state, letter=r, action='pop', stack_guard=(initial_state.state_id, c)) initial_state.transitions[r].append(trans) @@ -287,13 +295,13 @@ def get_input_alphabet(self): for transition_list in state.transitions.values(): for transition in transition_list: if transition.action == 'pop': - if transition.symbol not in ret: - ret.append(transition.symbol) + if transition.letter not in ret: + ret.append(transition.letter) if transition.stack_guard[1] not in call: call.append(transition.stack_guard[1]) else: - if transition.symbol not in internal: - internal.append(transition.symbol) + if transition.letter not in internal: + internal.append(transition.letter) return SevpaAlphabet(internal, call, ret) @@ -323,9 +331,9 @@ def get_error_state(self): for letter in ret_int_al: for transition in state.transitions[letter]: if state_target is None: - state_target = transition.target + state_target = transition.target_state else: - if state_target != transition.target: + if state_target != transition.target_state: is_error_state = False break if not is_error_state: @@ -336,7 +344,7 @@ def get_error_state(self): for return_letter in self.input_alphabet.return_alphabet: for transition in self.initial_state.transitions[return_letter]: if transition.stack_guard[0] == state_target.state_id: - if transition.target != state_target: + if transition.target_state != state_target: is_error_state = False break if not is_error_state: @@ -366,7 +374,7 @@ def delete_state(self, state_to_remove): if transition.stack_guard is not None: if transition.stack_guard[0] == state_to_remove.state_id: 
continue - if transition.target.state_id == state_to_remove.state_id: + if transition.target_state.state_id == state_to_remove.state_id: continue cleaned_transitions.append(transition) @@ -396,7 +404,7 @@ def get_allowed_call_transitions(self): for internal_letter in self.input_alphabet.internal_alphabet: for internal_trans in current_state.transitions[internal_letter]: - target_state = internal_trans.target + target_state = internal_trans.target_state if target_state not in connected_states: queue.append(target_state) @@ -454,7 +462,7 @@ def gen_random_accepting_word(self, return_letter_prob: float = 0.5, min_length: Generate a random word that is accepted by the automaton. Only internal letters and return letters will be chosen. If a return letter is randomly chosen a random - stack guard will be selected. Then the stack needed stack configuration will bne searched by using BFS + stack guard will be selected. Then the stack needed stack configuration will be searched by using BFS Args: - return_letter_prob (float): Probability for selecting a letter from the return alphabet. 
@@ -504,8 +512,10 @@ def gen_random_accepting_word(self, return_letter_prob: float = 0.5, min_length: elif len(self.current_state.transitions[letter_for_word]) == 1: random_stack_guard = self.current_state.transitions[letter_for_word][0].stack_guard else: - random_stack_guard_index = random.randint(0, len(self.current_state.transitions[letter_for_word]) - 1) - random_stack_guard = self.current_state.transitions[letter_for_word][random_stack_guard_index].stack_guard + random_stack_guard_index = random.randint(0, + len(self.current_state.transitions[letter_for_word]) - 1) + random_stack_guard = self.current_state.transitions[letter_for_word][ + random_stack_guard_index].stack_guard # start from the initial state self.reset_to_initial() @@ -553,4 +563,5 @@ def gen_random_accepting_word(self, return_letter_prob: float = 0.5, min_length: if self.current_state.is_accepting and self.stack[-1] == self.empty and len(word) >= min_length: break + self.reset_to_initial() return word diff --git a/aalpy/learning_algs/deterministic/ClassificationTree.py b/aalpy/learning_algs/deterministic/ClassificationTree.py index 133b278a..7cc1768d 100644 --- a/aalpy/learning_algs/deterministic/ClassificationTree.py +++ b/aalpy/learning_algs/deterministic/ClassificationTree.py @@ -215,7 +215,7 @@ def gen_hypothesis(self): assert transition_target_access_string in states trans = SevpaTransition(target=states[transition_target_access_string], - symbol=internal_letter, action=None) + letter=internal_letter, action=None) state.transitions[internal_letter].append(trans) # Add call transitions @@ -236,7 +236,7 @@ def gen_hypothesis(self): transition_target_access_string = transition_target_node.access_string trans = SevpaTransition(target=states[transition_target_access_string], - symbol=return_letter, + letter=return_letter, action='pop', stack_guard=(other_state.state_id, call_letter)) state.transitions[return_letter].append(trans) @@ -407,7 +407,7 @@ def _insert_new_leaf(self, discriminator, 
old_leaf_access_string, new_leaf_acces Inserts a new leaf in the classification tree by: - moving the leaf node specified by down one level - inserting an internal node at the former position of the old node (i.e. as the parent of the old node) - - adding a new leaf node with as child of the new internal node / sibling of the old node + - adding a new leaf node with as child of the new internal node/sibling of the old node Could also be thought of as 'splitting' the old node into two (one of which keeps the old access string and one of which gets the new one) with as the distinguishing string between the two. diff --git a/aalpy/utils/AutomatonGenerators.py b/aalpy/utils/AutomatonGenerators.py index 6c6b6389..adc12e4c 100644 --- a/aalpy/utils/AutomatonGenerators.py +++ b/aalpy/utils/AutomatonGenerators.py @@ -439,11 +439,11 @@ def _has_transition(state: SevpaState, transition_letter, stack_guard) -> bool: if transitions is not None: if stack_guard is None: # internal transition for transition in transitions: - if transition.symbol == transition_letter: + if transition.letter == transition_letter: return True else: # return transition for transition in transitions: - if transition.stack_guard == stack_guard and transition.symbol == transition_letter: + if transition.stack_guard == stack_guard and transition.letter == transition_letter: return True return False diff --git a/aalpy/utils/BenchmarkSevpaModels.py b/aalpy/utils/BenchmarkSevpaModels.py index fe9351ff..8d2714e8 100644 --- a/aalpy/utils/BenchmarkSevpaModels.py +++ b/aalpy/utils/BenchmarkSevpaModels.py @@ -1,9 +1,8 @@ -from aalpy.automata.Sevpa import Sevpa, SevpaAlphabet +from aalpy.automata.Sevpa import Sevpa from aalpy.utils import load_automaton_from_file def sevpa_for_L1(): - state_setup = { 'q0': (False, {'b': [('q1', 'pop', ('q0', 'a'))] }), @@ -14,7 +13,6 @@ def sevpa_for_L1(): def sevpa_for_L2(): - state_setup = { 'q0': (False, {'d': [('q1', 'pop', ('q0', 'a')), ('q1', 'pop', ('q0', 'b'))], 'c': 
[('q1', 'pop', ('q0', 'a')), ('q1', 'pop', ('q0', 'b'))] @@ -28,7 +26,6 @@ def sevpa_for_L2(): def sevpa_for_L3(): - state_setup = { 'q0': (False, {'g': [('q6', 'pop', ('q0', 'd')), ('q4', 'pop', ('q0', 'b'))], @@ -54,7 +51,6 @@ def sevpa_for_L3(): def sevpa_for_L4(): - state_setup = { 'q0': (False, {'c': [('q2', 'pop', ('q0', 'b'))] }), @@ -67,7 +63,6 @@ def sevpa_for_L4(): def sevpa_for_L5(): - state_setup = { 'q0': (False, {'d': [('q2', 'pop', ('q0', 'c'))] }), @@ -83,7 +78,6 @@ def sevpa_for_L5(): def sevpa_for_L7(): - state_setup = { 'q0': (False, {')': [('q1', 'pop', ('q0', '(')), ('q1', 'pop', ('q1', '('))], @@ -101,9 +95,7 @@ def sevpa_for_L7(): return Sevpa.from_state_setup(state_setup, init_state_id="q0") - def sevpa_for_L8(): - state_setup = { 'q0': (False, {')': [('q1', 'pop', ('q0', '(')), ('q1', 'pop', ('q1', '('))], @@ -124,7 +116,6 @@ def sevpa_for_L8(): def sevpa_for_L9(): - state_setup = { 'q0': (False, {']': [('q1', 'pop', ('q0', '[')), ('q1', 'pop', ('q1', '['))], @@ -150,7 +141,6 @@ def sevpa_for_L9(): def sevpa_for_L10(): - state_setup = { "q0": (False, {"b": [("qb", None, None)], }), @@ -182,7 +172,6 @@ def sevpa_for_L10(): def sevpa_for_L11(): - state_setup = { 'q0': (False, {'i1': [('q2', None, None)], 'r1': [('q3', 'pop', ('q0', 'c2')), @@ -225,11 +214,10 @@ def sevpa_for_L12(): }) } - return Sevpa.from_state_setup(state_setup, init_state_id="q0",) + return Sevpa.from_state_setup(state_setup, init_state_id="q0", ) def sevpa_for_L13(): - state_setup = { 'q0': (False, {'c': [('q1', None, None)], 'b': [('q1', None, None)], @@ -249,7 +237,6 @@ def sevpa_for_L13(): def sevpa_for_L14(): - state_setup = { 'q0': (False, {'a': [('q1', None, None)], 'b': [('q1', None, None)], diff --git a/aalpy/utils/FileHandler.py b/aalpy/utils/FileHandler.py index e8aaf057..f3717542 100644 --- a/aalpy/utils/FileHandler.py +++ b/aalpy/utils/FileHandler.py @@ -94,11 +94,11 @@ def _add_transition_to_graph(graph, state, automaton_type, display_same_state_tr 
transitions_list = state.transitions[i] for transition in transitions_list: if transition.action == 'pop': - edge = Edge(state.state_id, transition.target.state_id, - label=_wrap_label(f'{transition.symbol} / {transition.stack_guard}')) + edge = Edge(state.state_id, transition.target_state.state_id, + label=_wrap_label(f'{transition.letter} / {transition.stack_guard}')) elif transition.action is None: - edge = Edge(state.state_id, transition.target.state_id, - label=_wrap_label(f'{transition.symbol}')) + edge = Edge(state.state_id, transition.target_state.state_id, + label=_wrap_label(f'{transition.letter}')) else: assert False graph.add_edge(edge) @@ -227,6 +227,8 @@ def _process_label(label, source, destination, automaton_type): source.transitions[inp].append((destination, out, float(prob))) if automaton_type == 'vpa': match = re.match(sevpa_transition_regex, label) + # cast to integer + label = int(label) if label.isdigit() else label if match: ret, stack_guard, top_of_stack = match.groups() return_transition = SevpaTransition(destination, ret, 'pop', (stack_guard, top_of_stack)) @@ -249,7 +251,7 @@ def _process_node_label(node, label, node_label_dict, node_type, automaton_type) node_label_dict[node_name] = node_type(label, output) else: node_label_dict[node_name] = node_type(label) - if automaton_type == 'dfa': + if automaton_type == 'dfa' or automaton_type == 'vpa': if 'shape' in node.get_attributes().keys() and 'doublecircle' in node.get_attributes()['shape']: node_label_dict[node_name].is_accepting = True @@ -314,7 +316,7 @@ def load_automaton_from_file(path, automaton_type, compute_prefixes=False): source = node_label_dict[edge.get_source()] destination = node_label_dict[edge.get_destination()] - #WIP + label = edge.get_attributes()['label'] label = _strip_label(label) _process_label(label, source, destination, automaton_type) diff --git a/test_main.py b/test_main.py index b1a9cd96..817d13ea 100644 --- a/test_main.py +++ b/test_main.py @@ -141,8 +141,10 @@ 
def test_cex_processing_strategies_vpa(): # pr.disable() # pr.print_stats(sort='tottime') # exit() +from Examples import test_on_benchmark_svepa - +test_on_benchmark_svepa() +exit() for i, vpa in enumerate( [sevpa_for_L1(), sevpa_for_L2(), sevpa_for_L3(), sevpa_for_L4(), sevpa_for_L5(), sevpa_for_L7(), sevpa_for_L8(), sevpa_for_L9(), sevpa_for_L10(), sevpa_for_L11(), sevpa_for_L12(), sevpa_for_L13(), sevpa_for_L14(), From 2f6f7939d31f01588a2b1fa734c0c863a931767f Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Wed, 13 Dec 2023 11:03:27 +0100 Subject: [PATCH 61/62] fix generation of random words --- Examples.py | 2 +- aalpy/automata/Sevpa.py | 21 +++++++++-------- test_main.py | 52 ++++++++++++++++++++++++----------------- 3 files changed, 43 insertions(+), 32 deletions(-) diff --git a/Examples.py b/Examples.py index 8aa28648..8ab028cd 100644 --- a/Examples.py +++ b/Examples.py @@ -1027,7 +1027,7 @@ def test_on_benchmark_svepa(): learned_model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', print_level=2, cex_processing='rs') - print(learned_model.gen_random_accepting_word()) + print(learned_model.get_random_accepting_word()) def test_on_random_svepa(): from aalpy.SULs import SevpaSUL diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index 3105fcf7..d566b10c 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -63,7 +63,6 @@ class SevpaTransition: Represents a transition in a 1-SEVPA. Attributes: - start (SevpaState): The starting state of the transition. target (SevpaState): The target state of the transition. letter: The symbol associated with the transition. action: The action performed during the transition (pop | None). @@ -81,10 +80,8 @@ def __str__(self): Returns: str: A string representation of the transition. 
""" - if self.stack_guard: - return f'{self.letter} --> {self.target_state.state_id} | {self.action}: {self.stack_guard}' - else: - return f'{self.letter} --> {self.target_state.state_id}' + return f'{self.letter} --> {self.target_state.state_id}' + \ + f' | {self.action}: {self.stack_guard}' if self.stack_guard else '' class Sevpa(Automaton): @@ -416,7 +413,7 @@ def get_allowed_call_transitions(self): return allowed_call_transitions - def gen_random_accepting_word_bfs(self, min_word_length: int = 0, amount_words: int = 1) -> set: + def get_accepting_words_bfs(self, min_word_length: int = 0, num_words: int = 1) -> list: """ Generate a list of random words that are accepted by the automaton using the breadth-first search approach. @@ -445,10 +442,11 @@ def gen_random_accepting_word_bfs(self, min_word_length: int = 0, amount_words: continue if self.current_state.is_accepting and self.stack[-1] == self.empty and len(word) >= min_word_length: found_words.add(tuple(word)) - if len(found_words) >= amount_words: + if len(found_words) >= num_words: + found_words = list(found_words) + found_words.sort(key=len) return found_words shuffled_alphabet = self.input_alphabet.get_merged_alphabet() - random.shuffle(shuffled_alphabet) for letter in shuffled_alphabet: if letter in allowed_call_trans: # skip words where it's not possible to pop the stack_guard @@ -457,7 +455,7 @@ def gen_random_accepting_word_bfs(self, min_word_length: int = 0, amount_words: new_word = word + [letter] queue.append(new_word) - def gen_random_accepting_word(self, return_letter_prob: float = 0.5, min_length: int = 0) -> list: + def get_random_accepting_word(self, return_letter_prob: float = 0.5, min_length: int = 0) -> list: """ Generate a random word that is accepted by the automaton. 
@@ -477,6 +475,8 @@ def gen_random_accepting_word(self, return_letter_prob: float = 0.5, min_length: internal_letter_prob = 0.0 if len(self.input_alphabet.internal_alphabet) != 0: internal_letter_prob = 1.0 - return_letter_prob + else: + return_letter_prob = 1.0 assert (return_letter_prob + internal_letter_prob) == 1.0 @@ -560,7 +560,8 @@ def gen_random_accepting_word(self, return_letter_prob: float = 0.5, min_length: else: self.execute_sequence(self.initial_state, word) - if self.current_state.is_accepting and self.stack[-1] == self.empty and len(word) >= min_length: + if self.current_state.is_accepting and self.stack[-1] == self.empty and len(word) >= min_length \ + and random.random() < 0.2: break self.reset_to_initial() diff --git a/test_main.py b/test_main.py index 817d13ea..e34d9671 100644 --- a/test_main.py +++ b/test_main.py @@ -143,30 +143,40 @@ def test_cex_processing_strategies_vpa(): # exit() from Examples import test_on_benchmark_svepa -test_on_benchmark_svepa() +sevpa_for_L5().get_accepting_words_bfs(num_words=2) exit() + for i, vpa in enumerate( [sevpa_for_L1(), sevpa_for_L2(), sevpa_for_L3(), sevpa_for_L4(), sevpa_for_L5(), sevpa_for_L7(), sevpa_for_L8(), sevpa_for_L9(), sevpa_for_L10(), sevpa_for_L11(), sevpa_for_L12(), sevpa_for_L13(), sevpa_for_L14(), sevpa_for_L15()]): - print(f'VPA {i + 1 if i < 6 else i + 2}') - # 16 works - for s in range(10): - print(s) - seed(s) - model_under_learning = vpa - - alphabet = SevpaAlphabet(list(model_under_learning.internal_set), - list(model_under_learning.call_set), - list(model_under_learning.return_set)) - - # if i == 9: - # alphabet.exclusive_call_return_pairs = {'(': ')', '[': ']', '{': '}', '<': '>'} - - sul = SevpaSUL(model_under_learning) - - eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000) - # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) - model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, 
automaton_type='vpa', - print_level=2, cex_processing='exponential_fwd') + print(i, '-----------------------------------------------') + e = vpa.get_accepting_words_bfs(num_words=2) + print(e) + continue + print('random') + for _ in range(20): + r = vpa.get_random_accepting_word() + print(r) + + # print(f'VPA {i + 1 if i < 6 else i + 2}') + # # 16 works + # for s in range(10): + # print(s) + # seed(s) + # model_under_learning = vpa + # + # alphabet = SevpaAlphabet(list(model_under_learning.internal_set), + # list(model_under_learning.call_set), + # list(model_under_learning.return_set)) + # + # # if i == 9: + # # alphabet.exclusive_call_return_pairs = {'(': ')', '[': ']', '{': '}', '<': '>'} + # + # sul = SevpaSUL(model_under_learning) + # + # eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000) + # # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) + # model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', + # print_level=2, cex_processing='exponential_fwd') From fb4966a54f79067b88ebd2931a4bf14f656dd97b Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Wed, 13 Dec 2023 11:23:47 +0100 Subject: [PATCH 62/62] update Examples and nomenclature --- Examples.py | 8 +- aalpy/automata/Sevpa.py | 24 +++--- test_main.py | 182 ---------------------------------------- 3 files changed, 17 insertions(+), 197 deletions(-) delete mode 100644 test_main.py diff --git a/Examples.py b/Examples.py index 8ab028cd..49caacb8 100644 --- a/Examples.py +++ b/Examples.py @@ -964,7 +964,7 @@ def learning_context_free_grammar_example(): learned_model.visualize() -def test_arithmetic_expression(): +def arithmetic_expression_sevpa_learning(): from aalpy.base import SUL from aalpy.automata import SevpaAlphabet from aalpy.oracles import RandomWordEqOracle @@ -1004,7 +1004,7 @@ def step(self, letter): learned_model.visualize() -def test_on_benchmark_svepa(): +def 
benchmark_sevpa_learning(): from aalpy.SULs import SevpaSUL from aalpy.oracles import RandomWordEqOracle from aalpy.learning_algs import run_KV @@ -1029,7 +1029,8 @@ def test_on_benchmark_svepa(): print(learned_model.get_random_accepting_word()) -def test_on_random_svepa(): + +def random_sevpa_learning(): from aalpy.SULs import SevpaSUL from aalpy.oracles import RandomWordEqOracle from aalpy.learning_algs import run_KV @@ -1053,3 +1054,4 @@ def test_on_random_svepa(): model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', print_level=2, cex_processing='rs') + diff --git a/aalpy/automata/Sevpa.py b/aalpy/automata/Sevpa.py index d566b10c..b2a782a6 100644 --- a/aalpy/automata/Sevpa.py +++ b/aalpy/automata/Sevpa.py @@ -287,20 +287,20 @@ def create_daisy_hypothesis(initial_state, alphabet): def get_input_alphabet(self): - internal, ret, call = [], [], [] + int_alphabet, ret_alphabet, call_alphabet = [], [], [] for state in self.states: for transition_list in state.transitions.values(): for transition in transition_list: if transition.action == 'pop': - if transition.letter not in ret: - ret.append(transition.letter) - if transition.stack_guard[1] not in call: - call.append(transition.stack_guard[1]) + if transition.letter not in ret_alphabet: + ret_alphabet.append(transition.letter) + if transition.stack_guard[1] not in call_alphabet: + call_alphabet.append(transition.stack_guard[1]) else: - if transition.letter not in internal: - internal.append(transition.letter) + if transition.letter not in int_alphabet: + int_alphabet.append(transition.letter) - return SevpaAlphabet(internal, call, ret) + return SevpaAlphabet(int_alphabet, call_alphabet, ret_alphabet) def get_error_state(self): """ @@ -455,7 +455,7 @@ def get_accepting_words_bfs(self, min_word_length: int = 0, num_words: int = 1) new_word = word + [letter] queue.append(new_word) - def get_random_accepting_word(self, return_letter_prob: float = 0.5, min_length: int = 0) -> list: + def 
get_random_accepting_word(self, return_letter_prob: float = 0.5, min_len: int = 2) -> list: """ Generate a random word that is accepted by the automaton. @@ -464,7 +464,7 @@ def get_random_accepting_word(self, return_letter_prob: float = 0.5, min_length: Args: - return_letter_prob (float): Probability for selecting a letter from the return alphabet. - - min_length (int): Minimum length of the generated word. + - min_len (int): Minimum length of the generated word. Returns: - list: A randomly generated word that gets accepted by the automaton. @@ -560,8 +560,8 @@ def get_random_accepting_word(self, return_letter_prob: float = 0.5, min_length: else: self.execute_sequence(self.initial_state, word) - if self.current_state.is_accepting and self.stack[-1] == self.empty and len(word) >= min_length \ - and random.random() < 0.2: + if self.current_state.is_accepting and self.stack[-1] == self.empty and len(word) >= min_len \ + and random.random() < 0.2: break self.reset_to_initial() diff --git a/test_main.py b/test_main.py deleted file mode 100644 index e34d9671..00000000 --- a/test_main.py +++ /dev/null @@ -1,182 +0,0 @@ -import ast -import random - -from Examples import learning_context_free_grammar_example -from aalpy.SULs.AutomataSUL import SevpaSUL -from aalpy.base import SUL -from aalpy.learning_algs import run_KV -from aalpy.oracles import RandomWordEqOracle, RandomWalkEqOracle, StatePrefixEqOracle -from aalpy.utils import visualize_automaton, get_Angluin_dfa, generate_random_sevpa -from aalpy.utils.BenchmarkSevpaModels import * -from random import seed - - -# learning_context_free_grammar_example() - -# TODOs -# 1. exponential cex processing in CounterExampleProcessing.py -# 2. Create a SEVPA function that generates random positive strings - model.generate_random_positive_string() -# 2. Add all 15 langs as SVEPA -# 4. Implement and test to_state_setup, test saving and loading to/from file -# 5. 
Create an active interface to learn a grammar of some language, like simplified C or Java - -# Thesis -# 1. Intro -# 2. Preliminaries (very important) -# 2.1 CFG, context pairs, well matched words -# 2.2 What are SEVPA and why we use those instead of VPAs -# 2.3 Example SEVPA and how to read/interpret it (Important on a small example) -# 2.4 Automata Learning and KV -# ... -# 3. KV for CFG inference (intuition behind everything and how it fits with preliminaries) -# 3.1 Explain alg in detail, like Maxi -# 3.2 Explain CEX processing/transform access string, also on example and intuition -# 3.3 Important: Run of the algorithm, visualize classification tree... -# 4. Evaluation -# - number of steps/queries for models of growing alphabet, state size, ...] -# - on 15 languages -# - on random languages -# - on something cool - -def test_arithmetic_expression(): - import warnings - warnings.filterwarnings("ignore") - - class ArithmeticSUL(SUL): - def __init__(self): - super().__init__() - self.string_under_test = '' - - def pre(self): - self.string_under_test = '' - - def post(self): - pass - - def step(self, letter): - if letter: - self.string_under_test += ' ' + letter - - try: - eval(self.string_under_test) - return True - except (SyntaxError, TypeError): - return False - - sul = ArithmeticSUL() - alphabet = SevpaAlphabet(internal_alphabet=['1', '+'], call_alphabet=['('], return_alphabet=[')']) - eq_oracle = RandomWordEqOracle(alphabet.get_merged_alphabet(), sul, min_walk_len=5, - max_walk_len=20, num_walks=20000) - - learned_model = run_KV(alphabet, sul, eq_oracle, automaton_type='vpa') - learned_model.visualize() - - -def test_on_random_svepa(): - random_svepa = generate_random_sevpa(num_states=50, internal_alphabet_size=3, - call_alphabet_size=3, - return_alphabet_size=3, - acceptance_prob=0.4, - return_transition_prob=0.5) - - alphabet = random_svepa.input_alphabet - - sul = SevpaSUL(random_svepa) - - eq_oracle = 
RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000, - min_walk_len=10, max_walk_len=30) - # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) - model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', - print_level=2, cex_processing='rs') - - -def test_cex_processing_strategies_vpa(): - cex_processing_strategies = ['rs', 'linear_fwd', 'linear_bwd', 'exponential_fwd', 'exponential_bwd', ] - - for i, vpa in enumerate( - [sevpa_for_L1(), sevpa_for_L2(), sevpa_for_L3(), sevpa_for_L4(), sevpa_for_L5(), sevpa_for_L7(), sevpa_for_L8(), - sevpa_for_L9(), sevpa_for_L10(), sevpa_for_L11(), sevpa_for_L12(), sevpa_for_L13(), sevpa_for_L14(), sevpa_for_L15()]): - - print(f'VPA {i + 1 if i < 6 else i + 2}') - - model_under_learning = vpa - - alphabet = SevpaAlphabet(list(model_under_learning.internal_set), - list(model_under_learning.call_set), - list(model_under_learning.return_set)) - - for cex_processing in cex_processing_strategies: - sul = SevpaSUL(model_under_learning) - eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=20000) - model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', - print_level=1, cex_processing=cex_processing) - - sul_learned_model = SevpaSUL(model) - - print(f'Checking {cex_processing}') - for i in range(0, 10000): - word_length = random.randint(1, 100) - word = [] - for j in range(0, word_length): - word.append(random.choice(alphabet.get_merged_alphabet())) - - vpa_out = sul.query(tuple(word)) - learned_model_out = sul_learned_model.query(tuple(word)) - - if vpa_out == learned_model_out: - continue - else: - print(f'{cex_processing} failed on following test:') - print(f'Input: {word}') - print(f'Vpa out: {vpa_out} \nLearned vpa out: {learned_model_out}') - assert False - -# test_cex_processing_strategies_vpa() -# test_arithmetic_expression() -# test_on_random_svepa() -# import 
cProfile -# pr = cProfile.Profile() -# pr.enable() -# test_on_random_svepa() -# pr.disable() -# pr.print_stats(sort='tottime') -# exit() -from Examples import test_on_benchmark_svepa - -sevpa_for_L5().get_accepting_words_bfs(num_words=2) -exit() - -for i, vpa in enumerate( - [sevpa_for_L1(), sevpa_for_L2(), sevpa_for_L3(), sevpa_for_L4(), sevpa_for_L5(), sevpa_for_L7(), sevpa_for_L8(), - sevpa_for_L9(), sevpa_for_L10(), sevpa_for_L11(), sevpa_for_L12(), sevpa_for_L13(), sevpa_for_L14(), - sevpa_for_L15()]): - - print(i, '-----------------------------------------------') - e = vpa.get_accepting_words_bfs(num_words=2) - print(e) - continue - print('random') - for _ in range(20): - r = vpa.get_random_accepting_word() - print(r) - - # print(f'VPA {i + 1 if i < 6 else i + 2}') - # # 16 works - # for s in range(10): - # print(s) - # seed(s) - # model_under_learning = vpa - # - # alphabet = SevpaAlphabet(list(model_under_learning.internal_set), - # list(model_under_learning.call_set), - # list(model_under_learning.return_set)) - # - # # if i == 9: - # # alphabet.exclusive_call_return_pairs = {'(': ')', '[': ']', '{': '}', '<': '>'} - # - # sul = SevpaSUL(model_under_learning) - # - # eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000) - # # model = run_KV_vpda(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, print_level=3,) - # model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa', - # print_level=2, cex_processing='exponential_fwd')