Skip to content

Commit

Permalink
Merge pull request #56 from moritzpistauer/merge_to_master_repo
Browse files Browse the repository at this point in the history
Add Learning of Deterministic Context Free Grammars
  • Loading branch information
emuskardin authored Dec 13, 2023
2 parents 239c858 + fb4966a commit 26a85d0
Show file tree
Hide file tree
Showing 28 changed files with 3,267 additions and 1,867 deletions.
260 changes: 260 additions & 0 deletions Benchmarking/vpa_benchmarking/benchmark_vpa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pickle

from aalpy.SULs.AutomataSUL import SevpaSUL, VpaSUL, DfaSUL
from aalpy.automata import SevpaAlphabet
from aalpy.learning_algs import run_KV
from aalpy.oracles import RandomWordEqOracle
from aalpy.utils import generate_random_sevpa, visualize_automaton
from aalpy.utils.BenchmarkVpaModels import *


def state_increasing():
    """Benchmark KV learning of random SEVPAs as the number of states grows.

    For every counterexample-processing strategy and every state count from
    10 up to and including ``max_number_states`` (in steps of ``step_size``),
    ``repeats`` random SEVPAs are generated and learned with ``run_KV``.
    The ``'queries_learning'`` value reported by each run is collected and the
    median per state count is kept.

    Side effects: prints progress, writes ``state_increasing.pickle`` (the
    per-strategy ``(state counts, median queries)`` tuples) and saves the plot
    to ``state_increasing.png``.
    """
    print("Benchmarking for increasing state size")
    repeats = 10
    step_size = 10
    max_number_states = 100

    strategies = ['rs', 'linear_fwd', 'linear_bwd', 'exponential_fwd', 'exponential_bwd']
    # strategies = ['rs']
    data_dict = defaultdict(tuple)

    for strategy in strategies:
        median_states, median_queries = [], []

        for number_states in range(10, max_number_states + 1, step_size):
            print(number_states)
            # Samples gathered over all repetitions for this state count.
            sampled_states, sampled_queries = [], []

            for _ in range(repeats):
                target = generate_random_sevpa(num_states=number_states,
                                               internal_alphabet_size=3,
                                               call_alphabet_size=3,
                                               return_alphabet_size=3,
                                               acceptance_prob=0.4,
                                               return_transition_prob=0.5)
                alphabet = target.input_alphabet
                sul = SevpaSUL(target)
                eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul,
                                               num_walks=10000, min_walk_len=10, max_walk_len=30)

                _, stats = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa',
                                  print_level=0, cex_processing=strategy, return_data=True)

                sampled_states.append(number_states)
                sampled_queries.append(stats['queries_learning'])

            median_states.append(np.median(sampled_states))
            median_queries.append(np.median(sampled_queries))

        data_dict[strategy] = (median_states, median_queries)

    # Persist the raw series so the plot can be regenerated without re-running the benchmark.
    with open('state_increasing.pickle', 'wb') as file:
        pickle.dump(data_dict, file)

    # One line per counterexample-processing strategy.
    plt.figure()
    plt.xlabel('Number of states')
    plt.ylabel('Number of membership queries')
    plt.title('Query growth of a random SEVPA with increasing state size')
    for strategy, (x_values, y_values) in data_dict.items():
        plt.plot(x_values, y_values, label=strategy)
    plt.legend()
    plt.savefig('state_increasing.png')


def alphabet_increasing():
    """Benchmark KV learning of random SEVPAs as all three alphabets grow together.

    For every counterexample-processing strategy and every ``alphabet_size`` in
    ``range(1, max_alphabet_size)``, ``repeats`` random 100-state SEVPAs are
    generated whose internal, call and return alphabets each contain
    ``alphabet_size`` symbols. Each one is learned with ``run_KV`` and the
    ``'queries_learning'`` value of the run is recorded. The median over the
    repetitions is stored per alphabet size; the x-value is
    ``alphabet_size * 3``, i.e. the combined size of the three alphabets.

    Side effects: prints progress, writes ``alphabet_increasing.pickle`` and
    saves the plot to ``alphabet_increasing.png``.
    """
    print("Benchmarking for increasing alphabet size")
    repeats = 10
    max_alphabet_size = 15

    cex_processing = ['rs', 'linear_fwd', 'linear_bwd', 'exponential_fwd', 'exponential_bwd']
    # cex_processing = ['rs']
    data_dict = defaultdict(tuple)

    for cex in cex_processing:
        states_data_median = []
        query_data_median = []
        for alphabet_size in range(1, max_alphabet_size):
            print(alphabet_size)
            # Reset the samples once per alphabet size (not once per run), so that
            # the median below is really taken over all `repeats` learning runs.
            states_data = []
            query_data = []
            for _ in range(repeats):
                random_svepa = generate_random_sevpa(num_states=100, internal_alphabet_size=alphabet_size,
                                                     call_alphabet_size=alphabet_size,
                                                     return_alphabet_size=alphabet_size,
                                                     acceptance_prob=0.4,
                                                     return_transition_prob=0.5)

                alphabet = random_svepa.input_alphabet

                sul = SevpaSUL(random_svepa)

                eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000,
                                               min_walk_len=10, max_walk_len=30)

                _, data = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa',
                                 print_level=0, cex_processing=cex, return_data=True)
                states_data.append(alphabet_size * 3)
                query_data.append(data['queries_learning'])

            states_data_median.append(np.median(states_data))
            query_data_median.append(np.median(query_data))

        data_dict[cex] = (states_data_median, query_data_median)

    # Save data_dict to a pickle file
    with open('alphabet_increasing.pickle', 'wb') as file:
        pickle.dump(data_dict, file)

    # plot: one line per counterexample-processing strategy
    plt.figure()
    plt.xlabel('Size of the input alphabet')
    plt.ylabel('Number of membership queries')
    plt.title('Query growth of a random SEVPA with increasing alphabet size')
    for key in data_dict:
        plt.plot(data_dict[key][0], data_dict[key][1], label=key)
    plt.legend()
    plt.savefig('alphabet_increasing.png')


def alphabet_increasing_variable():
    """Benchmark KV learning while growing one of the three SEVPA alphabets at a time.

    For each alphabet type (``'int'``, ``'call'``, ``'ret'``) and every
    ``alphabet_size`` in ``range(1, max_alphabet_size)``, ``repeats`` random
    100-state SEVPAs are generated in which only the selected alphabet has
    ``alphabet_size`` symbols; the other two are fixed at size 1. Each one is
    learned with ``run_KV`` (``cex_processing='rs'``) and the median of the
    reported ``'queries_learning'`` values is stored per alphabet size.

    Side effects: prints progress, writes ``alphabet_increasing_variable.pickle``
    and saves the plot to ``alphabet_increasing_variable.png``.
    """
    print("Benchmarking for variably increasing alphabet size")
    repeats = 10
    max_alphabet_size = 15

    data_dict = defaultdict(tuple)
    # Maps each plotted series to the generator argument that is varied for it.
    # Without this mapping all three series would vary the internal alphabet only.
    varied_argument = {'int': 'internal_alphabet_size',
                       'call': 'call_alphabet_size',
                       'ret': 'return_alphabet_size'}

    for alphabet_type, argument_name in varied_argument.items():
        states_data_median = []
        query_data_median = []
        for alphabet_size in range(1, max_alphabet_size):
            print(alphabet_size)

            # Every alphabet has a single symbol except the one under test.
            alphabet_sizes = {'internal_alphabet_size': 1,
                              'call_alphabet_size': 1,
                              'return_alphabet_size': 1}
            alphabet_sizes[argument_name] = alphabet_size

            # Reset the samples once per alphabet size (not once per run), so that
            # the median below is really taken over all `repeats` learning runs.
            states_data = []
            query_data = []
            for _ in range(repeats):
                random_svepa = generate_random_sevpa(num_states=100,
                                                     acceptance_prob=0.4,
                                                     return_transition_prob=0.5,
                                                     **alphabet_sizes)

                alphabet = random_svepa.input_alphabet

                sul = SevpaSUL(random_svepa)

                eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul, num_walks=10000,
                                               min_walk_len=10, max_walk_len=30)

                _, data = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='vpa',
                                 print_level=0, cex_processing='rs', return_data=True)
                states_data.append(alphabet_size)
                query_data.append(data['queries_learning'])

            states_data_median.append(np.median(states_data))
            query_data_median.append(np.median(query_data))

        data_dict[alphabet_type] = (states_data_median, query_data_median)

    # Save data_dict to a pickle file
    with open('alphabet_increasing_variable.pickle', 'wb') as file:
        pickle.dump(data_dict, file)

    # plot: one line per varied alphabet type
    plt.figure()
    plt.xlabel('Size of the input alphabet')
    plt.ylabel('Number of membership queries')
    plt.title('Query growth of a random SEVPA with increasing alphabet size')
    for key in data_dict:
        plt.plot(data_dict[key][0], data_dict[key][1], label=key)
    plt.legend()
    plt.savefig('alphabet_increasing_variable.png')


def benchmark_vpa_dfa():
    """Compare the query counts of VPA learning and DFA learning on the benchmark VPAs.

    Every benchmark VPA is learned twice with ``run_KV`` (``cex_processing='rs'``,
    at most ``max_learning_rounds`` rounds): once as a ``'vpa'`` through a
    ``VpaSUL`` and once as a ``'dfa'`` through a ``DfaSUL`` over the merged
    alphabet. The ``'queries_learning'`` values of both runs are stored per model.

    Side effects: prints progress, writes ``benchmark_vpa_dfa.pickle`` and shows
    a grouped bar chart (VPA vs. DFA query counts) in a new figure.
    """
    max_learning_rounds = 100
    data_dict = defaultdict(tuple)
    label_data = []

    # The language numbers are listed explicitly because vpa_for_L6 is not part of
    # the list below; deriving the label from the loop index mislabels vpa_for_L7.
    language_numbers = [1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15]
    benchmark_vpas = [vpa_for_L1(), vpa_for_L2(), vpa_for_L3(), vpa_for_L4(), vpa_for_L5(), vpa_for_L7(),
                      vpa_for_L8(), vpa_for_L9(), vpa_for_L10(), vpa_for_L11(), vpa_for_L12(), vpa_for_L13(),
                      vpa_for_L14(), vpa_for_L15()]

    for language_number, vpa in zip(language_numbers, benchmark_vpas):
        label = f'VPA {language_number}'
        print(label)
        label_data.append(label)

        alphabet_sevpa = SevpaAlphabet(list(vpa.internal_set),
                                       list(vpa.call_set),
                                       list(vpa.return_set))

        alphabet_dfa = vpa.input_alphabet.get_merged_alphabet()

        sul_vpa = VpaSUL(vpa)
        sul_dfa = DfaSUL(vpa)

        eq_oracle_vpa = RandomWordEqOracle(alphabet=alphabet_sevpa.get_merged_alphabet(), sul=sul_vpa,
                                           num_walks=10000, min_walk_len=10, max_walk_len=30)
        # The DFA run gets an oracle bound to its own SUL and alphabet, so the two
        # learning runs do not share a SUL instance.
        eq_oracle_dfa = RandomWordEqOracle(alphabet=alphabet_dfa, sul=sul_dfa,
                                           num_walks=10000, min_walk_len=10, max_walk_len=30)

        _, data_vpa = run_KV(alphabet=alphabet_sevpa, sul=sul_vpa, eq_oracle=eq_oracle_vpa, automaton_type='vpa',
                             print_level=0, cex_processing='rs', return_data=True,
                             max_learning_rounds=max_learning_rounds)

        _, data_dfa = run_KV(alphabet=alphabet_dfa, sul=sul_dfa, eq_oracle=eq_oracle_dfa, automaton_type='dfa',
                             print_level=0, cex_processing='rs', return_data=True,
                             max_learning_rounds=max_learning_rounds)

        print(data_dfa['queries_learning'])

        data_dict[vpa] = (data_vpa['queries_learning'], data_dfa['queries_learning'])

    # Save data_dict to a pickle file
    with open('benchmark_vpa_dfa.pickle', 'wb') as file:
        pickle.dump(data_dict, file)

    # plotting
    keys = list(data_dict.keys())
    values = list(data_dict.values())
    data1, data2 = zip(*values)

    # Start a fresh figure so the bars are not drawn on top of a plot left over
    # from a previously executed benchmark.
    plt.figure()

    # Creating bar graph: two bars (VPA, DFA) side by side per model
    bar_width = 0.35
    index = np.arange(len(keys))
    plt.bar(index, data1, bar_width, label='Data VPA', align='center')
    plt.bar(index + bar_width, data2, bar_width, label='Data DFA', align='center')

    plt.xlabel('VPA Instances')
    plt.ylabel('Number of Queries')
    plt.title('Bar Graph of Queries for VPA and DFA')
    plt.xticks(index + bar_width / 2, label_data)
    plt.legend()
    plt.show()


# Benchmarks executed when this script runs, in order; remove an entry to skip that benchmark.
for benchmark in (state_increasing, alphabet_increasing, alphabet_increasing_variable, benchmark_vpa_dfa):
    benchmark()
119 changes: 119 additions & 0 deletions Examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -936,3 +936,122 @@ def compare_stochastic_and_non_deterministic_learning(example='first_grid'):

print(model_type)
print('Error for each property:', [round(d * 100, 2) for d in diff.values()])


def learning_context_free_grammar_example():
    """Learn a balanced-bracket language first as a bounded DFA, then as a VPA.

    The system under learning comes from ``get_balanced_string_sul`` with the
    call/return pairs ``(``/``)`` and ``[``/``]`` and ``allow_empty_string=False``.
    The first ``run_KV`` call learns a DFA over the merged alphabet and is
    limited to 20 learning rounds; the second learns a ``'vpa'`` model over the
    structured alphabet, which is then visualized.
    """
    from aalpy.automata import SevpaAlphabet
    from aalpy.learning_algs import run_KV
    from aalpy.oracles import RandomWordEqOracle
    from aalpy.utils.BenchmarkSULs import get_balanced_string_sul

    call_return_map = {'(': ')', '[': ']'}
    call_symbols = list(call_return_map.keys())
    return_symbols = list(call_return_map.values())

    # No internal symbols: the language consists of brackets only.
    sevpa_alphabet = SevpaAlphabet([], call_symbols, return_symbols)

    # bounded deterministic approximation
    dfa_sul = get_balanced_string_sul(call_return_map, allow_empty_string=False)
    dfa_oracle = RandomWordEqOracle(sevpa_alphabet.get_merged_alphabet(), dfa_sul,
                                    num_walks=1000, min_walk_len=5, max_walk_len=30)
    learned_deterministic_approximation = run_KV(sevpa_alphabet.get_merged_alphabet(), dfa_sul, dfa_oracle,
                                                 automaton_type='dfa', max_learning_rounds=20)

    # A fresh SUL and oracle are created for the VPA learning run.
    vpa_sul = get_balanced_string_sul(call_return_map, allow_empty_string=False)
    vpa_oracle = RandomWordEqOracle(sevpa_alphabet.get_merged_alphabet(), vpa_sul,
                                    num_walks=1000, min_walk_len=5, max_walk_len=30)
    learned_model = run_KV(sevpa_alphabet, vpa_sul, vpa_oracle, automaton_type='vpa')
    learned_model.visualize()


def arithmetic_expression_sevpa_learning():
    """Learn a VPA for the strings over ``1``, ``+``, ``(``, ``)`` that Python can evaluate.

    The system under learning appends each input symbol to a string and
    reports whether ``eval`` of that string succeeds. All warnings are
    silenced for the process via ``warnings.filterwarnings("ignore")``.
    The learned model is visualized.
    """
    from aalpy.base import SUL
    from aalpy.automata import SevpaAlphabet
    from aalpy.oracles import RandomWordEqOracle
    from aalpy.learning_algs import run_KV
    import warnings
    warnings.filterwarnings("ignore")

    class ArithmeticSUL(SUL):
        """SUL whose output says whether the symbols seen so far form an evaluable expression."""

        def __init__(self):
            super().__init__()
            self.string_under_test = ''

        def pre(self):
            # Each query starts from an empty expression.
            self.string_under_test = ''

        def post(self):
            pass

        def step(self, letter):
            # A falsy letter leaves the expression unchanged.
            if letter:
                self.string_under_test = self.string_under_test + ' ' + letter

            # NOTE: eval is only ever fed symbols of the fixed alphabet defined below.
            try:
                eval(self.string_under_test)
            except (SyntaxError, TypeError):
                return False
            else:
                return True

    sul = ArithmeticSUL()

    alphabet = SevpaAlphabet(internal_alphabet=['1', '+'], call_alphabet=['('], return_alphabet=[')'])

    eq_oracle = RandomWordEqOracle(alphabet.get_merged_alphabet(), sul,
                                   min_walk_len=5, max_walk_len=20, num_walks=20000)

    learned_model = run_KV(alphabet, sul, eq_oracle, automaton_type='vpa')
    learned_model.visualize()


def benchmark_sevpa_learning():
    """Learn five benchmark SEVPA models with KV and print a random accepting word of each.

    The models are ``sevpa_for_L1``, ``L2``, ``L11``, ``L12`` and ``L14``. For
    the last one (index 4) the alphabet's ``exclusive_call_return_pairs`` is
    set to ``{'(': ')', '[': ']'}`` before learning.
    """
    from aalpy.SULs import SevpaSUL
    from aalpy.oracles import RandomWordEqOracle
    from aalpy.learning_algs import run_KV
    from aalpy.utils.BenchmarkSevpaModels import sevpa_for_L1, sevpa_for_L2, sevpa_for_L11, sevpa_for_L12, sevpa_for_L14

    model_factories = (sevpa_for_L1, sevpa_for_L2, sevpa_for_L11, sevpa_for_L12, sevpa_for_L14)
    # All models are built up front, before the first learning run starts.
    models = [build_model() for build_model in model_factories]

    for position, target in enumerate(models):
        alphabet = target.get_input_alphabet()
        sul = SevpaSUL(target)

        # Only the fifth model (sevpa_for_L14) gets exclusive call/return pairs.
        if position == 4:
            alphabet.exclusive_call_return_pairs = {'(': ')', '[': ']'}

        eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul,
                                       num_walks=10000, min_walk_len=10, max_walk_len=30)

        learned_model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle,
                               automaton_type='vpa', print_level=2, cex_processing='rs')

        print(learned_model.get_random_accepting_word())


def random_sevpa_learning():
    """Generate a random 50-state SEVPA and learn it with KV.

    The target has three internal, three call and three return symbols. It is
    learned as a ``'vpa'`` with ``cex_processing='rs'`` and ``print_level=2``.
    The learned model is not returned.
    """
    from aalpy.SULs import SevpaSUL
    from aalpy.oracles import RandomWordEqOracle
    from aalpy.learning_algs import run_KV
    from aalpy.utils import generate_random_sevpa

    generator_settings = dict(num_states=50,
                              internal_alphabet_size=3,
                              call_alphabet_size=3,
                              return_alphabet_size=3,
                              acceptance_prob=0.4,
                              return_transition_prob=0.5)
    random_svepa = generate_random_sevpa(**generator_settings)

    # from aalpy.utils.BenchmarkVpaModels import vpa_for_L11
    # balanced_parentheses = vpa_for_L11()

    alphabet = random_svepa.input_alphabet
    sul = SevpaSUL(random_svepa)

    eq_oracle = RandomWordEqOracle(alphabet=alphabet.get_merged_alphabet(), sul=sul,
                                   num_walks=10000, min_walk_len=10, max_walk_len=30)

    model = run_KV(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle,
                   automaton_type='vpa', print_level=2, cex_processing='rs')

Loading

0 comments on commit 26a85d0

Please sign in to comment.