-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathProbabilisticGenerator.py
More file actions
177 lines (149 loc) · 6.5 KB
/
ProbabilisticGenerator.py
File metadata and controls
177 lines (149 loc) · 6.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# -*- coding: utf-8 -*-
#/usr/bin/python
# @copyright: MIT License
# Copyright (c) 2018 syntactic (Pastèque Ho)
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# @summary: This file generates sentences from a PCFG in JSGF. Run it by entering
# in the command line: python ProbabilisticGenerator.py <grammarFile> <numStrings>
# where <grammarFile> is the path of the JSGF file, and <numStrings> is the number
# of strings you want to generate
# @since: 2014/06/02
"""
This file probabilistically generates strings from a JSGF grammar. It takes advantage \
of weights assigned to alternatives (separated by pipes) by choosing to \
expand higher weighted alternatives with greater probability. For sets of \
alternatives without weights, each alternative is equally likely to be \
expanded. For optional groups, the elements in the group have a 50% chance \
of being expanded.
It requires two arguments: the path to the JSGF \
Grammar file, and the number of strings to generate. You can run this on the \
included grammar Ideas.gram: \
``python ProbabilisticGenerator.py Ideas.gram 20``
This will generate 20 sentences based on the public rule(s) in Ideas.gram, using the \
weights if they are provided.
"""
import sys, itertools, random, bisect, argparse
import JSGFParser as parser
import JSGFGrammar as gram
def weightedChoice(listOfTuples):
    """
    Chooses an element of a list based on its weight

    :param listOfTuples: a non-empty list of (element, weight) tuples, where the element can be a JSGF expression object, string, or list,\
    and the weight is a float
    :returns: the first element of a chosen tuple
    :raises ValueError: if the list is empty or the weights do not sum to a positive number
    """
    pairs = list(listOfTuples)
    if not pairs:
        raise ValueError('weightedChoice() requires at least one (element, weight) tuple')

    choices, weights = zip(*pairs)
    # Running totals turn the weights into the upper bounds of adjacent ranges.
    cumdist = list(itertools.accumulate(weights))
    total = cumdist[-1]
    # "not total > 0" also rejects NaN. With a zero total every draw would
    # bisect past the last range and fail with an opaque IndexError.
    if not total > 0:
        raise ValueError('weightedChoice() requires the weights to sum to a positive number')

    x = random.random() * total
    # bisect (right) skips zero-width ranges, so zero-weight elements are never
    # chosen; the clamp guards the upper edge of the last range.
    index = min(bisect.bisect(cumdist, x), len(choices) - 1)
    return choices[index]
def combineSets(listOfSets):
    """
    Builds every space-separated concatenation obtainable by picking one
    string from each set, in order (the cross product of the sets).

    :param listOfSets: 2-D list of strings
    :returns: a list of strings, each stripped of surrounding whitespace
    """
    combined = ['']
    for stringSet in listOfSets:
        # Extend every partial result with every string of the current set.
        combined = [
            ' '.join((prefix.strip(), suffix.strip())).strip()
            for prefix, suffix in itertools.product(combined, stringSet)
        ]
    return combined
def processSequence(seq):
    """
    Expands each element of a sequence in order and joins the non-empty
    expansions with single spaces.

    :param seq: an iterable of right-hand-side elements accepted by processRHS
    :returns: the concatenated expansion as a string
    """
    # Lazily expand and strip each element; evaluation order matches seq.
    expansions = (processRHS(element).strip() for element in seq)
    # Drop empty expansions so they do not leave doubled spaces behind.
    return ' '.join(piece for piece in expansions if piece)
def processNonTerminal(nt):
    """
    Looks up the right-hand side of a nonterminal in the module-level
    grammar and returns its expansion.

    :param nt: the nonterminal to expand
    :returns: the result of processRHS on the nonterminal's right-hand side
    """
    ruleBody = grammar.getRHS(nt)
    return processRHS(ruleBody)
def processDisjunction(disj):
    """
    Picks one alternative of a disjunction and expands it. When the first
    disjunct is a tuple the alternatives are chosen by weight via
    weightedChoice; otherwise one is chosen uniformly at random.

    :param disj: an object whose ``disjuncts`` attribute lists the alternatives
    :returns: the expansion of the chosen alternative
    """
    alternatives = disj.disjuncts
    # Only the first disjunct is inspected to decide whether weights are present.
    isWeighted = type(alternatives[0]) is tuple
    chosen = weightedChoice(alternatives) if isWeighted else random.choice(alternatives)
    return processRHS(chosen)
def processOptional(opt):
    """
    Expands an optional element about half of the time and otherwise
    contributes nothing.

    :param opt: an object whose ``option`` attribute holds the optional content
    :returns: the expansion of ``opt.option``, or an empty string when skipped
    """
    # A draw above 0.5 keeps the option; anything else skips it.
    if random.random() > 0.5:
        return processRHS(opt.option)
    return ''
def processRHS(rhs):
    """
    Expands a right-hand-side element into a string by dispatching on its type.

    :param rhs: a list (sequence of elements), a gram.Disjunction, a gram.Optional,\
    a gram.NonTerminal, or a plain string (returned unchanged)
    :returns: the expansion of rhs as a string
    :raises TypeError: if rhs is none of the supported kinds
    """
    # The exact-type test for list is kept as-is so dispatch order and results
    # are unchanged for every supported input.
    if type(rhs) is list:
        return processSequence(rhs)
    if isinstance(rhs, gram.Disjunction):
        return processDisjunction(rhs)
    if isinstance(rhs, gram.Optional):
        return processOptional(rhs)
    if isinstance(rhs, gram.NonTerminal):
        return processNonTerminal(rhs)
    if isinstance(rhs, str):
        return rhs
    # Previously this fell through and returned None, which either crashed
    # later in processSequence (None has no strip()) or printed "None".
    raise TypeError(
        'Cannot expand right-hand side of unsupported type '
        + type(rhs).__name__ + ': ' + repr(rhs)
    )
def main():
    """
    Main function for command line usage.

    Parses the grammar file path and the number of strings from the command
    line, loads the grammar into the module-level ``grammar`` variable and
    prints the requested number of generated strings to stdout. With several
    public rules, each string starts from one of them chosen at random.

    Errors are reported on stderr and end the process with exit status 1.
    """
    global grammar

    argParser = argparse.ArgumentParser(description='Generate random strings from a JSGF grammar')
    argParser.add_argument('grammarFile', help='Path to the JSGF grammar file')
    argParser.add_argument('iterations', type=int, help='Number of strings to generate')

    try:
        args = argParser.parse_args()
    except SystemExit:
        # NOTE(review): this also swallows argparse's non-zero exit status on
        # invalid arguments; kept as in the original, confirm it is intended.
        return

    try:
        with open(args.grammarFile, 'r') as fileStream:
            grammar = parser.getGrammarObject(fileStream)

        publicRules = grammar.publicRules
        if not publicRules:
            # Report a clear message instead of "list index out of range".
            raise ValueError('the grammar defines no public rule to generate from')

        if len(publicRules) > 1:
            # Multiple public rules - create a disjunction of all of them
            startRHS = gram.Disjunction([rule.rhs for rule in publicRules])
        else:
            # Single public rule
            startRHS = publicRules[0].rhs

        for _ in range(args.iterations):
            print(processRHS(startRHS))

    except FileNotFoundError:
        # Diagnostics go to stderr so they never mix with generated strings.
        print(f"Error: Grammar file '{args.grammarFile}' not found", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error processing grammar: {e}", file=sys.stderr)
        sys.exit(1)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()