3 ## sexpr.py - by Yusuke Shinyama
8 from abstfilter import AbstractFeeder, AbstractFilter, AbstractConsumer
13 class SExprReader(AbstractFilter):
16 reader = SExprReader(consumer)
17 reader.feed("(this is (sexpr))")
29 def __init__(self, next_filter,
30 comment_begin=COMMENT_BEGIN,
31 comment_end=COMMENT_END,
33 paren_begin=PAREN_BEGIN,
37 AbstractFilter.__init__(self, next_filter)
38 self.comment_begin = comment_begin
39 self.comment_end = comment_end
40 self.separator = separator
41 self.paren_begin = paren_begin
42 self.paren_end = paren_end
45 self.special = comment_begin + separator + paren_begin + paren_end + quote + escape
# SExprReader ignores any error and
# tries to continue as long as possible.
# If you want it to throw an exception instead,
# please override these methods (as StrictSExprReader does).
# Called when a closing parenthesis is found while no list is being built.
def illegal_close_paren(self, i):
    """Report a redundant closing parenthesis and let parsing continue.

    This is the lenient default hook: it only prints a diagnostic.
    Subclasses may override it to raise instead.

    i -- index of the offending character, as produced by enumerate()
         over the tokens passed to feed().
    """
    # The parenthesised single-argument form prints the same text under
    # both Python 2 and Python 3; the bare print statement is a syntax
    # error on Python 3.
    print("Ignore a close parenthesis: %d" % i)
# Called when the input ends while a list is still under construction.
def premature_eof(self, i, x):
    """Report that the input ended inside an unfinished list.

    This is the lenient default hook: it only prints a diagnostic.
    Subclasses may override it to raise instead.

    i -- count of parentheses still open, as supplied by the caller.
    x -- the partially constructed structure.
    """
    # NOTE(review): the visible call site empties its build stack right
    # before passing len(build_stack) as `i`, so the count may always be
    # 0 -- confirm against the caller.
    #
    # The parenthesised single-argument form prints the same text under
    # both Python 2 and Python 3; the bare print statement is a syntax
    # error on Python 3.
    print("Premature end of file: %d parens left, partial=%s" % (i, x))
63 # reset the internal states.
65 self.incomment = False # if within a comment.
66 self.inquote = False # if within a quote.
67 self.inescape = False # if within a escape.
68 self.sym = '' # partially constructed symbol.
69 # NOTICE: None != nil (an empty list)
70 self.build = None # partially constructed list.
71 self.build_stack = [] # to store a chain of partial lists.
75 def feed(self, tokens):
76 for (i,c) in enumerate(tokens):
78 # within a comment - skip
79 self.incomment = (c not in self.comment_end)
80 elif self.inescape or (c not in self.special):
81 # add to the current working symbol
84 elif c in self.escape:
87 elif self.inquote and (c not in self.quote):
90 # special character (blanks, parentheses, or comment)
92 # close the current symbol
93 if self.build == None:
94 self.feed_next(self.sym)
96 self.build.append(self.sym)
98 if c in self.comment_begin:
100 self.incomment = True
101 elif c in self.quote:
103 self.inquote = not self.inquote
104 elif c in self.paren_begin:
105 # beginning a new list.
106 self.build_stack.append(self.build)
108 if self.build == None:
109 # begin from a scratch.
112 # begin from the end of the current list.
113 self.build.append(empty)
115 elif c in self.paren_end:
116 # terminating the current list
117 if self.build == None:
118 # there must be a working list.
119 self.illegal_close_paren(i)
121 if len(self.build_stack) == 1:
122 # current working list is the last one in the stack.
123 self.feed_next(self.build)
124 self.build = self.build_stack.pop()
129 # a working list should not exist.
130 if self.build != None:
131 # error - still try to construct a partial structure.
133 self.build.append(self.sym)
135 if len(self.build_stack) == 1:
138 x = self.build_stack[1]
140 self.build_stack = []
141 self.premature_eof(len(self.build_stack), x)
143 # flush the current working symbol.
144 self.feed_next(self.sym)
150 AbstractFilter.close(self)
class SExprIllegalClosingParenError(ValueError):
    """Raised by StrictSExprReader.illegal_close_paren for ill-structured input.

    The exception is constructed with the index of the offending
    closing parenthesis.
    """
160 class SExprPrematureEOFError(ValueError):
class StrictSExprReader(SExprReader):
    """SExprReader variant whose error hooks raise instead of printing."""

    def illegal_close_paren(self, i):
        """Raise SExprIllegalClosingParenError carrying the index `i`."""
        error = SExprIllegalClosingParenError(i)
        raise error

    def premature_eof(self, i, x):
        """Raise SExprPrematureEOFError carrying the count `i` and partial list `x`."""
        error = SExprPrematureEOFError(i, x)
        raise error
171 class _SExprStrConverter(AbstractConsumer):
174 _SExprStrConverter.results.append(s)
# Module-level reader instances shared by str2sexpr() and str2sexpr_strict().
# Each reader is given its own _SExprStrConverter consumer; the visible
# consumer code appends what it receives to the class attribute
# _SExprStrConverter.results, which the str2sexpr* helpers reset and return.
_str_converter = SExprReader(_SExprStrConverter())
# Same wiring, but with the reader whose error hooks raise exceptions.
_str_converter_strict = StrictSExprReader(_SExprStrConverter())
180 """parse a string as a sexpr."""
181 _SExprStrConverter.results = []
182 _str_converter.reset().feed(s).terminate()
183 return _SExprStrConverter.results
def str2sexpr_strict(s):
    """Parse the string `s` with the strict reader and return the results.

    The shared result list on _SExprStrConverter is replaced with a fresh
    empty list, the module-level strict reader is reset, fed `s` and
    terminated, and the result list is returned.  Because the reader is a
    StrictSExprReader, its error hooks raise
    SExprIllegalClosingParenError / SExprPrematureEOFError instead of
    printing a diagnostic.
    """
    # Start from an empty result list for this call.
    _SExprStrConverter.results = []
    # Same reset -> feed -> terminate sequence as a chained call, one step per line.
    reader = _str_converter_strict.reset()
    reader = reader.feed(s)
    reader.terminate()
    return _SExprStrConverter.results
194 """convert a sexpr into Lisp-like representation."""
195 if not isinstance(e, list):
197 return "("+" ".join(map(sexpr2str, e))+")"
202 assert str2sexpr("(this ;comment\n is (a test (sentences) (des()) (yo)))") == \
203 [["this", "is", ["a", "test", ["sentences"], ["des", []], ["yo"]]]]
204 assert str2sexpr('''(paren\\(\\)theses_in\\#symbol "space in \nsymbol"
205 this\\ way\\ also. "escape is \\"better than\\" quote")''') == \
206 [['paren()theses_in#symbol', 'space in \nsymbol', 'this way also.', 'escape is "better than" quote']]
207 str2sexpr("(this (is (a (parial (sentence")
212 if __name__ == "__main__":