]>
Commit | Line | Data |
---|---|---|
e9647132 PI |
1 | #!/usr/bin/env python |
2 | ## | |
3 | ## sexpr.py - by Yusuke Shinyama | |
4 | ## | |
5 | ## * public domain * | |
6 | ## | |
9d9c1ae1 PI |
7 | ## from http://www.unixuser.org/~euske/python/index.html: |
8 | ## The following files are in public domain except where otherwise noted. THESE FILES COME WITH ABSOLUTELY NO WARRANTY. | |
e9647132 PI |
9 | |
10 | from abstfilter import AbstractFeeder, AbstractFilter, AbstractConsumer | |
11 | ||
12 | ||
13 | ## SExprReader | |
14 | ## | |
class SExprReader(AbstractFilter):
    """Incremental S-expression reader.

    Characters are passed to feed() in chunks; the parser state is kept on
    the instance between calls.  Each completed top-level item (a symbol
    string, or a list of symbols and nested lists) is handed to
    self.feed_next().

    Usage:

      reader = SExprReader(consumer)
      reader.feed("(this is (sexpr))")
      reader.close()
    """

    COMMENT_BEGIN = ";"
    COMMENT_END = "\n"
    SEPARATOR = " \t\n"
    PAREN_BEGIN = "("
    PAREN_END = ")"
    QUOTE = '"'
    ESCAPE = "\\"

    def __init__(self, next_filter,
                 comment_begin=COMMENT_BEGIN,
                 comment_end=COMMENT_END,
                 separator=SEPARATOR,
                 paren_begin=PAREN_BEGIN,
                 paren_end=PAREN_END,
                 quote=QUOTE,
                 escape=ESCAPE):
        """Create a reader that forwards parsed items to next_filter.

        Every syntax argument is a string; a character counts as belonging
        to a category when it occurs in the corresponding string.
        """
        AbstractFilter.__init__(self, next_filter)
        self.comment_begin = comment_begin
        self.comment_end = comment_end
        self.separator = separator
        self.paren_begin = paren_begin
        self.paren_end = paren_end
        self.quote = quote
        self.escape = escape
        # Every character that is not simply appended to the current symbol.
        self.special = (comment_begin + separator + paren_begin +
                        paren_end + quote + escape)
        self.reset()

    # SExprReader ignores any error and
    # tries to continue as long as possible.
    # If you want to throw an exception instead,
    # override these two methods.

    def illegal_close_paren(self, i):
        """Called when a closing parenthesis is found with no open list.

        i is the index of the offending character within the chunk
        currently being processed by feed().
        """
        # Single-argument call form: prints the same on Python 2 and 3.
        print("Ignore a close parenthesis: %d" % i)

    def premature_eof(self, i, x):
        """Called by terminate() when lists are still open.

        i is the number of lists left unclosed and x is the outermost
        partially built list.
        """
        print("Premature end of file: %d parens left, partial=%s" % (i, x))

    def reset(self):
        """Reset the internal state and return self."""
        self.incomment = False  # if within a comment.
        self.inquote = False    # if within a quote.
        self.inescape = False   # if the previous character was an escape.
        self.sym = ''           # partially constructed symbol.
        # NOTICE: None != nil (an empty list)
        self.build = None       # partially constructed list.
        self.build_stack = []   # to store a chain of partial lists.
        return self

    def _flush_symbol(self):
        """Emit the pending symbol, if any, to the open list or downstream."""
        if not self.sym:
            return
        if self.build is None:
            self.feed_next(self.sym)
        else:
            self.build.append(self.sym)
        self.sym = ''

    def _open_list(self):
        """Start a new list nested in the current one (or at top level)."""
        self.build_stack.append(self.build)
        fresh = []
        if self.build is not None:
            # begin from the end of the current list.
            self.build.append(fresh)
        self.build = fresh

    def _close_list(self, i):
        """Finish the current list; emit it downstream if it is top-level."""
        if self.build is None:
            # there must be a working list.
            self.illegal_close_paren(i)
            return
        if len(self.build_stack) == 1:
            # the current working list is the outermost one.
            self.feed_next(self.build)
        self.build = self.build_stack.pop()

    def feed(self, tokens):
        """Analyze the characters of tokens one by one and return self."""
        for (i, c) in enumerate(tokens):
            if self.incomment:
                # within a comment - skip until the comment terminator.
                self.incomment = (c not in self.comment_end)
            elif self.inescape or (c not in self.special):
                # add to the current working symbol.
                self.sym += c
                self.inescape = False
            elif c in self.escape:
                # the next character is taken literally.
                self.inescape = True
            elif self.inquote and (c not in self.quote):
                # special characters lose their meaning inside a quote.
                self.sym += c
            else:
                # special character (blank, parenthesis, quote or comment):
                # it always closes the current symbol first.
                self._flush_symbol()
                if c in self.comment_begin:
                    self.incomment = True
                elif c in self.quote:
                    self.inquote = not self.inquote
                elif c in self.paren_begin:
                    self._open_list()
                elif c in self.paren_end:
                    self._close_list(i)
        return self

    def terminate(self):
        """Flush pending input, report unclosed lists, and return self."""
        self._flush_symbol()
        if self.build is not None:
            # error - still try to hand over the partial structure.
            # Take the depth BEFORE clearing the stack; otherwise the
            # reported number of unclosed parens is always 0.
            depth = len(self.build_stack)
            if depth == 1:
                partial = self.build
            else:
                # build_stack[0] is the initial None; [1] is the outermost list.
                partial = self.build_stack[1]
            # Clear the state first so the reader stays usable even when
            # premature_eof() raises.
            self.build = None
            self.build_stack = []
            self.premature_eof(depth, partial)
        return self

    def close(self):
        """Close the filter and flush whatever is still pending."""
        # NOTE(review): the base close() runs before terminate(), so pending
        # output is flushed after the base class has closed - confirm that
        # AbstractFilter.close() tolerates a later feed_next().
        AbstractFilter.close(self)
        self.terminate()
155 | ||
156 | ||
157 | ## StrictSExprReader | |
158 | ## | |
class SExprIllegalClosingParenError(ValueError):
    """Raised by StrictSExprReader for a closing parenthesis with no open list."""
class SExprPrematureEOFError(ValueError):
    """Raised by StrictSExprReader when input ends while lists are still open."""
class StrictSExprReader(SExprReader):
    """SExprReader variant that raises on ill-structured input."""

    def illegal_close_paren(self, i):
        """Raise SExprIllegalClosingParenError carrying the position i."""
        raise SExprIllegalClosingParenError(i)

    def premature_eof(self, i, x):
        """Raise SExprPrematureEOFError carrying the count i and partial list x."""
        raise SExprPrematureEOFError(i, x)
169 | ||
170 | ||
171 | ## str2sexpr | |
172 | ## | |
class _SExprStrConverter(AbstractConsumer):
    """Consumer that collects every item fed to it into a class-level list."""

    # Stored on the class, so it is shared by all instances; str2sexpr() and
    # str2sexpr_strict() rebind it to a fresh list before each parse.
    results = []

    def feed(self, s):
        """Append one parsed item to the shared results list."""
        collected = _SExprStrConverter.results
        collected.append(s)
# Module-level readers reused by str2sexpr() and str2sexpr_strict().
_str_converter = SExprReader(_SExprStrConverter())
_str_converter_strict = StrictSExprReader(_SExprStrConverter())
180 | ||
def str2sexpr(s):
    """Parse the string s and return the list of top-level items found.

    Uses the lenient module-level reader, so structural errors are only
    reported, not raised.
    """
    # Start from a fresh result list; the consumer appends to this attribute.
    _SExprStrConverter.results = []
    fresh_reader = _str_converter.reset()
    fed_reader = fresh_reader.feed(s)
    fed_reader.terminate()
    return _SExprStrConverter.results
def str2sexpr_strict(s):
    """Parse the string s and return the list of top-level items found.

    Uses the strict module-level reader, which raises
    SExprIllegalClosingParenError or SExprPrematureEOFError on
    ill-structured input.
    """
    # Start from a fresh result list; the consumer appends to this attribute.
    _SExprStrConverter.results = []
    fresh_reader = _str_converter_strict.reset()
    fed_reader = fresh_reader.feed(s)
    fed_reader.terminate()
    return _SExprStrConverter.results
191 | ||
192 | ||
193 | ## sexpr2str | |
194 | ## | |
def sexpr2str(e):
    """Render a sexpr in Lisp-like notation.

    A list becomes its rendered items joined by single spaces inside
    parentheses; anything that is not a list is returned unchanged.
    """
    if isinstance(e, list):
        rendered_items = [sexpr2str(item) for item in e]
        return "(" + " ".join(rendered_items) + ")"
    return e
200 | ||
201 | ||
202 | # test stuff | |
def test():
    """Smoke-test str2sexpr on nesting, comments, quoting and escaping."""
    nested_src = "(this ;comment\n is (a test (sentences) (des()) (yo)))"
    nested_expected = [["this", "is", ["a", "test", ["sentences"], ["des", []], ["yo"]]]]
    assert str2sexpr(nested_src) == nested_expected

    escaped_src = '''(paren\\(\\)theses_in\\#symbol "space in \nsymbol"
this\\ way\\ also. "escape is \\"better than\\" quote")'''
    escaped_expected = [['paren()theses_in#symbol', 'space in \nsymbol', 'this way also.', 'escape is "better than" quote']]
    assert str2sexpr(escaped_src) == escaped_expected

    # Unterminated input: exercised through the lenient reader, result not checked.
    str2sexpr("(this (is (a (parial (sentence")
    return
211 | ||
212 | ||
213 | # main | |
# Run the self-test when executed as a script.
if __name__ == "__main__":
    test()