diff options
author | Mike Crute <mcrute@gmail.com> | 2009-12-20 20:39:46 -0500 |
---|---|---|
committer | Mike Crute <mcrute@gmail.com> | 2009-12-20 20:39:46 -0500 |
commit | af2aec4837157eb45078896efa6a472aa96bb084 (patch) | |
tree | cb6d30e28cf268cb8828649e8cde56f040224cd5 | |
download | code_mining-master.tar.bz2 code_mining-master.tar.xz code_mining-master.zip |
-rw-r--r-- | library_digger.py | 244 | ||||
-rw-r--r-- | test.py | 20 |
2 files changed, 264 insertions, 0 deletions
diff --git a/library_digger.py b/library_digger.py new file mode 100644 index 0000000..79054a1 --- /dev/null +++ b/library_digger.py | |||
@@ -0,0 +1,244 @@ | |||
1 | """ | ||
2 | __version__ | ||
3 | __author__ | ||
4 | __date__ | ||
5 | """ | ||
6 | |||
# Root directory that main() prefixes to the relative path of the module
# it parses. NOTE(review): this is a hard-coded, machine-specific path.
PYTHON_PATH = "/Users/cruteme/Documents/Projects/ag_code/ag_python_lib/"
8 | |||
9 | import compiler | ||
10 | from pprint import pprint as pretty_print | ||
11 | from compiler import visitor, consts, ast | ||
12 | |||
class ASTVisitor(visitor.ASTVisitor, object):
    """
    New-style variant of ``compiler.visitor.ASTVisitor``.

    Mixing in ``object`` is the only change; no behaviour is added
    or overridden here.
    """
18 | |||
class ShallowASTVisitor(ASTVisitor):
    """
    Visitor whose ``default`` handler accepts module nodes only and
    looks no deeper than the module's top-level statements (hence
    "shallow").
    """

    def default(self, node, *args):
        """
        Dispatch each top-level statement of the module *node*.

        Any extra positional arguments are forwarded unchanged to
        ``dispatch``. Raises ValueError when *node* is not an
        ``ast.Module``.
        """
        if not isinstance(node, ast.Module):
            raise ValueError("Shallow visitor can only visit modules.")

        # node.node.nodes is the list of statements directly under the
        # module node.
        for statement in node.node.nodes:
            self.dispatch(statement, *args)
33 | |||
34 | |||
class ShallowSymbolVisitor(ShallowASTVisitor):
    """
    Collect the publicly accessible module-level symbols of a module,
    i.e. the things that someone could import from it.

    Every recorded name is filed into one of four sets according to
    get_protection_status(): ``public_symbols``, ``protected_symbols``,
    ``private_symbols`` or ``magic_symbols``.

    NOTE: This doesn't consider imports, those are considered to be
    virtual symbols and handled by a different visitor.
    """

    def __init__(self):
        self.public_symbols = set()
        self.private_symbols = set()
        self.protected_symbols = set()
        self.magic_symbols = set()
        # Not filled in by this visitor; callers may attach the imported
        # names here. Initialised so that readers of the attribute never
        # hit an AttributeError.
        self.virtual_symbols = set()
        ASTVisitor.__init__(self)

    def visitClass(self, node):
        """Record the name of a class definition."""
        self._put_in_set(node.name)

    def visitAssign(self, node):
        """
        Record every name bound by an assignment statement.

        All targets are examined, so chained assignments
        (``a = b = 1``) and tuple/list unpacking, including nested
        unpacking, are covered. Attribute and subscript targets do not
        bind a name and are skipped.
        """
        for target in node.nodes:
            for name_node in self._iter_name_nodes(target):
                self._put_in_set(name_node.name)

    def visitFunction(self, node):
        """Record the name of a function definition."""
        self._put_in_set(node.name)

    def visitAssName(self, node):
        """Forget a name when the node is a deletion (``del name``)."""
        # Flags are plain strings: compare by value, not by identity.
        if node.flags == consts.OP_DELETE:
            self._remove_from_set(node.name)

    def visitAssTuple(self, node):
        """Forget every name deleted by a ``del a, b`` style statement."""
        for name_node in self._iter_name_nodes(node):
            if name_node.flags == consts.OP_DELETE:
                self._remove_from_set(name_node.name)

    def _iter_name_nodes(self, target):
        """Yield the AssName nodes inside *target*, descending into
        tuple and list targets; any other node type yields nothing."""
        if isinstance(target, ast.AssName):
            yield target
        elif isinstance(target, (ast.AssTuple, ast.AssList)):
            for item in target.nodes:
                for name_node in self._iter_name_nodes(item):
                    yield name_node

    def _bucket_for(self, symbol_name):
        """Return the set that *symbol_name* belongs to, or None when
        its protection status is not one of the four known values."""
        buckets = {
            SYMBOL_PRIVATE: self.private_symbols,
            SYMBOL_MAGIC: self.magic_symbols,
            SYMBOL_PROTECTED: self.protected_symbols,
            SYMBOL_PUBLIC: self.public_symbols,
        }
        return buckets.get(get_protection_status(symbol_name))

    def _remove_from_set(self, symbol_name):
        """Drop *symbol_name* from its set if it was recorded."""
        bucket = self._bucket_for(symbol_name)
        if bucket is not None:
            # discard() rather than remove(): some values might not
            # exist because they came from imports.
            bucket.discard(symbol_name)

    def _put_in_set(self, symbol_name):
        """File *symbol_name* into the set matching its protection."""
        bucket = self._bucket_for(symbol_name)
        if bucket is not None:
            bucket.add(symbol_name)

    @property
    def all_symbols(self):
        """Union of the private, public, protected and magic sets."""
        return (self.private_symbols | self.public_symbols |
                self.protected_symbols | self.magic_symbols)
110 | |||
111 | |||
# Protection categories returned by get_protection_status().
SYMBOL_PRIVATE = "SYMBOL_PRIVATE"
SYMBOL_PROTECTED = "SYMBOL_PROTECTED"
SYMBOL_PUBLIC = "SYMBOL_PUBLIC"
SYMBOL_MAGIC = "SYMBOL_MAGIC"


def get_protection_status(symbol_name):
    """
    Classify *symbol_name* by its underscore decoration.

    No leading underscore        -> SYMBOL_PUBLIC
    Exactly one leading "_"      -> SYMBOL_PROTECTED
    Leading and trailing "__"    -> SYMBOL_MAGIC
    Leading "__" only            -> SYMBOL_PRIVATE
    """
    if not symbol_name.startswith("_"):
        return SYMBOL_PUBLIC

    if not symbol_name.startswith("__"):
        return SYMBOL_PROTECTED

    if symbol_name.endswith("__"):
        return SYMBOL_MAGIC

    return SYMBOL_PRIVATE
126 | |||
127 | |||
class ImportVisitor(ASTVisitor, object):
    """
    Record what a module imports.

    ``symbols`` maps each imported module name to the set of names
    taken from it with ``from module import name``. A plain
    ``import module`` produces an entry with an empty set.
    """

    def __init__(self):
        self.symbols = {}
        super(ImportVisitor, self).__init__()

    def visitImport(self, node):
        """Register every module named by an ``import`` statement."""
        for module, _alias in node.names:
            self.put_symbol(module)

    def visitFrom(self, node):
        """Register the names pulled out of ``node.modname`` by a
        ``from ... import ...`` statement (aliases are ignored)."""
        imported = [symbol for symbol, _alias in node.names]
        self.put_symbol(node.modname, imported)

    def put_symbol(self, module, symbols=None):
        """
        Merge *symbols* into the set recorded for *module*.

        *symbols* may be any iterable of names, or None/empty to just
        register the module. The iterable is always copied, so the
        caller's object is never stored and never mutated by later
        merges.
        """
        new_symbols = set(symbols) if symbols else set()
        self.symbols.setdefault(module, set()).update(new_symbols)

    @property
    def modules(self):
        """Names of all modules seen so far."""
        return self.symbols.keys()
159 | |||
160 | |||
def get_exported_symbols(ast_tree):
    """
    Return the symbols exported by the module parsed into *ast_tree*.

    Only the virtual (imported) symbols are gathered so far.
    TODO: merge in the real symbols once get_real_symbols() is
    implemented.
    """
    # The original called the misspelled name ``get_virutal_symbols``,
    # which raised NameError on every call, and discarded the result.
    virtual_symbols = get_virtual_symbols(ast_tree)
    return virtual_symbols
163 | |||
164 | |||
def get_real_symbols(ast_tree):
    """Placeholder, not implemented yet: ignores *ast_tree* and
    returns None."""
    return None
167 | |||
168 | |||
def get_virtual_symbols(ast_tree):
    """
    Return the set of virtual symbols found in *ast_tree*.

    Virtual symbols are those symbols which are imported by the
    module but not really defined by the module. They are taken from
    the per-module symbol sets gathered by an ImportVisitor.
    """
    collector = ImportVisitor()
    visitor.walk(ast_tree, collector)

    # One set of names per imported module; merge them into a single set.
    return set(flatten_nested_list(collector.symbols.values()))
181 | |||
182 | |||
def get_private_symbols(ast_tree):
    """Placeholder, not implemented yet: ignores *ast_tree* and
    returns None."""
    return None
185 | |||
186 | |||
def flatten_nested_list(list_):
    """
    Return a flat list of the items of *list_*.

    Items that are sets, tuples or lists are expanded recursively;
    every other item is copied through as is.
    """
    flat = []

    for element in list_:
        if not isinstance(element, (set, tuple, list)):
            flat.append(element)
            continue
        flat.extend(flatten_nested_list(element))

    return flat
197 | |||
198 | |||
def is_empty_set(input_):
    """Return True when *input_* compares equal to an empty set."""
    return input_ == set()
200 | |||
def warn_about_module(symbol_table):
    """
    Print a warning line for each problem found in a module's symbols.

    *symbol_table* is any object exposing ``magic_symbols`` and
    ``public_symbols`` as sets, plus optionally ``virtual_symbols``.

    Warnings emitted:
      * the magic tags __date__, __author__ and __version__ are not
        all present;
      * there are no public symbols;
      * there are no public symbols but there are virtual ones.
    """
    required_tags = set(["__date__", "__author__", "__version__"])
    if not required_tags.issubset(symbol_table.magic_symbols):
        print("*** Missing magic metainfo tags!")

    if symbol_table.public_symbols:
        return

    print("*** Module exports no public non-virtual symbols!")

    # virtual_symbols is attached by the caller and may be absent.
    if getattr(symbol_table, "virtual_symbols", None):
        print("*** Module exports only virtual symbols.")
212 | |||
213 | |||
def main():
    """
    Parse one hard-coded module, print its public, private, protected,
    magic and virtual symbol sets, then print any warnings about it.
    """
    # Alternative inputs used during development:
    #tree = compiler.parseFile(PYTHON_PATH + "app/view/widget/widget.py")
    #tree = compiler.parseFile(PYTHON_PATH + "app/yhm/yhmcardorderpage.py")
    #tree = compiler.parseFile("/Library/Python/2.5/site-packages/SQLAlchemy-0.5.0beta1-py2.5.egg/sqlalchemy/__init__.py")
    #tree = compiler.parseFile("/Users/cruteme/Desktop/test.py")
    tree = compiler.parseFile(PYTHON_PATH + "app/ag/beta/wire.py")

    visitor_ = ShallowSymbolVisitor()
    visitor_.virtual_symbols = get_virtual_symbols(tree)
    visitor.walk(tree, visitor_)

    report = (
        ("Public Symbols:", visitor_.public_symbols),
        ("Private Symbols:", visitor_.private_symbols),
        ("Protected Symbols:", visitor_.protected_symbols),
        ("Magic Symbols:", visitor_.magic_symbols),
        ("Virtual Symbols:", visitor_.virtual_symbols),
    )

    # Each section is a heading, the set itself, then a blank line.
    for heading, symbols in report:
        print(heading)
        print(symbols)
        print("")

    warn_about_module(visitor_)


if __name__ == "__main__":
    main()
@@ -0,0 +1,20 @@ | |||
# Sample module made of assorted module-level definitions.
# NOTE(review): presumably meant to be parsed by library_digger.py rather
# than executed -- several statements below would fail at run time
# (e.g. unpacking "test" into three names, or reading the undefined
# name `other`). Confirm before running it.

# Class definition.
class Test(object):
    pass

# Simple single-name assignments.
CONSTANT = "this"
variable = "that"

# Function definition.
def my_function():
    pass

# Tuple-unpacking assignment.
this, that, the_other = "test"
# Chained assignment: two targets bound to the same value.
my = junk = "test"

# Deletion of a single name, then of several names at once.
del my
del this, that

# Parenthesised tuple target.
(more, stuff) = (other, stuff)

# Names with double-leading, single-leading and double-both-sides
# underscores.
__private = "this is private"
_protected = "this is protected"
__magic__ = "this is magic"