implemented newer version of readability-module

This commit is contained in:
2013-04-15 23:33:24 +02:00
parent 8eb6ed2a6d
commit b6d1e705f0
8 changed files with 797 additions and 234 deletions

25
readability/debug.py Normal file
View File

@@ -0,0 +1,25 @@
def save_to_file(text, filename):
f = open(filename, 'wt')
f.write('<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />')
f.write(text.encode('utf-8'))
f.close()
uids = {}
def describe(node, depth=2):
if not hasattr(node, 'tag'):
return "[%s]" % type(node)
name = node.tag
if node.get('id', ''): name += '#'+node.get('id')
if node.get('class', ''):
name += '.' + node.get('class').replace(' ','.')
if name[:4] in ['div#', 'div.']:
name = name[3:]
if name in ['tr', 'td', 'div', 'p']:
if not node in uids:
uid = uids[node] = len(uids)+1
else:
uid = uids.get(node)
name += "%02d" % (uid)
if depth and node.getparent() is not None:
return name+' - '+describe(node.getparent(), depth-1)
return name