话说前端果然是不行啊……总是被要求照抄某个网站的样式,于是总会发现 CSS 多出了好多用不到的规则,一条条地去除又太麻烦。于是昨天抽空写了个脚本来处理,现在还很简陋:输入一个 HTML 文件和一个 CSS 文件,把 CSS 中用到的规则挑出来写入另一个文件。本来还想再整理一下代码,不过要出去玩了,就先丢上来吧。大致的思路是分别解析 CSS 文件和 HTML 文件,再遍历 HTML 节点,与 CSS 规则逐一匹配。
#!/usr/bin/python
#encoding:utf8
import re
import copy
import BeautifulSoup as bs
# Path of the HTML page that is opened and walked to decide which rules are used.
html_file_name = 'result.html'
# Path of the stylesheet that is opened and parsed into candidate rules.
css_file_name = 'mobile.css'
class css_rule:
    """One matchable selector chain together with the rule text it came from.

    Attributes:
        selecters: list of simple-selector strings, outermost ancestor first.
        raw: the rule rendered as ``selector{declarations}``.
        style: the declaration text found between the braces.
    """

    # Class-level fallbacks; every instance overrides all three in __init__.
    selecters = []
    raw = ''
    style = ''

    def __init__(self, selecters, raw, style):
        """Store the three fields unchanged on the instance."""
        self.selecters, self.raw, self.style = selecters, raw, style
def print_css(css_rule):
    """Dump a rule entry to stdout, but only if it matched at least once.

    Args:
        css_rule: dict with a ``'rule'`` object (exposing ``selecters``,
            ``raw`` and ``style``) and an integer ``'used'`` counter.
    """
    if css_rule['used'] <= 0:
        return
    rule = css_rule['rule']
    for field in ('selecters', 'raw', 'style'):
        # Single parenthesised argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("%s : %s \n" % (field, getattr(rule, field)))
    print("used : %d" % css_rule['used'])
class html_tag:
    """The three properties of an HTML element that selector matching reads.

    Attributes:
        myclass: value of the element's ``class`` attribute.
        name: the element's tag name.
        myid: value of the element's ``id`` attribute.
    """

    # Class-level fallbacks; every instance overrides all three in __init__.
    myclass = ''
    name = ''
    myid = ''

    def __init__(self, myclass, name, myid):
        """Store the three fields unchanged on the instance."""
        self.myclass, self.name, self.myid = myclass, name, myid
def walk_html(tag):
    """Yield ``tag`` and every non-text node below it in document order.

    Nodes that are instances of ``bs.NavigableString`` are skipped and not
    descended into.

    Args:
        tag: the node to start from; it must expose ``.contents``.

    Yields:
        Each visited node, parents before their children.
    """
    pending = [tag]
    while pending:
        current = pending.pop()
        if isinstance(current, bs.NavigableString):
            continue
        yield current
        # Push children reversed so the first child is popped (visited) first.
        pending.extend(reversed(current.contents))
def get_format_node(tag):
    """Build an ``html_tag`` from a parsed element.

    Args:
        tag: element exposing ``.name`` and ``.get(attr, default)``.

    Returns:
        An ``html_tag`` carrying the element's class, tag name and id; a
        missing ``class`` or ``id`` attribute becomes ``''``.
    """
    class_attr = tag.get('class', '')
    id_attr = tag.get('id', '')
    return html_tag(class_attr, tag.name, id_attr)
def rule_match(format_nodes, selecters):
    """Tell whether a selector chain is satisfied along an ancestor path.

    The selectors are consumed from the last one to the first while the
    nodes are scanned from the first one onwards; a node that does not
    satisfy the selector currently being looked for is simply skipped.

    Unlike the earlier recursive version, neither argument is modified, so
    callers no longer need to pass throwaway copies (doing so stays harmless).

    NOTE(review): the last selector may be satisfied by any node in
    ``format_nodes``, not only by the first one, so this is looser than real
    CSS descendant matching. That behaviour is kept as it was.

    Args:
        format_nodes: nodes ordered from the element itself up through its
            ancestors.
        selecters: simple-selector strings, outermost ancestor first.

    Returns:
        True when every selector was matched by some node in order (always
        True for an empty ``selecters``); False otherwise.
    """
    remaining = len(selecters)
    for node in format_nodes:
        if not remaining:
            break
        # Always test the right-most selector that is still unmatched.
        if node_match_selecter(node, selecters[remaining - 1]):
            remaining -= 1
    return remaining == 0
def node_match_selecter(node, selecter):
    """Return True when ``node`` satisfies the simple selector ``selecter``.

    Only the tag name, the classes and the id of the selector are checked;
    anything else in it (for example a ``:hover`` suffix) is ignored.

    Args:
        node: object exposing ``name``, ``myclass`` and ``myid``.
            ``myclass`` may be a whitespace-separated string or a sequence
            of class names.
        selecter: one simple selector such as ``div``, ``.item``,
            ``ul.nav-bar.open`` or ``a#home:hover``.

    Returns:
        False as soon as the tag name, any required class, or the id
        disagrees with the node; True otherwise.
    """
    # Identifiers may contain '-', which plain \w would cut short
    # (".nav-bar" used to be read as ".nav").
    tag_name = re.match(r'[\w-]*', selecter).group(0)
    # An empty tag name (selector starts with '.', '#', '*', ...) matches any tag.
    if tag_name and tag_name != node.name:
        return False

    wanted_classes = re.findall(r'\.([\w-]+)', selecter)
    if wanted_classes:
        node_classes = node.myclass
        if hasattr(node_classes, 'split'):
            # split() without an argument also copes with tabs and runs of spaces.
            node_classes = node_classes.split()
        # A compound selector like ".a.b" requires every listed class.
        for wanted in wanted_classes:
            if wanted not in node_classes:
                return False

    id_found = re.search(r'#([\w-]+)', selecter)
    if id_found and id_found.group(1) != node.myid:
        return False
    return True
def print_node(node):
    """Dump a node's class, id and tag name to stdout, one field per print.

    Args:
        node: object exposing ``myclass``, ``myid`` and ``name``.
    """
    for field in ('myclass', 'myid', 'name'):
        value = getattr(node, field)
        # Single parenthesised argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("%s : %s \n" % (field, value))
def _parse_css_rules(css_text):
    """Turn stylesheet text into one match entry per comma-separated selector.

    Each ``selector-list { declarations }`` rule yields one entry for every
    selector in its comma-separated list, so ``a b, c d {...}`` produces the
    chains ``['a', 'b']`` and ``['c', 'd']``. All entries of a rule share
    the same ``raw`` text.

    Selector tokens are lower-cased for matching; ``raw`` and ``style`` keep
    the stylesheet's original case so that case-sensitive values such as
    ``url(...)`` paths are written out unchanged.

    NOTE(review): nested at-rules such as ``@media`` are not modelled (only
    their inner rules are picked up), and sibling combinators (``+``, ``~``)
    are left as ordinary tokens.

    Args:
        css_text: the full text of a stylesheet.

    Returns:
        A list of ``{'rule': css_rule, 'used': 0}`` dicts.
    """
    entries = []
    # `*?` rather than `+?`, so the empty comment `/**/` ends at its own `*/`.
    css_text = re.sub(r'/\*[\d\D]*?\*/', '', css_text)
    # `[^{}]` keeps a match inside one rule and allows an empty `{}` body.
    for selecter, style in re.findall(r'([^{}]+)\{([^{}]*)\}', css_text):
        # Collapse whitespace instead of deleting newlines/tabs, so tokens
        # that were separated only by a line break are not glued together.
        selecter = re.sub(r'\s*,\s*', ',', ' '.join(selecter.split()))
        style = re.sub(r'[\r\n\t]+', ' ', style).strip()
        raw = "%s{%s}" % (selecter, style)
        # Split on commas first: each group is an independent selector chain.
        for group in selecter.lower().split(','):
            # A child combinator is approximated by a descendant relation
            # rather than being kept as a token that swallows an ancestor.
            nodes = group.replace('>', ' ').split()
            if not nodes:
                continue
            entries.append({'rule': css_rule(nodes, raw, style), 'used': 0})
    return entries


# One entry per selector chain; 'used' counts how often the chain matched.
with open(css_file_name) as css_file:
    css_list = _parse_css_rules(css_file.read())
# Walk every element under <body> and count, for each parsed rule, how many
# elements' ancestor paths satisfy it.
with open(html_file_name) as html_file:
    # Newlines become spaces (not nothing) so that markup such as
    # '<div\nclass="a">' is not glued into '<divclass="a">'.
    html_text = html_file.read().replace('\n', ' ').lower()
soup = bs.BeautifulSoup(html_text)
body = soup.body
if body is None:
    # Fail with a clear message instead of an AttributeError inside walk_html.
    raise ValueError("%s has no <body> element to match against" % html_file_name)
for i in walk_html(body):
    # Path from the element itself up through its ancestors; the <html>
    # element and the document root are left out.
    format_nodes = [get_format_node(i)]
    format_nodes.extend(
        get_format_node(k) for k in i.findParents()
        if k.name not in ('html', '[document]')
    )
    for rule in css_list:
        # rule_match may consume the lists it receives, so hand it throwaway
        # copies. Shallow copies are enough: only the lists are altered,
        # never the selector strings or node objects inside them.
        tmp_rule = list(rule['rule'].selecters)
        tmp_nodes = list(format_nodes)
        if rule_match(tmp_nodes, tmp_rule):
            rule['used'] += 1
# Write every rule that matched at least once to 'processed.css', one rule
# per line. (Debug aid: call print_css(entry) on the css_list entries to see
# the match counts.)
with open('processed.css', 'w') as out_file:
    # Entries that came from one comma-separated selector list share the same
    # raw text; remember what was written so each rule appears only once.
    written = set()
    for rule in css_list:
        raw = rule['rule'].raw
        if rule['used'] > 0 and raw not in written:
            written.add(raw)
            out_file.write(raw + '\n')
在遍历 HTML 树时用了 yield,注意一下递归时 yield 的用法,参见 http://www.iteye.com/topic/338111 。代码只是随手写的,有些命名实在是很囧,以后要是想做成一个成熟的项目,还得再完善一下。
有一个 bug:CSS 规则我不是很熟,只考虑了形如 `selector1 selector2,selector3` 的写法,试了一下后发现样式漏掉了一点,检查后才知道 CSS 还可以写成 `selector1 selector2,selector3 selector4`。fix 起来也简单,逻辑没有问题就好说了。还有些小问题:比如现在只能匹配一个 HTML 文件(这改起来很方便);写入 CSS 文件后会有冗余(每行一条规则,再去重一下即可)。
另外,不会用 gdb、pdb 真是坑爹啊,还是得学。为了 debug 专门开 Eclipse 好麻烦,而且用它写代码也坑爹,编辑效率太低了。