获取文字加表情(img 标签的 alt 属性)
#!/usr/bin/env python
# encoding: utf-8
"""Extract text plus emoticon labels (``alt`` attributes) from an HTML fragment.

Two approaches are shown:

* ``parse_div``    -- recursive walk over a BeautifulSoup tag.
* ``extract_text`` -- a single lxml XPath query (the "optimal solution"
  of the original post).

Reposted from: https://www.cnblogs.com/c-x-a/p/9340620.html
"""
from functools import reduce  # kept from the original import list; unused here

from bs4 import BeautifulSoup
from lxml import html

# Sample fragment: an emoticon <img> (its label lives in ``alt``) followed by
# plain text.  Deliberately NOT named ``html`` -- that would rebind the
# ``lxml.html`` module imported above and break ``html.fromstring``.
SAMPLE_HTML = """
<div><span class="url-icon"><img alt="[馋嘴]" src="//h5.sinaimg.cn/m/emoticon/icon/default/d_chanzui-ad3f4f182c.png" style="width:1em; height:1em;"/></span>听着就很好吃</div>
"""


def parse_div(div_tags):
    """Return the text of *div_tags* with ``alt`` values inlined.

    Walks ``div_tags.contents`` in order:

    * string nodes are kept, with newlines and spaces removed;
    * elements that carry an ``alt`` attribute contribute that attribute's
      value (their own children are not visited);
    * any other element is processed recursively.

    Args:
        div_tags: A BeautifulSoup tag (anything exposing ``.contents`` whose
            non-string children provide ``has_attr`` and ``get``).

    Returns:
        The concatenated string.
    """
    parts = []
    for node in div_tags.contents:
        if isinstance(node, str):
            parts.append(node.replace('\n', '').replace(' ', ''))
        elif node.has_attr('alt'):
            parts.append(node.get('alt', ''))
        else:
            parts.append(parse_div(node))
    return ''.join(parts)


def extract_text(htmlstr):
    """Return text and ``alt`` values of *htmlstr* using one XPath query.

    The query ``.//text()|.//@alt`` selects every text node and every ``alt``
    attribute under the parsed root; each result has newlines, spaces and
    zero-width spaces (``\\u200b``) removed before joining.

    Args:
        htmlstr: HTML markup as a string.

    Returns:
        The concatenated string.
    """
    root = html.fromstring(htmlstr)
    nodes = root.xpath(".//text()|.//@alt")
    return ''.join(
        node.replace('\n', '').replace(' ', '').replace('\u200b', '')
        for node in nodes
    )


def main():
    """Run both extraction approaches on ``SAMPLE_HTML`` and print the results."""
    soup = BeautifulSoup(SAMPLE_HTML, 'html.parser')
    print(parse_div(soup.find('div')))
    print(extract_text(SAMPLE_HTML))


if __name__ == '__main__':
    main()
相关资源:图书管理系统(Java Mysql)我的第一个完全自己做的实训项目