mirror of
https://github.com/QingdaoU/OpenVJ.git
synced 2024-12-29 07:51:45 +00:00
增加clean_html的父类方法
This commit is contained in:
parent
8e89837ba0
commit
4dddbe59f9
@ -55,9 +55,6 @@ class PATRobot(Robot):
|
||||
data["id"] = problem_id
|
||||
return data
|
||||
|
||||
def _clean_html(self, text):
    """Strip a fixed set of markup tokens from ``text`` and decode the rest.

    The literal tokens ``<p>``, ``</p>``, ``<b>``, ``</b>``, ``<span>``,
    ``</span>`` and every carriage-return / line-feed character are removed.
    The remaining text is passed to ``self._decode_html`` (defined elsewhere
    in the class; presumably unescapes HTML entities -- confirm) and that
    result is returned.
    """
    # Only exact, attribute-less tags are matched by this pattern.
    markup_pattern = re.compile("<p>|</p>|<b>|</b>|\r|\n|<span>|</span>")
    without_markup = markup_pattern.sub("", text)
    return self._decode_html(without_markup)
|
||||
|
||||
def _regex_page(self, url, regex):
|
||||
r = self.get(url)
|
||||
self.check_status_code(r)
|
||||
|
@ -1,4 +1,5 @@
|
||||
# coding=utf-8
|
||||
import re
|
||||
import html
|
||||
import requests
|
||||
from .exceptions import RequestFailed, RegexError
|
||||
@ -91,3 +92,10 @@ class Robot(object):
|
||||
if response.status_code != status_code:
|
||||
raise RequestFailed("Invalid status code [%d] when fetching url [%s], expected %d" %
|
||||
(response.status_code, response.url, status_code))
|
||||
|
||||
def _clean_html(self, text):
    """Remove simple inline markup from ``text`` and turn ``<br>`` into newlines.

    Steps, in order:

    1. Delete opening and closing ``p``, ``b``, ``span`` and ``i`` tags
       (with or without attributes).
    2. Pass the result through ``self._decode_html`` (defined elsewhere in
       the class; presumably unescapes HTML entities -- confirm).
    3. Replace every ``<br>`` / ``<br/>`` / ``<br ...>`` tag with ``"\\n"``.

    :param text: HTML fragment to clean.
    :return: the cleaned string.
    """
    # Step 1: strip a small whitelist of inline tags.
    # ``\b`` after the tag name is essential: without it ``<b...>`` also
    # matches ``<br>`` (so line breaks were deleted before step 3 could see
    # them), ``<p...>`` matches ``<pre>`` and ``<i...>`` matches ``<img ...>``.
    # ``[^>]*`` keeps the match inside one tag and, unlike ``.*?``, also
    # covers attributes that wrap onto another line.
    without_inline_tags = re.sub(
        r"</?(?:p|b|span|i)\b[^>]*>", "", text, flags=re.IGNORECASE
    )

    # Step 2: decode after stripping, same order as before.
    decoded = self._decode_html(without_inline_tags)

    # Step 3: convert line-break tags to newlines. ``[^>]*`` replaces the
    # previous greedy ``.*``, which swallowed everything up to the last
    # ``>`` on the line.
    return re.sub(r"<br\b[^>]*>", "\n", decoded, flags=re.IGNORECASE)
|
||||
|
Loading…
Reference in New Issue
Block a user