增加富文本的 xss 过滤

This commit is contained in:
virusdefender 2015-09-22 17:03:53 +08:00
parent b224a823fe
commit c26fd6734d
8 changed files with 288 additions and 3 deletions

View File

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import models, migrations
import utils.models
class Migration(migrations.Migration):
dependencies = [
('announcement', '0002_auto_20150818_1445'),
]
operations = [
migrations.AlterField(
model_name='announcement',
name='content',
field=utils.models.RichTextField(),
),
]

View File

@ -3,13 +3,14 @@ from django.db import models
from account.models import User
from group.models import Group
from utils.models import RichTextField
class Announcement(models.Model):
# 标题
title = models.CharField(max_length=50)
# 公告的内容 HTML 格式
content = models.TextField()
content = RichTextField()
# 创建时间
create_time = models.DateTimeField(auto_now_add=True)
# 这个公告是谁创建的

View File

@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import models, migrations
import utils.models
class Migration(migrations.Migration):
dependencies = [
('contest', '0009_contestsubmission_first_achieved'),
]
operations = [
migrations.AlterField(
model_name='contest',
name='description',
field=utils.models.RichTextField(),
),
migrations.AlterField(
model_name='contestproblem',
name='description',
field=utils.models.RichTextField(),
),
]

View File

@ -5,6 +5,8 @@ from django.utils.timezone import now
from account.models import User
from problem.models import AbstractProblem
from group.models import Group
from utils.models import RichTextField
GROUP_CONTEST = 0
PUBLIC_CONTEST = 1
@ -17,7 +19,7 @@ CONTEST_UNDERWAY = 0
class Contest(models.Model):
title = models.CharField(max_length=40, unique=True)
description = models.TextField()
description = RichTextField()
# 比赛模式0 即为是acm模式1 即为是按照总的 ac 题目数量排名模式
mode = models.IntegerField()
# 是否显示实时排名结果

View File

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import models, migrations
import utils.models
class Migration(migrations.Migration):
dependencies = [
('problem', '0007_remove_problem_last_update_time'),
]
operations = [
migrations.AlterField(
model_name='problem',
name='description',
field=utils.models.RichTextField(),
),
]

View File

@ -2,6 +2,7 @@
from django.db import models
from account.models import User
from utils.models import RichTextField
class ProblemTag(models.Model):
@ -15,7 +16,7 @@ class AbstractProblem(models.Model):
# 标题
title = models.CharField(max_length=50)
# 问题描述 HTML 格式
description = models.TextField()
description = RichTextField()
# 输入描述
input_description = models.CharField(max_length=10000)
# 输出描述

14
utils/models.py Normal file
View File

@ -0,0 +1,14 @@
# coding=utf-8
from django.db import models
from utils.xss_filter import XssHtml
class RichTextField(models.TextField):
__metaclass__ = models.SubfieldBase
def get_prep_value(self, value):
parser = XssHtml()
parser.feed(value)
parser.close()
return parser.getHtml()

202
utils/xss_filter.py Normal file
View File

@ -0,0 +1,202 @@
# -*- coding: utf-8 -*-
"""
Python 富文本XSS过滤类
@package XssHtml
@version 0.1
@link http://phith0n.github.io/python-xss-filter
@since 20150407
@copyright (c) Phithon All Rights Reserved
Based on native Python module HTMLParser purifier of HTML, To Clear all javascript in html
You can use it in all python web framework
Written by Phithon <root@leavesongs.com> in 2015 and placed in the public domain.
phithon <root@leavesongs.com> 编写于20150407
From: XDSEC <www.xdsec.org> & 离别歌 <www.leavesongs.com>
GitHub Pages: https://github.com/phith0n/python-xss-filter
Usage:
parser = XssHtml()
parser.feed('<html code>')
parser.close()
html = parser.getHtml()
print html
Requirements
Python 2.6+ or 3.2+
Cannot defense xss in browser which is belowed IE7
浏览器版本IE7+ 或其他浏览器无法防御IE6及以下版本浏览器中的XSS
"""
import re
try:
from html.parser import HTMLParser
except:
from HTMLParser import HTMLParser
class XssHtml(HTMLParser):
allow_tags = ['a', 'img', 'br', 'strong', 'b', 'code', 'pre',
'p', 'div', 'em', 'span', 'h1', 'h2', 'h3', 'h4',
'h5', 'h6', 'blockquote', 'ul', 'ol', 'tr', 'th', 'td',
'hr', 'li', 'u', 'embed', 's', 'table', 'thead', 'tbody',
'caption', 'small', 'q', 'sup', 'sub']
common_attrs = ["style", "class", "name"]
nonend_tags = ["img", "hr", "br", "embed"]
tags_own_attrs = {
"img": ["src", "width", "height", "alt", "align"],
"a": ["href", "target", "rel", "title"],
"embed": ["src", "width", "height", "type", "allowfullscreen", "loop", "play", "wmode", "menu"],
"table": ["border", "cellpadding", "cellspacing"],
}
def __init__(self, allows=[]):
HTMLParser.__init__(self)
self.allow_tags = allows if allows else self.allow_tags
self.result = []
self.start = []
self.data = []
def getHtml(self):
"""
Get the safe html code
"""
for i in range(0, len(self.result)):
tmp = self.result[i].rstrip('\n')
tmp = tmp.lstrip('\n')
if tmp:
self.data.append(tmp)
return ''.join(self.data)
def handle_startendtag(self, tag, attrs):
self.handle_starttag(tag, attrs)
def handle_starttag(self, tag, attrs):
if tag not in self.allow_tags:
return
end_diagonal = ' /' if tag in self.nonend_tags else ''
if not end_diagonal:
self.start.append(tag)
attdict = {}
for attr in attrs:
attdict[attr[0]] = attr[1]
attdict = self._wash_attr(attdict, tag)
if hasattr(self, "node_%s" % tag):
attdict = getattr(self, "node_%s" % tag)(attdict)
else:
attdict = self.node_default(attdict)
attrs = []
for (key, value) in attdict.items():
attrs.append('%s="%s"' % (key, self._htmlspecialchars(value)))
attrs = (' ' + ' '.join(attrs)) if attrs else ''
self.result.append('<' + tag + attrs + end_diagonal + '>')
def handle_endtag(self, tag):
if self.start and tag == self.start[len(self.start) - 1]:
self.result.append('</' + tag + '>')
self.start.pop()
def handle_data(self, data):
self.result.append(self._htmlspecialchars(data))
def handle_entityref(self, name):
if name.isalpha():
self.result.append("&%s;" % name)
def handle_charref(self, name):
if name.isdigit():
self.result.append("&#%s;" % name)
def node_default(self, attrs):
attrs = self._common_attr(attrs)
return attrs
def node_a(self, attrs):
attrs = self._common_attr(attrs)
attrs = self._get_link(attrs, "href")
attrs = self._set_attr_default(attrs, "target", "_blank")
attrs = self._limit_attr(attrs, {
"target": ["_blank", "_self"]
})
return attrs
def node_embed(self, attrs):
attrs = self._common_attr(attrs)
attrs = self._get_link(attrs, "src")
attrs = self._limit_attr(attrs, {
"type": ["application/x-shockwave-flash"],
"wmode": ["transparent", "window", "opaque"],
"play": ["true", "false"],
"loop": ["true", "false"],
"menu": ["true", "false"],
"allowfullscreen": ["true", "false"]
})
attrs["allowscriptaccess"] = "never"
attrs["allownetworking"] = "none"
return attrs
def _true_url(self, url):
prog = re.compile(r"^(http|https|ftp)://.+", re.I | re.S)
if prog.match(url):
return url
else:
return "http://%s" % url
def _true_style(self, style):
if style:
style = re.sub(r"(\\|&#|/\*|\*/)", "_", style)
style = re.sub(r"e.*x.*p.*r.*e.*s.*s.*i.*o.*n", "_", style)
return style
def _get_style(self, attrs):
if "style" in attrs:
attrs["style"] = self._true_style(attrs.get("style"))
return attrs
def _get_link(self, attrs, name):
if name in attrs:
attrs[name] = self._true_url(attrs[name])
return attrs
def _wash_attr(self, attrs, tag):
if tag in self.tags_own_attrs:
other = self.tags_own_attrs.get(tag)
else:
other = []
if attrs:
for (key, value) in attrs.items():
if key not in self.common_attrs + other:
del attrs[key]
return attrs
def _common_attr(self, attrs):
attrs = self._get_style(attrs)
return attrs
def _set_attr_default(self, attrs, name, default=''):
if name not in attrs:
attrs[name] = default
return attrs
def _limit_attr(self, attrs, limit={}):
for (key, value) in limit.items():
if key in attrs and attrs[key] not in value:
del attrs[key]
return attrs
def _htmlspecialchars(self, html):
return html.replace("<", "&lt;") \
.replace(">", "&gt;") \
.replace('"', "&quot;") \
.replace("'", "&#039;")
if "__main__" == __name__:
parser = XssHtml()
parser.feed("""<p><img src=1 onerror=alert(/xss/)></p><div class="left">
<a href='javascript:prompt(1)'><br />hehe</a></div>
<p id="test" onmouseover="alert(1)">&gt;M<svg>
<a href="https://www.baidu.com" target="self">MM</a></p>
<embed src='javascript:alert(/hehe/)' allowscriptaccess=always />""")
parser.close()
print(parser.getHtml())