From 72d11bb59ae7d657fb04b18cc5ad67351ece998c Mon Sep 17 00:00:00 2001 From: Lost_in_wine Date: Wed, 2 Mar 2016 21:51:54 +0800 Subject: [PATCH 1/6] =?UTF-8?q?=E6=8F=90=E4=BA=A4hdoj=E7=88=AC=E5=8F=96?= =?UTF-8?q?=E9=A2=98=E7=9B=AE=E7=9A=84=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- robots/hdoj.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 robots/hdoj.py diff --git a/robots/hdoj.py b/robots/hdoj.py new file mode 100644 index 0000000..35a0b87 --- /dev/null +++ b/robots/hdoj.py @@ -0,0 +1,66 @@ +# coding=utf-8 +import re +import requests +from .robot import Robot +from .exceptions import AuthFailed, RequestFailed, RegexError, SubmitProblemFailed + + +class HDOJRobot(Robot): + def check_url(self, url): + regex = "^http://acm.hdu.edu.cn/showproblem.php?pid=\d{4}1001$" + return re.compile(regex).match(url) is not None + + def login(self, username, password): + r = self.post("http://acm.hdu.edu.cn/userloginex.php?action=login", + data={ "user[handle]": username, + "user[password]": password, + "login": "Sign In"}, + headers={"Content-Type": "application/x-www-form-urlencoded", + "Referer": "http://acm.hdu.edu.cn/"}) + # 登陆成功会重定向到首页,否则200返回错误页面 + if r.status_code != 302: + raise AuthFailed() + return dict(r.cookies) + + @property + def is_logged_in(self): + print(self.cookies) + r = self.get("http://acm.hdu.edu.cn/control_panel.php", cookies=self.cookies) + # 登录状态是200,否则302到登陆页面 + return r.status_code == 200 + + def get(self, url, headers=None, cookies=None, allow_redirects=False): + r = super().get(url, headers=headers, cookies=cookies, allow_redirects=allow_redirects) + r.encoding = "gb2312" + return r + + def post(self, url, data, headers=None, cookies=None, allow_redirects=False): + return self._request("post", url, data=data, cookies=cookies, headers=headers, allow_redirects=allow_redirects) + + def _regex_page(self, url, regex): + r = self.get(url) + self.check_status_code(r) + data = {} + for k, v in regex.items(): + items = re.compile(v).findall(r.text) + if not items: + raise RegexError("No such data") + if k == "samples": + data[k] = [{"input": items[0], "output": items[1]}] + else: + data[k] = items[0] + print(data["samples"]) + return data + + + def get_problem(self, url): + regex = {"title": r"

(.*)

", + "time_limit": r"Time Limit:\s*[\d]*/([\d]*) MS", + "memory_limit": r"Memory Limit:\s*[\d]*/([\d]*) K", + "description": r"Problem Description
([\s\S]*?)
", + "input_description": r"Input
([\s\S]*?)
", + "output_description": r"Output
([\s\S]*?)
", + "samples": r"Courier New,Courier,monospace;\">([\s\S]*?)" + } + return self._regex_page(url, regex) + From 4dec3affb4a2360b3ecb6899b79258ca97fd2a4c Mon Sep 17 00:00:00 2001 From: Lost_in_wine Date: Wed, 2 Mar 2016 22:06:08 +0800 Subject: [PATCH 2/6] =?UTF-8?q?=E4=BF=AE=E6=AD=A3hdoj=E7=88=AC=E8=99=AB?= =?UTF-8?q?=E9=83=A8=E5=88=86bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- robots/hdoj.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/robots/hdoj.py b/robots/hdoj.py index 35a0b87..0b3fd74 100644 --- a/robots/hdoj.py +++ b/robots/hdoj.py @@ -7,12 +7,12 @@ from .exceptions import AuthFailed, RequestFailed, RegexError, SubmitProblemFail class HDOJRobot(Robot): def check_url(self, url): - regex = "^http://acm.hdu.edu.cn/showproblem.php?pid=\d{4}1001$" + regex = "^http://acm.hdu.edu.cn/showproblem.php?pid=\d{4}$" return re.compile(regex).match(url) is not None def login(self, username, password): r = self.post("http://acm.hdu.edu.cn/userloginex.php?action=login", - data={ "user[handle]": username, + data={"user[handle]": username, "user[password]": password, "login": "Sign In"}, headers={"Content-Type": "application/x-www-form-urlencoded", From 4d7aced9c52dd6b038043b0a7345345b24994074 Mon Sep 17 00:00:00 2001 From: Lost_in_wine Date: Sat, 5 Mar 2016 18:43:27 +0800 Subject: [PATCH 3/6] =?UTF-8?q?=E5=AE=8C=E6=88=90Hduoj=E6=8F=90=E4=BA=A4?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=B8=8E=E8=8E=B7=E5=8F=96=E7=BB=93=E6=9E=9C?= =?UTF-8?q?=E7=9A=84=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- robots/hduoj.py | 152 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 robots/hduoj.py diff --git a/robots/hduoj.py b/robots/hduoj.py new file mode 100644 index 0000000..4b153fc --- /dev/null +++ b/robots/hduoj.py @@ -0,0 +1,152 @@ +# coding=utf-8 +import re +import requests +from .robot import Robot +from .exceptions import AuthFailed, RequestFailed, RegexError, SubmitProblemFailed +from .utils import Language, Result + + +class HduojRobot(Robot): + + def check_url(self, url): + regex = r"^http://acm.hdu.edu.cn/showproblem.php\?pid=\d{4}$" + return re.compile(regex).match(url) is not None + + def login(self, username, password): + r = self.post("http://acm.hdu.edu.cn/userloginex.php?action=login", + data={"username": username, + "userpass": password, + "login": "Sign In"}, + headers={"Content-Type": "application/x-www-form-urlencoded", + "Referer": "http://acm.hdu.edu.cn/"}) + # 登陆成功会重定向到首页,否则200返回错误页面 + if r.status_code != 302: + raise AuthFailed("Failed to login Hduoj") + + self.cookies = dict(r.cookies) + + @property + def is_logged_in(self): + r = self.get("http://acm.hdu.edu.cn/control_panel.php", cookies=self.cookies) + # 登录状态是200,否则302到登陆页面 + return r.status_code == 200 + + def get(self, url, headers=None, cookies=None, allow_redirects=False): + r = super().get(url, headers=headers, cookies=cookies, allow_redirects=allow_redirects) + r.encoding = "gb2312" + return r + + def post(self, url, data, headers=None, cookies=None, allow_redirects=False): + return self._request("post", url, data=data, cookies=cookies, headers=headers, allow_redirects=allow_redirects) + + def _regex_page(self, url, regex): + r = self.get(url) + self.check_status_code(r) + data = {} + for k, v in regex.items(): + items = re.compile(v).findall(r.text) + if not items: + raise RegexError("No such data") + if k == "samples": + data[k] = [{"input": items[0], "output": items[1]}] + else: + data[k] = items[0] + + return data + + def get_problem(self, url): + if not self.check_url(url): + raise RequestFailed("Invaild Hduoj url") + regex = {"title": r"

(.*)

", + "time_limit": r"Time Limit:\s*[\d]*/([\d]*)\s*MS", + "memory_limit": r"Memory Limit:\s*[\d]*/([\d]*)\s*K", + "description": r"Problem Description\s*
([\s\S]*?)
", + "input_description": r"Input\s*
([\s\S]*?)
", + "output_description": r"Output\s*
([\s\S]*?)
", + "samples": r"Courier New,Courier,monospace;\">([\s\S]*?)" + } + problem_id = re.compile(r"\d{4}").search(url).group() + data = self._regex_page(url, regex) + data["problem_id"] = problem_id + data["submit_url"] = "http://acm.hdu.edu.cn/submit.php?action=submit" + return data + + def submit(self, submmit_url, language, code, origin_id): + if language == Language.C: + compiler_id = "1" + elif language == Language.CPP: + compiler_id = "0" + else: + compiler_id = "5" + + r = self.post(submmit_url, data={"check": "0", "problemid": origin_id, + "language": compiler_id, + "usercode": code}, + cookies=self.cookies, + headers={"Content-Type": "application/x-www-form-urlencoded", + "Referer": submmit_url}) + + if r.status_code != 302: + raise SubmitProblemFailed("Faild to submit problem, url: %s, status code %d" % (url, r.status_code)) + + def get_result(self, submission_id, username): + status_url = r"http://acm.hdu.edu.cn/status.php?&user=" + username + r = self.get(status_url, + headers={"Refer": status_url}) + self.check_status_code(r) + + data = re.compile(r"(\d+)\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(?:[\s\S]*?)>" + r"(.*?)[\s\S]*?(\d*)MS(\d*)K").findall(r.text) + + submission_id = data[0][0] + code = data[0][1] + + if code == "Accepted": + result = Result.accepted + elif code == "Queuing": + result = Result.waiting + elif code == "Compiling": + result = Result.waiting + elif code == "Running": + result = Result.waiting + elif code == "Presentation Error": + result = Result.format_error + elif code == "Wrong Answer": + result = Result.wrong_answer + elif code == "Runtime Error": + result = Result.runtime_error + elif code == "Time Limit Exceeded": + result = Result.time_limit_exceeded + elif code == "Memory Limit Exceeded": + result = Result.memory_limit_exceeded + elif code == "Output Limit Exceeded": + pass + elif code == "Compilation Error": + result = Result.compile_error + elif code == "System Error": + result = Result.system_error + else: + result = Result.runtime_error + + if data[0][2]: + cpu_time = int(data[0][2]) + else: + cpu_time = None + + if data[0][3]: + memory = int(data[0][3]) + else: + memory = None + + error = None + + if result == Result.compile_error: + r = self.get(r"http://acm.hdu.edu.cn/viewerror.php?rid=" + submission_id, + headers={"Referer": "http://acm.hdu.edu.cn/status.php?first=&pid=&lang=0&status=0&user=" + username}) + self.check_status_code(r) + error = self._decode_html(re.compile("
([\s\S]*)
").findall(r.text)) + + + return {"result": result, "cpu_time": cpu_time, "memory": memory, + "info": {"result_text": self._clean_html(data[0][1])}, "error": error} + From af1da2b379f8bda7c3240a4a742095fd845f7ac5 Mon Sep 17 00:00:00 2001 From: Lost_in_wine Date: Sat, 5 Mar 2016 20:40:09 +0800 Subject: [PATCH 4/6] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E7=BC=96=E7=A0=81?= =?UTF-8?q?=E9=97=AE=E9=A2=98,=E6=A0=BC=E5=BC=8F=E9=97=AE=E9=A2=98,html?= =?UTF-8?q?=E8=A7=A3=E7=A0=81=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Hdu的gcc版本过老, "//"无法使用 - 采集到的html都要调用robots的_clean_html --- robots/hduoj.py | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/robots/hduoj.py b/robots/hduoj.py index 4b153fc..49a9673 100644 --- a/robots/hduoj.py +++ b/robots/hduoj.py @@ -7,7 +7,6 @@ from .utils import Language, Result class HduojRobot(Robot): - def check_url(self, url): regex = r"^http://acm.hdu.edu.cn/showproblem.php\?pid=\d{4}$" return re.compile(regex).match(url) is not None @@ -36,9 +35,6 @@ class HduojRobot(Robot): r.encoding = "gb2312" return r - def post(self, url, data, headers=None, cookies=None, allow_redirects=False): - return self._request("post", url, data=data, cookies=cookies, headers=headers, allow_redirects=allow_redirects) - def _regex_page(self, url, regex): r = self.get(url) self.check_status_code(r) @@ -50,7 +46,7 @@ class HduojRobot(Robot): if k == "samples": data[k] = [{"input": items[0], "output": items[1]}] else: - data[k] = items[0] + data[k] = self._clean_html(items[0]) return data @@ -63,31 +59,31 @@ class HduojRobot(Robot): "description": r"Problem Description\s*
([\s\S]*?)
", "input_description": r"Input\s*
([\s\S]*?)
", "output_description": r"Output\s*
([\s\S]*?)
", - "samples": r"Courier New,Courier,monospace;\">([\s\S]*?)" - } + "samples": r'Courier New,Courier,monospace;">([\s\S]*?)'} problem_id = re.compile(r"\d{4}").search(url).group() data = self._regex_page(url, regex) data["problem_id"] = problem_id data["submit_url"] = "http://acm.hdu.edu.cn/submit.php?action=submit" return data - def submit(self, submmit_url, language, code, origin_id): + def submit(self, submit_url, language, code, origin_id): + code = code.encode("gb2312") if language == Language.C: - compiler_id = "1" + language = "1" elif language == Language.CPP: - compiler_id = "0" + language = "0" else: - compiler_id = "5" + language = "5" - r = self.post(submmit_url, data={"check": "0", "problemid": origin_id, - "language": compiler_id, - "usercode": code}, - cookies=self.cookies, - headers={"Content-Type": "application/x-www-form-urlencoded", - "Referer": submmit_url}) + r = self.post(submit_url, data={"check": "0", "problemid": origin_id, + "language": language, + "usercode": code}, + cookies=self.cookies, + headers={"Content-Type": "application/x-www-form-urlencoded", + "Referer": submit_url}) if r.status_code != 302: - raise SubmitProblemFailed("Faild to submit problem, url: %s, status code %d" % (url, r.status_code)) + raise SubmitProblemFailed("Faild to submit problem, url: %s, status code %d" % (submit_url, r.status_code)) def get_result(self, submission_id, username): status_url = r"http://acm.hdu.edu.cn/status.php?&user=" + username @@ -103,11 +99,7 @@ class HduojRobot(Robot): if code == "Accepted": result = Result.accepted - elif code == "Queuing": - result = Result.waiting - elif code == "Compiling": - result = Result.waiting - elif code == "Running": + elif code in ["Queuing", "Compiling", "Running"]: result = Result.waiting elif code == "Presentation Error": result = Result.format_error @@ -120,7 +112,7 @@ class HduojRobot(Robot): elif code == "Memory Limit Exceeded": result = Result.memory_limit_exceeded elif code == "Output Limit Exceeded": - pass + result = Result.runtime_error elif code == "Compilation Error": result = Result.compile_error elif code == "System Error": @@ -144,8 +136,7 @@ class HduojRobot(Robot): r = self.get(r"http://acm.hdu.edu.cn/viewerror.php?rid=" + submission_id, headers={"Referer": "http://acm.hdu.edu.cn/status.php?first=&pid=&lang=0&status=0&user=" + username}) self.check_status_code(r) - error = self._decode_html(re.compile("
([\s\S]*)
").findall(r.text)) - + error = self._clean_html(re.compile("
([\s\S]*)
").findall(r.text)) return {"result": result, "cpu_time": cpu_time, "memory": memory, "info": {"result_text": self._clean_html(data[0][1])}, "error": error} From ab18b1bec8ba0491fe2af573baaf9eb0b39c799e Mon Sep 17 00:00:00 2001 From: virusdefender Date: Sat, 5 Mar 2016 20:58:15 +0800 Subject: [PATCH 5/6] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=97=A0=E7=94=A8?= =?UTF-8?q?=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- robots/hdoj.py | 66 -------------------------------------------------- 1 file changed, 66 deletions(-) delete mode 100644 robots/hdoj.py diff --git a/robots/hdoj.py b/robots/hdoj.py deleted file mode 100644 index 0b3fd74..0000000 --- a/robots/hdoj.py +++ /dev/null @@ -1,66 +0,0 @@ -# coding=utf-8 -import re -import requests -from .robot import Robot -from .exceptions import AuthFailed, RequestFailed, RegexError, SubmitProblemFailed - - -class HDOJRobot(Robot): - def check_url(self, url): - regex = "^http://acm.hdu.edu.cn/showproblem.php?pid=\d{4}$" - return re.compile(regex).match(url) is not None - - def login(self, username, password): - r = self.post("http://acm.hdu.edu.cn/userloginex.php?action=login", - data={"user[handle]": username, - "user[password]": password, - "login": "Sign In"}, - headers={"Content-Type": "application/x-www-form-urlencoded", - "Referer": "http://acm.hdu.edu.cn/"}) - # 登陆成功会重定向到首页,否则200返回错误页面 - if r.status_code != 302: - raise AuthFailed() - return dict(r.cookies) - - @property - def is_logged_in(self): - print(self.cookies) - r = self.get("http://acm.hdu.edu.cn/control_panel.php", cookies=self.cookies) - # 登录状态是200,否则302到登陆页面 - return r.status_code == 200 - - def get(self, url, headers=None, cookies=None, allow_redirects=False): - r = super().get(url, headers=headers, cookies=cookies, allow_redirects=allow_redirects) - r.encoding = "gb2312" - return r - - def post(self, url, data, headers=None, cookies=None, allow_redirects=False): - return self._request("post", url, data=data, cookies=cookies, headers=headers, allow_redirects=allow_redirects) - - def _regex_page(self, url, regex): - r = self.get(url) - self.check_status_code(r) - data = {} - for k, v in regex.items(): - items = re.compile(v).findall(r.text) - if not items: - raise RegexError("No such data") - if k == "samples": - data[k] = [{"input": items[0], "output": items[1]}] - else: - data[k] = items[0] - print(data["samples"]) - return data - - - def get_problem(self, url): - regex = {"title": r"

(.*)

", - "time_limit": r"Time Limit:\s*[\d]*/([\d]*) MS", - "memory_limit": r"Memory Limit:\s*[\d]*/([\d]*) K", - "description": r"Problem Description
([\s\S]*?)
", - "input_description": r"Input
([\s\S]*?)
", - "output_description": r"Output
([\s\S]*?)
", - "samples": r"Courier New,Courier,monospace;\">([\s\S]*?)" - } - return self._regex_page(url, regex) - From cbcf5ed38ab32a523143d6d072a952e1e3fe471b Mon Sep 17 00:00:00 2001 From: Lost_in_wine Date: Sat, 5 Mar 2016 21:24:48 +0800 Subject: [PATCH 6/6] =?UTF-8?q?=E5=B0=86=E5=86=85=E5=AD=98=E7=94=B1?= =?UTF-8?q?=E5=8D=95=E4=BD=8DKB=E8=BD=AC=E6=8D=A2=E4=B8=BA=E5=8D=95?= =?UTF-8?q?=E4=BD=8DM?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- robots/hduoj.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/robots/hduoj.py b/robots/hduoj.py index 49a9673..9851f70 100644 --- a/robots/hduoj.py +++ b/robots/hduoj.py @@ -47,6 +47,8 @@ class HduojRobot(Robot): data[k] = [{"input": items[0], "output": items[1]}] else: data[k] = self._clean_html(items[0]) + data["memory_limit"] = int(data["memory_limit"]) // 1024 + data["time_limit"] = int(data["time_limit"]) return data @@ -136,7 +138,7 @@ class HduojRobot(Robot): r = self.get(r"http://acm.hdu.edu.cn/viewerror.php?rid=" + submission_id, headers={"Referer": "http://acm.hdu.edu.cn/status.php?first=&pid=&lang=0&status=0&user=" + username}) self.check_status_code(r) - error = self._clean_html(re.compile("
([\s\S]*)
").findall(r.text)) + error = self._clean_html(str(re.compile("
([\s\S]*)
").findall(r.text))) return {"result": result, "cpu_time": cpu_time, "memory": memory, "info": {"result_text": self._clean_html(data[0][1])}, "error": error}