-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathUtil.py
More file actions
72 lines (55 loc) · 2.94 KB
/
Copy pathUtil.py
File metadata and controls
72 lines (55 loc) · 2.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#-*- coding:utf-8 -*-
import re
import logs
import email
import UserInfo
class parsehtml:
    """Regex helpers that pull user-profile fields out of HTML/AJAX text.

    Every method is a static method; the class is used purely as a namespace.
    All patterns are raw strings so regex escapes are not run through
    Python's own string-escape processing.
    """

    # Username embedded in a ``window.open('/<name>','_blank');`` call.
    ptnforUname = re.compile(r"window\.open\('/(.+?)','_blank'\);")
    # Value of the input that carries ``checked="checked"``.
    ptnforSex = re.compile(r'checked="checked" value="(.+?)"')
    # The degree input has an extra ``id`` attribute between name and value,
    # so it cannot be produced by patternGen().
    ptnforDegree = re.compile(
        r'name="profile\.degree" id="degree" value="(.+?)"')
    # Patterns used by findFriends() / findRecommend(), compiled once.
    _ptnforFriend = re.compile(r'"profileUsername":"(.+?)",')
    _ptnforRecommend = re.compile(r'username":"(.+?)"')

    @staticmethod
    def patternGen(middlestr):
        """Build a pattern capturing the value of a named form input.

        :param middlestr: literal ``name`` attribute of the input, e.g.
            ``"profile.workEmail"``. It is escaped, so regex metacharacters
            in it (notably ``.``) match only themselves.
        :return: compiled pattern for ``name="<middlestr>" value="(.+?)"``.
        """
        return re.compile('name="' + re.escape(middlestr) + '" value="(.+?)"')

    @staticmethod
    def parse(htmlcontent):
        """Extract the profile fields from *htmlcontent*.

        :param htmlcontent: text of the profile page.
        :return: whatever ``UserInfo.dbopt.InfoCollect`` returns for the
            extracted fields, or ``None`` when neither a username nor a
            password could be found. Fields that are absent from the page
            are passed as ``""``.
        """
        def field(name):
            # Value of the form input called *name*, or "" when absent.
            return parsehtml.check(parsehtml.patternGen(name), htmlcontent)

        username = parsehtml.check(parsehtml.ptnforUname, htmlcontent)
        password = field("account.password")
        # NOTE(review): the page is rejected only when BOTH values are
        # missing, so both strings are empty in the log message below.
        # Confirm whether "either missing" was the intended condition.
        if username == "" and password == "":
            logs.LOG.WriteLog(
                "[*] HTML Parsing Failed At " + username + password)
            return None

        email_addr = field("profile.workEmail")
        chn_name = field("profile.chineseName")
        sex = parsehtml.check(parsehtml.ptnforSex, htmlcontent)
        academic = parsehtml.check(parsehtml.ptnforDegree, htmlcontent)
        job = field("profile.scholarTitle")
        work_unit = field("profile.workUnit")
        department = field("profile.workDepartment")
        profession = field("scholarFieldFirst")
        address = field("profile.address")
        eng_name = field("profile.englishName")
        title = field("profile.scholarTitleEn")
        affiliation = field("profile.workUnitEn")

        # Arguments are positional; keep this order in sync with InfoCollect.
        return UserInfo.dbopt.InfoCollect(
            username, password, email_addr, chn_name, sex, academic,
            job, work_unit, department, profession,
            address, eng_name, title, affiliation)

    @staticmethod
    def check(regex, htmlcontent):
        """Return the first ``findall`` result of *regex*, or ``""``.

        :param regex: compiled pattern; with a single capture group the
            result is that group's text.
        :param htmlcontent: text to search.
        :return: first match, or the empty string when nothing matches.
        """
        matches = regex.findall(htmlcontent)
        return matches[0] if matches else ""

    @staticmethod
    def findFriends(ajaxcontent):
        """Return the unique ``profileUsername`` values in *ajaxcontent*.

        :param ajaxcontent: response text containing
            ``"profileUsername":"<name>",`` entries.
        :return: list of names without duplicates, in first-seen order
            (on Python 3.7+, where dicts preserve insertion order).
        """
        found = parsehtml._ptnforFriend.findall(ajaxcontent)
        return list(dict.fromkeys(found))

    @staticmethod
    def findRecommend(htmlcontent):
        """Return the unique ``username`` values in *htmlcontent*.

        :param htmlcontent: text containing ``username":"<name>"`` entries.
        :return: list of names without duplicates, in first-seen order
            (on Python 3.7+, where dicts preserve insertion order).
        """
        found = parsehtml._ptnforRecommend.findall(htmlcontent)
        return list(dict.fromkeys(found))