-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutils.py
186 lines (140 loc) · 5.12 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# -*- coding: utf-8 -*-
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, Table, TableStyle
from reportlab.lib import colors
import requests, json
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from os.path import basename
from email.mime.application import MIMEApplication
def create_pdf(df, name):
"""
create a pdf for the dataframe
:param df: any dataframe
:type df: pandas.DataFrame
:param name: name of the result pdf
:type: str
:return: None
"""
# setup the canvas
width = A4[0]
styles = getSampleStyleSheet()
# fillna with N.A.
df = df.fillna("N.A.")
# process content of the dataframe to make each element a paragraph object
content = [list(i) for i in df.values]
res_content = []
for i in content:
_temp = []
for j in i:
_temp.append(Paragraph(j, styles["Normal"]))
res_content.append(_temp)
# process header of the dataframe
header = [Paragraph("<b>" + i + "</b>", styles["Normal"]) for i in df.columns.tolist()]
# form the final list to be feed into table
full_content = [header] + res_content
# create the table and set format
col_width = [(width - 10) / df.shape[1]] * df.shape[1]
table = Table(full_content, colWidths=col_width)
table.setStyle(TableStyle([
('INNERGRID', (0, 0), (-1, -1), 0.25, colors.black),
('BOX', (0, 0), (-1, -1), 0.25, colors.black),
]))
# foo pdf, just for getting the row sum
# THIS IS GENIUS!!
foo = canvas.Canvas("foo.pdf", pagesize=[width, 200000])
# get the actual table height and page height, wrapOn function
# will return the actual height and width of the table
table_height = table.wrapOn(foo, width, 2000000)[1]
page_height = table_height + 10
# get the actual table height
c = canvas.Canvas("{}.pdf".format(name), pagesize=[width, page_height])
table.wrapOn(c, width, page_height)
table.drawOn(c, 5, 5)
c.save()
def shorten_url(url, url_shorten_api, url_shorten_workspace):
"""
shorten url
:param url: url to be shorten
:type url: str
:param url_shorten_api: URL Link Shortener API
:type url_shorten_api: str
:param url_shorten_workspace: URL Link Shortener workspace ID
:type url_shorten_workspace: str
:return: shortened url
"""
linkRequest = {
"destination": url,
"domain": {"fullName": "rebrand.ly"}
}
requestHeaders = {
"Content-type": "application/json",
"apikey": url_shorten_api,
"workspace": url_shorten_workspace
}
r = requests.post("https://api.rebrandly.com/v1/links",
data=json.dumps(linkRequest),
headers=requestHeaders)
if (r.status_code == requests.codes.ok):
link = r.json()
return link["shortUrl"]
else:
return url
def process_news_output(res_df, url_shorten_api, url_shorten_workspace):
"""
Generate wechat message to send based on result dataframe
:param res_df: news dataframe
:type res_df: pandas.DataFrame
:param url_shorten_api: URL Link Shortener API
:type url_shorten_api: str
:param url_shorten_workspace: URL Link Shortener workspace ID
:type url_shorten_workspace: str
:return: processed string
"""
res_df['shorten_url'] = res_df['url'].apply(lambda x: shorten_url(x, url_shorten_api, url_shorten_workspace))
res_df = res_df.loc[-res_df['shorten_url'].str.startswith('https')].reset_index(drop=True)
res_df['shorten_url'] = 'https://' + res_df['shorten_url']
msg = ''
for i in res_df.iterrows():
news_info = "{}\n{}\n-------\n".format(i[1]['title'], i[1]['shorten_url'])
msg += news_info
return msg
def send_email(sender_email, sender_password, receiver_email, subject, body, attachement_path):
"""
Send results to an email
:param sender_email: sender's email
:type sender_email: str
:param sender_password: sender's password
:type sender_password: str
:param receiver_email: receivers email
:type receiver_email: str
:param subject: email subject
:type subject: str
:param body: email body
:type body: str
:param attachement_path: path for file to be sent over
:type attachement_path: str
:return: None
"""
# creates SMTP session
s = smtplib.SMTP('smtp.gmail.com', 587)
s.starttls()
s.login(sender_email, sender_password)
# instance of MIMEMultipart
msg = MIMEMultipart()
msg['From'] = sender_email
msg['To'] = receiver_email
msg['Subject'] = subject
msg.attach(MIMEText(body, 'plain'))
# open the file to be sent
with open(attachement_path, 'rb') as fil:
part = MIMEApplication(fil.read(), Name=basename(attachement_path))
part['Content-Disposition'] = 'attachment; filename="%s"' % basename(attachement_path)
msg.attach(part)
# send email
s.sendmail(sender_email, receiver_email, msg.as_string())
# terminating the session
s.quit()