Attachment 'C__Gmail_gmail.py'
Download 1 """
2 gmail.py -- Python interface to Gmail (http://www.gmail.com/)
3
4 Known to work with Python 2.3+.
5
6 Sample usage:
7
8 >>> c = GmailClient()
9 >>> c.login('username', 'password')
10 >>> c.get_inbox_conversations()
11 ['free viagra', 'welcome to css-discuss', 'hey, how are you?']
12 >>> c.get_inbox_conversations(is_unread=True)
13 ['free viagra', 'hey, how are you?']
14 >>> c.get_inbox_conversations(subject='css')
15 ['welcome to css-discuss']
16 >>> c.get_inbox_conversations()[2]
17 'hey, how are you?'
18 >>> c.get_inbox_conversations()[2].get_messages()
19 [<email.Message.Message instance at 0xf6bbad4c>, <email.Message.Message instance at 0xf6bbad8a>]
20 >>> print c.get_inbox_conversations()[2].get_messages()[0]
21 # outputs raw e-mail source
22 >>> c.get_contacts()
23 [['[email protected]', 'John Lennon'], ['[email protected]', 'Billy Shears']]
24 >>> c.add_contact('George', '[email protected]')
25 >>> c.delete_contact('[email protected]')
26 """
27
28 # Copyright (C) 2004, Adrian Holovaty
29 #
30 # This program is free software; you can redistribute it and/or modify it under
31 # the terms of the GNU General Public License as published by the Free Software
32 # Foundation; either version 2 of the License, or (at your option) any later
33 # version.
34 #
35 # This program is distributed in the hope that it will be useful, but WITHOUT
36 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
37 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
38 # details.
39 #
40 # You should have received a copy of the GNU General Public License along with
41 # this program; if not, write to the Free Software Foundation, Inc., 59 Temple
42 # Place, Suite 330, Boston, MA 02111-1307 USA
43
44 # Changelog:
45 #
46 # 0.1 (2004-06-18)
47 # Initial version. Support for login() and get_inbox_messages().
48 # 0.2 (2004-06-20)
49 # Added get_contacts(), add_contact() and delete_contact().
50 # Changed get_inbox_messages() to get_inbox_conversations().
51 # Made get_inbox_conversations() subject parameter case-insensitive.
52 # 0.3 (2004-06-24) -- Patch from Gustavo Sverzut Barbieri (Thanks, Gustavo!)
53 # GmailClient.login() now raises LoginFailure on failure.
54 # Added socket.setdefaulttimeout(30).
55 # Added UTF-8 support (GmailClient._encode()).
56 #
57 # To do:
58 # * Optionally mark messages as read when they're retrieved.
59 # * Clean HTML in Conversation subjects.
60 # * Add GmailClient.get_conversations_by_label() method.
61
62
63 __version__ = "0.3"
64 __date__ = "2004-06-24"
65 __author__ = "Adrian Holovaty ([email protected])"
66
67 from Cookie import SimpleCookie
68 import email, random, re, socket, time, urllib, urllib2
69
# Gmail delivers its data as JavaScript calls of the form
#     D(["<tag>", ...]
#     );
# embedded in the page source. Each pattern captures the array literal passed
# to D() for one tag. Raw strings keep the regex escapes intact, and DOTALL
# lets a single array span several lines.

# "t" blocks: the conversation rows of a folder listing.
EMAILS_RE = re.compile(r'\nD\((\["t",.*?\])\n\);', re.DOTALL)
# "mi" blocks: per-message info inside a conversation view.
MESSAGE_INFO_RE = re.compile(r'\nD\((\["mi",.*?\])\n\);', re.DOTALL)

# Gmail says this when it complains
FAILURE_MESSAGE = 'Your action was not successful'
# Text looked for in the login response to detect bad credentials.
LOGIN_FAILURE_MESSAGE = 'Username and password do not match.'
75
class BadGmailTransaction(Exception):
    """
    Base exception raised when Gmail transactions don't work.

    Every other exception defined in this module derives from it, so callers
    can catch this single type to handle any Gmail failure.
    """
79
class ContactCouldNotBeAdded(BadGmailTransaction):
    """Raised when Gmail reports a failure while adding a contact."""
82
class ContactCouldNotBeDeleted(BadGmailTransaction):
    """
    Raised when a contact cannot be deleted, either because the address is
    not in the address book or because Gmail reports a failure.
    """
85
class LoginFailure(BadGmailTransaction):
    """
    Raised when logging in fails: the credentials were rejected or the login
    pages no longer look the way this module expects.
    """
88
# Give every socket created after import a 30-second default timeout, so a
# stalled request raises instead of blocking forever. Note this is a
# process-wide setting, not one limited to this module.
socket.setdefaulttimeout(30)
90
class GmailClient:
    """
    Screen-scraping client for one Gmail account.

    Holds the session cookies and three per-instance caches: folder listings,
    raw messages and the contact list. login() must be called before any
    other method, because it stores the cookies and the "js_version" value
    the other requests are built from.
    """

    def __init__(self):
        self._cookies = SimpleCookie()
        self._folder_cache, self._message_cache = {}, {}
        self._contacts = []

    def login(self, username, password):
        """
        Logs into Gmail with the given username and password.
        Raises LoginFailure if the login fails.
        """
        epoch_secs = int(time.time())
        self._cookies["GMAIL_LOGIN"] = "T%s/%s/%s" % (epoch_secs-2, epoch_secs-1, epoch_secs)
        # The credentials are URL-quoted so that characters such as '&', '='
        # or '+' in a password cannot corrupt the form body.
        post_data = "continue=https://gmail.google.com/gmail&service=mail&Email=%s&Passwd=%s&submit=null" % \
            (self._url_quote(username), self._url_quote(password))
        c = self._read_page("https://www.google.com/accounts/ServiceLoginBoxAuth",
            post_data=post_data)
        r = re.search(r'top.location \s*=\s*"([^"]+)"', c)
        if not r or c.find(LOGIN_FAILURE_MESSAGE) > -1:
            raise LoginFailure("Wrong username or password.")
        redirect = r.group(1)
        self._cookies['GV'] = redirect
        # These two requests are made only for the cookies they set; their
        # bodies are not needed.
        self._get_page("https://www.google.com/accounts/" + str(redirect)).close()
        self._get_page("http://www.google.com/").close()
        c = self._read_page("http://gmail.google.com/gmail?view=page&name=js")
        r = re.search(r"var js_version\s*=\s*'([^']+)'", c)
        if not r:
            raise LoginFailure("Gmail might have redesigned.")
        self._js_version = r.group(1)

    def get_inbox_conversations(self, is_unread=None, is_starred=None, label=None, subject=None):
        """
        Returns a list of all the conversations in the inbox matching the
        given search parameters, as Conversation objects. A parameter left as
        None does not filter.

        Parameters:
            is_unread: Boolean (or just 1 or 0). Limits the results to read
                       vs. unread conversations. A conversation is read if
                       *every one* of its messages has been read.
            is_starred: Boolean (or just 1 or 0). Limits the results to starred
                       vs. unstarred conversations.
            label: String. Limits the results to conversations having the
                       exact given label.
            subject: String. Limits the results to conversations containing
                       the given string in their subject. (Case-insensitive.)
        """
        message_stubs = []
        for stub in self._get_message_stubs(folder='inbox'):
            if is_unread is not None and stub.is_unread != is_unread:
                continue
            if is_starred is not None and stub.is_starred != is_starred:
                continue
            if label is not None and label not in stub.label_list:
                continue
            if subject is not None and stub.subject.lower().find(subject.lower()) == -1:
                continue
            message_stubs.append(stub)
        return message_stubs

    def add_contact(self, name, email, notes=''):
        """
        Adds a contact with the given name, e-mail and notes to this Gmail
        account's address book. Raises ContactCouldNotBeAdded on error.
        """
        body = self._read_page("https://gmail.google.com/gmail?view=address&act=a",
            post_data="at=%s&name=%s&email=%s&notes=%s&ac=Add+Contact&operation=Edit" % \
            (self._cookies['GMAIL_AT'].value, self._url_quote(name),
            self._url_quote(email), self._url_quote(notes)))
        if body.find(FAILURE_MESSAGE) > -1:
            raise ContactCouldNotBeAdded("Gmail might have redesigned.")
        # The address book changed on the server; drop the cached copy.
        self._contacts = []

    def delete_contact(self, email):
        """
        Deletes the contact with the given e-mail address from this Gmail
        account's address book. Raises ContactCouldNotBeDeleted on error.
        """
        # The delete request identifies the contact by its 1-based position
        # in the contact list, so the list is re-fetched to make sure the
        # position is not taken from a stale cache.
        contact_index = None
        for i, c in enumerate(self.get_contacts(clear_cache=True)):
            if c[0] == email:
                contact_index = i + 1
                break
        if contact_index is None:
            raise ContactCouldNotBeDeleted("The e-mail address '%s' wasn't in your Gmail address book." % email)
        body = self._read_page("https://gmail.google.com/gmail?view=address&act=a",
            post_data="operation=Delete&at=%s&email%s=%s" % \
            (self._cookies['GMAIL_AT'].value, contact_index, self._url_quote(email)))
        if body.find(FAILURE_MESSAGE) > -1:
            raise ContactCouldNotBeDeleted("Gmail might have redesigned.")
        # The address book changed on the server; drop the cached copy.
        self._contacts = []

    def get_contacts(self, clear_cache=False):
        """
        Returns a list of lists representing all the contacts for this Gmail
        account, in the format ['email', 'contact name'].

        The list is fetched once and cached; pass clear_cache=True to force a
        new request. An empty cached list is always re-fetched.
        """
        if clear_cache or not self._contacts:
            page_source = self._read_page("https://gmail.google.com/gmail?view=page&name=contacts&zx=%s%s" % \
                (self._js_version, self._get_random_int()))
            # The returned page contains only a JavaScript data structure that
            # looks like this:
            #     [["email","John Lennon"]
            #     ,["email","Billy Shears"]
            #     ]
            # Because this is exactly the same syntax as Python lists, we can
            # use an eval() on it to suck it into Python. THIS IS A SECURITY
            # RISK, THOUGH, because it blindly trusts Gmail's page isn't going
            # to include evil Python code.
            self._contacts = eval(page_source)
        return self._contacts

    def _get_page(self, url, post_data=None):
        """
        Helper method that gets the given URL, handling the sending and storing
        of cookies. Returns the requested page as a file-like object in the
        format returned by urllib2.urlopen(). The caller is responsible for
        closing it. The request is a POST when post_data is given.
        """
        req = urllib2.Request(self._encode(url))
        if post_data is not None:
            req.add_data(self._encode(post_data))
        req.add_header('Cookie', self._encode(self._cookies.output(attrs=[], header='').strip()))
        req.add_header('Charset', 'utf-8')
        f = urllib2.urlopen(req)
        if 'set-cookie' in f.headers.dict:
            self._cookies.load(f.headers.dict['set-cookie'])
        return f

    def _read_page(self, url, post_data=None):
        """
        Helper method that fetches the given URL via _get_page() and returns
        the response body as a string, always closing the response.
        """
        f = self._get_page(url, post_data)
        try:
            return f.read()
        finally:
            f.close()

    def _get_random_int(self):
        """
        Helper method that returns a random number suitable for Gmail's "zx"
        query parameter, which is needed (required?) in some cases.
        """
        return random.randint(0, 1000000000)

    def _eval_js_block(self, html, name):
        """
        Helper method that finds the D(["<name>", ...]); call in the given
        page source and returns its array evaluated as a Python list, or None
        if the page has no such block.
        """
        r = re.search(r'\nD\((\["%s",.*?\])\n\);' % name, html)
        if r is None:
            return None
        # Note the eval(), which is a security risk: it trusts Gmail's page.
        return eval(r.group(1))

    def _get_message_stubs(self, folder, offset=0):
        """
        Helper method that retrieves the given folder from a Gmail account
        and returns a list of Conversation objects, each representing a
        conversation in the folder. Saves its result in a cache the first time
        it's called.

        As a side effect of a fetch, the page's "ts", "ds" and "qu" blocks are
        stored on self.ts, self.ds and self.qu (None when a block is absent).

        KNOWN LIMITATION: If a folder spans more than one page, this method
        will only return the messages on the first page. The cache is keyed by
        folder only, so offset has no effect once a folder has been fetched.
        """
        if folder not in self._folder_cache:
            html = self._read_page("http://gmail.google.com/gmail?search=%s&view=tl&start=%d&init=1&zx=%s%s" % \
                (folder, offset, self._js_version, self._get_random_int()))
            self.ts = self._eval_js_block(html, 'ts')
            self.ds = self._eval_js_block(html, 'ds')
            self.qu = self._eval_js_block(html, 'qu')
            # We can use Python's eval() on the JavaScript source Gmail spits
            # out, because it's in Python-friendly list syntax. This IS a
            # slight security risk, of course.
            stub_list = []
            for match in EMAILS_RE.findall(html):
                # The first element is the "t" tag itself; the rest are the
                # per-conversation field lists.
                for msg_bits in eval(match)[1:]:
                    stub_list.append(Conversation(self, *msg_bits))
            self._folder_cache[folder] = stub_list
        return self._folder_cache[folder]

    def _get_raw_email(self, message_id):
        """
        Retrieves the message with the given message ID and returns it as an
        email.Message.Message. Saves its result in a cache the first time an
        e-mail is retrieved.
        """
        if message_id not in self._message_cache:
            raw = self._read_page("http://gmail.google.com/gmail?view=om&th=%s&zx=%s%s" % \
                (message_id, self._js_version, self._get_random_int()))
            self._message_cache[message_id] = email.message_from_string(raw.lstrip())
        return self._message_cache[message_id]

    def _url_quote(self, value):
        """
        Helper method that quotes the given value for insertion into a query
        string. Also encodes into UTF-8, which Google uses, in case of
        non-ASCII characters.
        """
        value = self._encode(value)
        return urllib.quote_plus(value)

    def _encode(self, value):
        """
        Helper method. Google uses UTF-8, so convert to it, in order to allow
        non-ASCII characters. Values that are not unicode objects are returned
        unchanged.
        """
        if isinstance(value, unicode):
            value = value.encode("utf-8")
        return value

    def Close(self):
        """
        Requests Gmail's logout URL. Cookies and caches held by this object
        are left as they are.
        """
        self._get_page("http://gmail.google.com/gmail?logout").close()
294
class Conversation:
    """
    Represents the minimal information known about a conversation from
    scraping a Gmail folder list page and provides a way of retrieving the full
    messages.
    """
    def __init__(self, client, thread_id, is_unread, is_starred, date_html,
            authors_html, flags, subject_html, snippet_html, label_list,
            attach_html, matching_msgid, extra_snippet):
        """
        Stores the GmailClient used for later requests plus the fields of one
        conversation row, in the order Gmail lists them. Note that
        subject_html is exposed as the attribute "subject".
        """
        self.client, self.thread_id = client, thread_id
        self.is_unread, self.is_starred = is_unread, is_starred
        self.date_html, self.authors_html = date_html, authors_html
        self.flags, self.subject = flags, subject_html
        self.snippet_html, self.label_list = snippet_html, label_list
        self.attach_html, self.matching_msgid = attach_html, matching_msgid
        self.extra_snippet = extra_snippet
        # Message IDs of this conversation, filled in by get_messages().
        self._message_id_cache = []

    def __repr__(self):
        # A conversation displays as its subject line.
        return self.subject

    def get_messages(self):
        """
        Returns a list of all messages in this conversation, in chronological
        order, as email.Message.Message objects.

        The message IDs are scraped from the conversation page on the first
        call and cached on this object; the messages themselves are fetched
        (and cached) by the client.
        """
        if not self._message_id_cache:
            page = self.client._get_page("https://gmail.google.com/gmail?view=cv&search=inbox&th=%s&zx=%s%s" % \
                (self.thread_id, self.client._js_version, self.client._get_random_int()))
            # Close the response even if reading it fails.
            try:
                html = page.read()
            finally:
                page.close()
            # Note the eval(), which is a security risk. Element 3 of each
            # "mi" block is used as the message ID.
            self._message_id_cache = [eval(match)[3] for match in MESSAGE_INFO_RE.findall(html)]
        return [self.client._get_raw_email(i) for i in self._message_id_cache]
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.