Contents
导出各种邮箱联系人
weafriend <[email protected]> reply-to [email protected] to [email protected] date Mon, Sep 15, 2008 at 10:15 subject [CPyUG:65691] 分享导出邮件联系人的脚本
- hi all
分享导出邮件联系人的脚本.
MailContact1.py
1 #!/usr/bin/env python
2
3 #coding=utf-8
4
5 from BeautifulSoup import BeautifulSoup
6
7 import os,urllib,urllib2,pdb
8
9 import cookielib
10
11 import httplib
12
13 import csv,re
14
15
16
# Request path of the Google ClientLogin endpoint that GMailContact.login() POSTs to.
GDATA_URL = '/accounts/ClientLogin'
18
19
20
class MailContactError(Exception):
    """Raised when a webmail login or a contact-page request fails."""
24
25
26
class MailContact:
    """Common interface of the webmail contact exporters in this module.

    Concrete providers override login() and get_contacts() (and, for some
    providers, get_contact_page()).  The methods here are deliberate no-ops
    so that provider stubs can be instantiated before they are implemented.
    """

    def __init__(self, username, password):
        """Remember the credentials and start with an empty contact list."""
        self.username = username
        self.password = password
        # List of (name, email) tuples; filled by get_contacts() in subclasses.
        self.contacts = []

    def login(self):
        """Authenticate against the provider.  No-op in the base class."""

    def get_contacts(self):
        """Populate self.contacts.  No-op in the base class."""

    def get_contact_page(self):
        """Fetch the raw contact page.  No-op in the base class (returns None)."""
44
45
46
class GMailContact(MailContact):
    """
    Retrieve a user's contacts from their Google Account.

    Dependencies:
    -------------
    * BeautifulSoup.
    * That's it. :-)

    Usage:
    ------
    >>> g = GMailContact('someone@gmail.com', 'password')
    >>> g.login()
    (200, 'OK')
    >>> g.get_contacts()
    >>> g.contacts
    [(u'Persons Name', 'person@example.com'), ...]
    """

    # NOTE(review): the default username looks like an address that was mangled
    # by an e-mail obfuscator when this page was exported -- confirm the
    # intended default before relying on it.
    def __init__(self, username='[email protected]', password='test', service='cp'):
        """
        Store the credentials and request parameters.

        username -- a bare account name ("someone") or a full address
                    ("someone@gmail.com" / "someone@hosted-domain.example").
        password -- the account password.
        service  -- Google service code sent with the login; defaults to 'cp'
                    (contacts).
        """
        self.mail_type = "@gmail.com"
        # Only bare account names get the Gmail domain appended; a full address
        # (including hosted-domain accounts) is used unchanged.
        if '@' in username:
            self.username = username
        else:
            self.username = username + self.mail_type
        self.password = password
        self.account_type = 'HOSTED_OR_GOOGLE'  # Allow both Google Domain and Gmail accounts
        self.service = service  # Defaults to cp (contacts)
        self.source = 'google-data-import'  # Our application name
        self.code = ''  # Empty by default, populated by self.login()
        self.contacts = []  # Empty list by default, populated by self.get_contacts()

    def login(self):
        """
        Log in to Google.  No arguments.

        POSTs the credentials to GDATA_URL and stores the returned Auth token
        in self.code.  Returns the (status, reason) pair of the HTTP response.
        Raises MailContactError if the response is not 200 or carries no
        Auth token.
        """
        data = urllib.urlencode({
            'accountType': self.account_type,
            'Email': self.username,
            'Passwd': self.password,
            'service': self.service,
            'source': self.source
        })
        headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/plain'
        }

        conn = httplib.HTTPSConnection('google.com')
        try:
            conn.request('POST', GDATA_URL, data, headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't log in. HTTP Code: %s, %s" % (response.status, response.reason))
            body = response.read()
        finally:
            # Always release the socket, also when the login is rejected.
            conn.close()

        # The response body is a list of KEY=value lines; pick the Auth line by
        # name instead of by position.
        token = None
        for line in body.split("\n"):
            if line.startswith('Auth='):
                token = line[len('Auth='):].strip()
                break
        if token is None:
            raise MailContactError("Couldn't log in. No Auth token in the response")
        self.code = token
        return response.status, response.reason

    def _request(self, max_results=200):
        """
        Base function for requesting the contacts. We'll allow other methods eventually.

        Returns the raw contacts feed for self.username, limited to
        max_results entries.  Raises MailContactError on a non-200 response.
        """
        url = '/m8/feeds/contacts/%s/base/?max-results=%d' % (self.username, max_results)
        headers = {'Authorization': 'GoogleLogin auth=%s' % self.code}

        conn = httplib.HTTPConnection('www.google.com')
        try:
            conn.request('GET', url, headers=headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't get contacts. HTTP Code: %s, %s" % (response.status, response.reason))
            page = response.read()
        finally:
            conn.close()
        return page

    def get_contacts(self, max_results=200):
        """
        Parse the contacts (using BeautifulSoup) from self._request, and then
        populate self.contacts with (title text, e-mail address) tuples.
        """
        soup = BeautifulSoup(self._request(max_results))
        self.contacts = []
        for entry in soup.findAll('title'):
            # Keep only parents holding exactly two title / gd:email elements;
            # the first is used as the name, the second supplies the address.
            fields = entry.parent.findAll(['gd:email', 'title'])
            if len(fields) == 2:
                self.contacts.append((fields[0].string, fields[1].get('address')))
        return
220
221
222
class M126Contact(MailContact):
    """Export the address book of a 126.com mailbox."""

    def __init__(self, username, password):
        """Store the credentials and prepare the login / contact requests.

        username -- account name without the "@126.com" suffix.
        password -- account password.
        """
        self.mail_type = "@126.com"
        self.username = username
        self.password = password
        self.contacts = []
        self.login_host = 'entry.mail.126.com'
        self.login_url = '/cgi/login?redirTempName=https.htm&hid=10010102&lightweight=1&verifycookie=1&language=0&style=-1'
        self.login_data = urllib.urlencode({
            'domain': '126.com',
            'language': 0,
            'bCookie': '',
            'user': self.username,
            'pass': self.password,
            'style': -1,
            'remUser': '',
            'secure': '',
            # NOTE(review): this value is already percent-encoded, so
            # urlencode() escapes its '%' signs a second time -- confirm that
            # the server accepts it in this form.
            'enter.x': '%B5%C7+%C2%BC'
        })
        self.login_headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
            'Referer': 'http://www.126.com/'
        }
        self.contact_host = 'g2a10.mail.126.com'
        # The template is kept separately so that login() can be called more
        # than once; contact_url holds the formatted URL after a login.
        self._contact_url_template = '/coremail/fcg/ldvcapp?funcid=prtsearchres&sid=%(sid)s&listnum=200&tempname=address%%2faddress.htm'
        self.contact_url = self._contact_url_template
        self.contact_headers = {}

    def login(self):
        """Log in and derive the session id and cookies for the contact page.

        Raises MailContactError when the HTTP status is not 200 or when the
        response carries no Coremail cookie (wrong user name or password).
        """
        conn = httplib.HTTPSConnection(self.login_host)
        try:
            conn.request('POST', self.login_url, self.login_data, self.login_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't log in. HTTP Code: %s, %s" % (response.status, response.reason))
            # Example header:
            #   Coremail=aaYgsaQsvSmKa%MBgzxnddkKzjPJUTbMddRUIgVwfeiBUd; path=/; domain=.126.com
            # and the session id derived from it:
            #   MBgzxnddkKzjPJUTbMddRUIgVwfeiBUd
            sc = response.getheader('Set-Cookie')
        finally:
            conn.close()

        # Look the cookie up by name so that its position in the header does
        # not matter.
        found = None
        if sc:
            found = re.search(r'Coremail=([^;\s]*)', sc)
        if found is None:
            # Wrong user name or password.  The password is deliberately kept
            # out of the message so that it cannot end up in logs.
            raise MailContactError("Login failed for %s%s: user name or password not correct!" % (self.username, self.mail_type))
        coremail = found.group(1)
        # The session id is the part of the cookie value after the first '%'.
        sid = coremail[coremail.find('%') + 1:]
        self.contact_url = self._contact_url_template % {'sid': sid}
        self.contact_headers = {
            'Cookie': '; '.join([
                'MAIL126_SSN=' + self.username,
                'NETEASE_SSN=' + self.username,
                'nts_mail_user=' + self.username,
                'logType=df',
                'ntes_mail_firstpage=normal',
                'Coremail=' + coremail,
                'mail_host=g2a14.mail.126.com',
                'mail_sid=' + sid,
                'mail_uid=' + self.username + '@126.com',
                'mail_style=dm3',
                'oulink_h=520',
                'ntes_mail_noremember=true',
            ])
        }

    def get_contact_page(self):
        """Return the raw address-book page; requires a prior login().

        Raises MailContactError on a non-200 response.
        """
        conn = httplib.HTTPConnection(self.contact_host)
        try:
            conn.request('GET', self.contact_url, headers=self.contact_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't getc contact page. HTTP Code: %s, %s" % (response.status, response.reason))
            page = response.read()
        finally:
            conn.close()
        return page

    def get_contacts(self):
        """Parse the address-book page into self.contacts."""
        page = self.get_contact_page()
        self.contacts = []
        soup = BeautifulSoup(page)
        for x in soup.findAll('xmp'):
            # Only <xmp> elements whose id starts with 't' are used; elements
            # without an id are skipped instead of raising KeyError.
            if (x.get('id') or '').startswith('t'):
                # NOTE(review): assumes each such element has a text node first
                # and a <space> child holding the address -- confirm against a
                # real page.
                self.contacts.append((x.contents[0], x.space.string))
358
359
360
class M163Contact(MailContact):
    """Export the address book of a 163.com mailbox."""

    def __init__(self, username, password):
        """Store the credentials and prepare the login request.

        username -- account name without the "@163.com" suffix.
        password -- account password.
        """
        self.mail_type = "@163.com"
        self.username = username
        self.password = password
        self.contacts = []
        self.login_host = 'reg.163.com'
        self.login_url = '/logins.jsp?type=1&url=http://fm163.163.com/coremail/fcg/ntesdoor2?lightweight=1&verifycookie=1&language=-1&style=-1'

        self.login_data = urllib.urlencode({
            'verifycookie': 1,
            'style': -1,
            'product': 'mail163',
            'username': self.username,
            'password': self.password,
            'selType': -1,
            'remUser': '',
            'secure': 'on'
        })
        self.login_headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
            'Referer': 'http://mail.163.com/'
        }
        self.contact_host = 'g2a10.mail.163.com'

    def login(self):
        """Run the two-step 163.com login and prepare the contact request.

        Step 1 posts the credentials to reg.163.com and reads the NTES_SESS
        and NETEASE_ADV cookies; step 2 presents those cookies to
        fm163.163.com to obtain the Coremail cookie and the session id.
        Sets self.contact_url and self.contact_headers and returns True.
        Raises MailContactError when a step does not yield its cookies.
        """
        conn = httplib.HTTPSConnection(self.login_host)
        try:
            conn.request('POST', self.login_url, self.login_data, self.login_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't log in. HTTP Code: %s, %s" % (response.status, response.reason))
            sc1 = response.getheader('Set-Cookie')
        finally:
            conn.close()

        # Example of the cookies set by a successful login:
        #   Set-Cookie: NTES_SESS=ohAWkiyj.OCjHdh1BK4ToxPcUvFX2fSLaN3FaU0cRInzLoieELdifjyqnBdk4C8qWIZkirZ7.JF.IPFDuR7BcAtKL; domain=.163.com; path=/
        #   Set-Cookie: NETEASE_SSN=weafriend; domain=.163.com; path=/; expires=Mon, 08-Jun-2009 10:42:26 GMT
        #   Set-Cookie: NETEASE_ADV=11&24&1212921746999; domain=.163.com; path=/; expires=Mon, 08-Jun-2009 10:42:26 GMT
        ntes_sess, ntes_adv = None, None
        # getheader() returns None when no cookie was set at all.
        for s in (sc1 or '').split():
            if s.startswith('NTES_SESS'):
                ntes_sess = s[s.find('=') + 1:s.find(';')]
            elif s.startswith('NETEASE_ADV'):
                ntes_adv = s[s.find('=') + 1:s.find(';')]
        if not ntes_sess or not ntes_adv:
            # Wrong user name or password.  The password is deliberately kept
            # out of the message so that it cannot end up in logs.
            raise MailContactError("Login failed for %s%s: user name or password not correct!" % (self.username, self.mail_type))

        # Second step: use the account that is logging in, not a fixed name.
        url = '/coremail/fcg/ntesdoor2?lightweight=1&verifycookie=1&language=-1&style=-1&username=' + urllib.quote(self.username)
        headers = {'cookie': sc1}
        conn = httplib.HTTPConnection('fm163.163.com')
        try:
            # A GET carries no body, so none is passed.
            conn.request('GET', url, headers=headers)
            response = conn.getresponse()
            sc2 = response.getheader('Set-Cookie')
        finally:
            conn.close()
        if not sc2:
            raise MailContactError("Couldn't log in. No session cookie returned. HTTP Code: %s, %s" % (response.status, response.reason))

        # Prefer the cookie named Coremail; fall back to the value of the first
        # cookie in the header, which is what was used before.
        found = re.search(r'Coremail=([^;\s]*)', sc2)
        if found is not None:
            coremail = found.group(1)
        else:
            coremail = sc2[sc2.find('=') + 1:sc2.find(';')]
        # The session id is the part of the cookie value after the first '%'.
        sid = coremail[coremail.find('%') + 1:]
        self.contact_url = '/coremail/fcg/ldvcapp?funcid=prtsearchres&sid=' + sid + '&listnum=200&tempname=address%2faddress.htm'

        # NOTE(review): most of these values are constants copied from a
        # captured browser session -- confirm which ones the server requires.
        self.contact_headers = {
            'Cookie': '; '.join([
                'MAIL163_SSN=' + self.username,
                'vjlast=1212911118',
                'vjuids=-99d7a91f6.1156a6ea3cd.0.9e6d0e6f029e78',
                '_ntes_nuid=7118c6a1c9d16ee59a045a2e66186af8',
                'NTES_adMenuNum=3',
                '_ntes_nnid=7118c6a1c9d16ee59a045a2e66186af8,0|www|urs|163mail|news|ent|sports|digi|lady|tech|stock|travel|music|2008|',
                'NTES_UFC=9110001100010000000000000000000000100000000000000002331026300000',
                'logType=-1',
                'nts_mail_user=' + self.username + ':-1:1',
                'Province=010',
                '_ntes_nvst=1212911122953,|www|urs|',
                'Coremail=' + coremail,
                'wmsvr_domain=g1a109.mail.163.com',
                'ntes_mail_truename=',
                'ntes_mail_province=',
                'ntes_mail_sex=',
                'mail_style=js3',
                'mail_host=g1a109.mail.163.com',
                'mail_sid=' + sid,
                'USERTRACK=58.31.69.214.1212911333143304',
                'ntes_mail_firstpage=normal',
                'NTES_SESS=' + ntes_sess,
                'NETEASE_SSN=' + self.username,
                'NETEASE_ADV=' + ntes_adv,
            ])
        }
        return True

    def get_contact_page(self):
        """Return the raw address-book page; requires a prior login().

        Raises MailContactError on a non-200 response.
        """
        conn = httplib.HTTPConnection(self.contact_host)
        try:
            conn.request('GET', self.contact_url, headers=self.contact_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't getc contact page. HTTP Code: %s, %s" % (response.status, response.reason))
            page = response.read()
        finally:
            conn.close()
        return page

    def get_contacts(self):
        """Parse the address-book page into self.contacts."""
        page = self.get_contact_page()
        # Start from an empty list so that repeated calls do not add duplicates.
        self.contacts = []
        soup = BeautifulSoup(page)
        for x in soup.findAll('xmp'):
            # Only <xmp> elements whose id starts with 't' are used; elements
            # without an id are skipped instead of raising KeyError.
            if (x.get('id') or '').startswith('t'):
                # NOTE(review): assumes each such element has a text node first
                # and a <space> child holding the address -- confirm against a
                # real page.
                self.contacts.append((x.contents[0], x.space.string))
546
547
548
549
550
551
552
553
554
class SohuContact(MailContact):
    """Export the address book of a sohu.com mailbox."""

    def __init__(self, username, password):
        """Store the credentials and install a cookie-aware urllib2 opener.

        username -- account name without the "@sohu.com" suffix.
        password -- account password.
        """
        self.mail_type = "@sohu.com"
        self.username = username
        self.password = password
        self.contacts = []
        self.login_host = 'passport.sohu.com'
        self.login_url = 'http://passport.sohu.com/login.jsp'
        # NOTE(review): 'sg' and 'ct' look like values captured from one
        # browser session -- confirm that the server accepts them unchanged.
        self.login_data = urllib.urlencode({
            'loginid': self.username + self.mail_type,
            'passwd': self.password,
            'sg': '5175b065623bb194e85903f5e8c43386',
            'eru': 'http://login.mail.sohu.com/login.php',
            'ru': 'http://login.mail.sohu.com/login_comm.php',
            'appid': 1000,
            'fl': '1',
            'ct': 1126084880,
            'vr': '1|1'
        })
        self.login_headers = {
            'User-agent': 'Opera/9.23',
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain'
        }
        # The opener is installed process-wide so that the cookies received by
        # login() are sent again by get_contacts().
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar()))
        urllib2.install_opener(opener)
        self.contact_host = 'www50.mail.sohu.com'
        self.contact_url = '/webapp/contact'

    def login(self):
        """Post the credentials and derive the contact URL from the final URL."""
        # Send the prepared headers (User-agent etc.) together with the form.
        req = urllib2.Request(self.login_url, self.login_data, self.login_headers)
        conn = urllib2.urlopen(req)
        try:
            landing_url = conn.geturl()
        finally:
            conn.close()
        # The contact list is served from the directory of the page the login
        # ends up on after its redirects.
        self.contact_url = landing_url.rsplit('/', 1)[0] + '/contact'

    def get_contacts(self):
        """Fetch the JSON contact list and fill self.contacts.

        Raises MailContactError when the response is empty.
        """
        # json is in the standard library from Python 2.6 on; simplejson is the
        # fallback for older interpreters.
        try:
            import json
        except ImportError:
            import simplejson as json

        req = urllib2.Request(self.contact_url)
        conn = urllib2.urlopen(req)
        try:
            buf = conn.readlines()
        finally:
            conn.close()
        if not buf:
            raise MailContactError("Couldn't get contact page: empty response")
        # Only the first line of the response is parsed as JSON.
        info = json.loads(buf[0])
        # Start from an empty list so that repeated calls do not add duplicates.
        self.contacts = []
        for i in info['listString']:
            self.contacts.append((i['name'], i['email']))
638
639
640
class HotmailContact(MailContact):
    """Work-in-progress exporter for hotmail.com mailboxes.

    login() and get_contacts() only print the pages they receive;
    self.contacts is not populated yet.
    """

    def __init__(self, username, password):
        """Store the credentials and prepare the login request.

        username -- account name without the "@hotmail.com" suffix.
        password -- account password.
        """
        self.mail_type = "@hotmail.com"
        self.username = username
        self.password = password
        self.contacts = []
        self.login_host = 'login.live.com'
        self.login_url = '/ppsecure/post.srf?id=2'
        # The pad is the phrase shortened by the length of the password.  The
        # length is computed explicitly because a slice ending at -0 would be
        # empty for an empty password instead of the full phrase.
        pad = 'IfYouAreReadingThisYouHaveTooMuchFreeTime'
        pwd_pad = pad[:max(len(pad) - len(self.password), 0)]
        self.login_data = urllib.urlencode({
            'login': self.username + self.mail_type,
            'passwd': self.password,
            'PPSX': 'Pass',
            'LoginOption': 2,
            'PwdPad': pwd_pad,
            # NOTE(review): fixed token that is already percent-encoded (and is
            # escaped again by urlencode) -- presumably it has to be read from
            # the login page instead; confirm.
            'PPFT': 'B1S2dWnsGTFLpX9h8fxfE*ym5OABStpt0fjo%21YICXQOy1b%21xP4dRx8F1h1w6tR8ZyLP4h3TYGS8gSZGku3j7CxQ4poqr'
        })
        self.login_headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
            # NOTE(review): cookie values copied from one captured browser session.
            'Cookie': 'CkTst=G1213457870062; MobileProf=2AV3mTOwJEE8smIfIyq69wbCn08y6UX7910BtLhqTto2MYrNSBW5hhlEuGlMJdMwwGq1WcxtENCAI1JSyTNfrS23ArFLxDjBNk!xtbIj0iglbu8DQVg9TnSTPtHj975deR; MUID=C2DC0F9324AA47DCB05CE14B989D89C2; ANON=A=E81AEA51F927860B07BBA712FFFFFFFF&E=69f&W=2; s_lastvisit=1213455335875; MH=MSFT; wlidperf=throughput=2087.201125175809&latency=1.422; MSPRequ=lt=1213455763&co=1&id=2; MSPOK=uuid-d75c4c53-1b6e-433c-af95-c3c0175a48cd; CkTst=G1213455761093; [email protected]; MSPCID=0f45e10de2ad38c9; NAP=V=1.7&E=6b4&C=bKkGf4IbC96JLFhsoKyccKm1Kf7jjhX5I3C1ofjvyMoY3iI9j0b6gg&W=2; MSPSoftVis=@:@; BrowserSense=Win=1&Downlevel=0&WinIEOnly=0&Firefox=1&FirefoxVersion=2.0; mktstate=U=&E=en-us; mkt1=norm=en-us; s_cc=true; s_sq=%5B%5BB%5D%5D; MSPP3RD=3688532421',
            'Referer': 'https://login.live.com/ppsecure/post.srf?id=2&bk=1213455763'
        }

        self.contact_host = 'by120w.bay120.mail.live.com'
        self.contact_url = '/mail/GetContacts.aspx'

    def getInputValue(self, name, content):
        """Placeholder; not implemented yet (returns None)."""
        pass

    def login(self):
        """Request the login page, post the credentials and print the reply.

        Raises MailContactError when the POST does not answer with 200.
        """
        # The login flow is described at
        # http://blog.jiexoo.com/2008/05/21/%e7%94%a8httpclient%e8%8e%b7%e5%8f%96hotmail%e8%81%94%e7%b3%bb%e4%ba%ba%e5%88%97%e8%a1%a8/
        conn = httplib.HTTPSConnection(self.login_host)
        try:
            # The request path needs its leading slash.
            conn.request('GET', '/login.srf?id=2')
            response = conn.getresponse()
            # The page itself is not used yet; read it to finish the exchange.
            response.read()
        finally:
            conn.close()

        conn = httplib.HTTPSConnection(self.login_host)
        try:
            conn.request('POST', self.login_url, self.login_data, self.login_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't getc contact page. HTTP Code: %s, %s" % (response.status, response.reason))
            page = response.read()
        finally:
            conn.close()
        print(page)

    def get_contacts(self):
        """Fetch the contact page and print it; parsing is not implemented yet.

        Raises MailContactError when the request does not answer with 200.
        """
        conn = httplib.HTTPConnection(self.contact_host)
        try:
            conn.request('GET', self.contact_url)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't getc contact page. HTTP Code: %s, %s" % (response.status, response.reason))
            page = response.read()
        finally:
            conn.close()
        print(page)
744
745
746
class SinaContact(MailContact):
    """Placeholder for a sina.com exporter; nothing is implemented yet."""
750
751
752
753
754
755
756
class YahooContact(MailContact):
    """Placeholder for a Yahoo mail exporter; nothing is implemented yet."""
760
761
762
class MsnContact(MailContact):
    """Placeholder for an MSN exporter; nothing is implemented yet."""
766
767
768
def get_mailcontact(user, password, mailtype):
    """Log in to a webmail account and return its contacts.

    user     -- account name as expected by the provider class.
    password -- account password.
    mailtype -- provider domain: "126.com", "163.com", "sohu.com",
                "hotmail.com", "sina.com" or "gmail.com".

    Returns the provider object's contact list.  This is a best-effort
    helper: an unknown mailtype, or any error raised while logging in or
    fetching the contacts, yields an empty list.
    """
    if mailtype == "126.com":
        provider = M126Contact
    elif mailtype == "163.com":
        provider = M163Contact
    elif mailtype == "sohu.com":
        provider = SohuContact
    elif mailtype == "hotmail.com":
        provider = HotmailContact
    elif mailtype == "sina.com":
        provider = SinaContact
    elif mailtype == "gmail.com":
        provider = GMailContact
    else:
        # Unknown provider: nothing to export.
        return []

    g = provider(user, password)
    try:
        g.login()
        g.get_contacts()
        return g.contacts
    except Exception:
        # Failures are reported as "no contacts", but KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return []
806
807
808
809
810
811
812
813
814
815
816
def get_csvcontact(iter):
    """Extract (name, email) pairs from CSV data.

    iter -- any iterable of CSV lines accepted by csv.reader (an open file,
            a list of strings, ...).

    Each row is scanned from left to right.  The first cell containing an
    e-mail address ends the row; the name paired with it is the last
    non-blank cell seen before it in the same row, or None if there is none.
    Rows without an address contribute nothing.
    """
    # Local part and domain labels may contain '.', '-' and (local part only)
    # '+', so addresses such as "bob.smith@example.com" are kept whole.
    email_re = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+')
    contact = []
    for row in csv.reader(iter):
        # Reset per row so that a name never carries over from an earlier row.
        name = None
        for cell in row:
            if not cell or not cell.strip():
                continue
            m = email_re.search(cell)
            if m:
                contact.append((name, m.group(0)))
                break
            name = cell
    return contact
846
847
848
def get_imcontact(iter):
    """Return every e-mail address found in CSV data.

    iter -- any iterable of CSV lines accepted by csv.reader.

    Every cell of every row is searched; the first address found in a cell
    is appended to the result as a string, in reading order.
    """
    # Local part and domain labels may contain '.', '-' and (local part only)
    # '+', so addresses such as "bob.smith@example.com" are kept whole.
    email_re = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+')
    contact = []
    for row in csv.reader(iter):
        for cell in row:
            m = email_re.search(cell)
            if m:
                # Store the matched address itself, not the match object.
                contact.append(m.group(0))
    return contact
868
869
870
if __name__ == '__main__':
    # Manual smoke test: replace the '***' placeholders with real credentials.
    # Echo the HTTP traffic of every connection while the script runs.
    httplib.HTTPSConnection.debuglevel = 1
    httplib.HTTPConnection.debuglevel = 1

    g = GMailContact('***', '***')
    g.login()
    g.get_contacts()
    print(g.contacts)

    g = M163Contact('***', '***')
    g.login()
    g.get_contacts()
    print(g.contacts)
反馈