::-- ZoomQuiet [2008-03-11 09:58:19]
Contents
1. 断点续传&多线程下载
CPyUG:42890~ python 实现断点续传和多线程下载
1.1. C实现的
{{{unyi sun <ccnusjy@gmail.com> reply-to python-cn@googlegroups.com, to python-cn@googlegroups.com, date Tue, Mar 11, 2008 at 1:20 PM subject [CPyUG:42994] Re: python 实现断点续传和多线程下载
}}}我用C写的一个下载器，支持断点续传、多线程以及服务器Redirect；用Python写的话，应该更简洁
Toggle line numbers
1 /**
2 ** description:qdown is a multithread downloader
3 ** author:Sunjoy
4 ** from:ICT.CAS.
5 ** date:2007-9-10
6 **
7 */
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <unistd.h>
12 #include <string.h>
13 #include <pthread.h>
14 #include <sys/types.h>
15 #include <sys/socket.h>
16 #include <netinet/in.h>
17 #include <arpa/inet.h>
18 #include <netdb.h>
19 #define MAX_THREAD 100
20
/* The pieces of a URL, as filled in by parse_url(). */
struct URLInfo
{
    char schema[8];         /* text in front of the first ':' (e.g. "http") */
    char host[256];         /* server address in numeric text form, from inet_ntop() */
    char host_name[256];    /* host exactly as it was written in the URL */
    unsigned int port;      /* port taken from the URL, 80 when none is given */
    char file[256];         /* path part of the URL, "/" when the URL has none */
};
typedef struct URLInfo URLInfo;
29
30 typedef struct Connection
31 {
32 int sock;
33 URLInfo url_info;
34 int avaliable;
35 }Connection;
36
/* What get_resource() learned about the remote file. */
struct Resource
{
    char file_url[256];     /* URL the file is fetched from (after redirects) */
    int file_size;          /* value of the Content-Length header, in bytes */
    char file_name[256];    /* local name the data is written to */
};
typedef struct Resource Resource;
43
44 typedef struct ThreadArg
45 {
46 Resource* res;
47 int start_pos;
48 int limit;
49 int no;
50 }ThreadArg;
51
52 typedef struct BreakPoint
53 {
54 int downloaded;
55 int thread_amount;
56 int tasks[MAX_THREAD][2];
57
58 }BreakPoint;
59
60 pthread_mutex_t g_mut;
61 int g_total=0;
62 int g_downloaded=0;
63 BreakPoint g_breakpoint;
64
65 URLInfo parse_url(const char *url);
66 Connection open_url(const char * url);
67 Resource get_resource(const char *url);
68 void join_url(const char* old_url,const char* redirect,char * new_url);
69 void download(const char* url,int thread_amount,const char* file_name);
70 void* download_part(void* args);
71 void* monitor(void *args);
72 void store_breakpoint(char * cfgName);
73
74 void store_breakpoint(char * cfgName)
75 {
76 int z;
77 FILE* f;
78 f=fopen(cfgName,"w");
79 fprintf(f,"%d\n",g_breakpoint.downloaded);
80 fprintf(f,"%d\n",g_breakpoint.thread_amount);
81 for(z=0;z<g_breakpoint.thread_amount;z++){
82 fprintf(f,"%d-%d\n",g_breakpoint.tasks[z][0],g_breakpoint.tasks[z][1]);
83 }
84 fclose(f);
85 }
86
/**
 * join_url - resolve a redirect target against the URL that produced it.
 *
 * old_url  : absolute URL of the request that was redirected.
 * redirect : value of the Location header.  It may be an absolute URL
 *            (contains "://"), a scheme-relative reference ("//host/path"),
 *            an absolute path ("/path") or a path relative to the
 *            directory of old_url.
 * new_url  : receives the resolved URL.  At most 256 bytes including the
 *            terminator are written (the size of the URL buffers used in
 *            this file); a longer result is truncated.
 *
 * "." and ".." segments are collapsed and ".." never climbs above the
 * root.  A "?query" or "#fragment" in redirect is carried over unchanged.
 * Scheme, host and port are copied textually from old_url, so the host
 * name is preserved and no DNS lookup happens here.
 */
void join_url(const char* old_url,const char* redirect,char * new_url)
{
    enum { URL_CAP = 256 };
    char merged[2 * URL_CAP];   /* base directory + redirect path, not yet normalised */
    char path[2 * URL_CAP];     /* normalised path */
    const char *scheme_end, *authority, *path_start, *path_end, *dir_end, *p, *suffix;
    size_t ref_len, out;

    new_url[0] = 0;

    /* Case 1: the redirect is already a complete URL. */
    if (strstr(redirect, "://") != NULL) {
        snprintf(new_url, URL_CAP, "%s", redirect);
        return;
    }

    scheme_end = strstr(old_url, "://");
    authority = (scheme_end != NULL) ? scheme_end + 3 : old_url;

    /* Case 2: "//host/path" keeps only the scheme of the old URL. */
    if (redirect[0] == '/' && redirect[1] == '/') {
        if (scheme_end != NULL)
            snprintf(new_url, URL_CAP, "%.*s:%s", (int)(scheme_end - old_url), old_url, redirect);
        else
            snprintf(new_url, URL_CAP, "http:%s", redirect);
        return;
    }

    /* Split the old URL into "scheme://authority" and its path. */
    path_start = authority + strcspn(authority, "/?#");
    path_end = path_start + strcspn(path_start, "?#");

    /* Split the redirect into its path and an optional ?query/#fragment. */
    ref_len = strcspn(redirect, "?#");
    suffix = redirect + ref_len;

    if (redirect[0] == '/') {
        /* Case 3: absolute path replaces the whole old path. */
        snprintf(merged, sizeof(merged), "%.*s", (int)ref_len, redirect);
    }
    else if (ref_len == 0) {
        /* Only a query/fragment (or nothing at all): keep the old path. */
        snprintf(merged, sizeof(merged), "%.*s", (int)(path_end - path_start), path_start);
    }
    else {
        /* Case 4: relative path, appended to the directory of the old path. */
        dir_end = path_start;
        for (p = path_start; p < path_end; p++) {
            if (*p == '/')
                dir_end = p + 1;
        }
        if (dir_end == path_start)
            snprintf(merged, sizeof(merged), "/%.*s", (int)ref_len, redirect);
        else
            snprintf(merged, sizeof(merged), "%.*s%.*s",
                     (int)(dir_end - path_start), path_start, (int)ref_len, redirect);
    }

    /*
     * Collapse "." and ".." one segment at a time.  `p` always points at the
     * '/' that introduces the next segment; the output never grows beyond
     * the input, so `path` cannot overflow.
     */
    out = 0;
    p = merged;
    while (*p == '/') {
        const char *seg = p + 1;
        size_t len = strcspn(seg, "/");
        int is_last = (seg[len] == 0);

        if (len == 1 && seg[0] == '.') {
            if (is_last)
                path[out++] = '/';
        }
        else if (len == 2 && seg[0] == '.' && seg[1] == '.') {
            /* Drop the previous segment, stopping at the root. */
            while (out > 0 && path[--out] != '/')
                ;
            if (is_last)
                path[out++] = '/';
        }
        else {
            path[out++] = '/';
            memcpy(path + out, seg, len);
            out += len;
        }
        p = seg + len;
    }
    if (out == 0)
        path[out++] = '/';
    path[out] = 0;

    snprintf(new_url, URL_CAP, "%.*s%s%s", (int)(path_start - old_url), old_url, path, suffix);
}
139
140 URLInfo parse_url(const char* url){
141 int i=0,j=0;
142 char schema[8]={0};
143 char host[256]={0};
144 char port[8]={0};
145 char file[256]={0};
146 char IP[32]={0};
147 URLInfo url_info;
148 struct hostent* hptr;
149
150 while(url[i]!=':'){
151 schema[j++]=url[i++];
152 }
153
154 for(i+=3,j=0;url[i]!=':' && url[i]!='/' && url[i]!=0;){
155 host[j++]=url[i++];
156 }
157
158 if(url[i]==':'){
159 for(i+=1,j=0;url[i]!='/';){
160 port[j++]=url[i++];
161 }
162 sscanf(port,"%d",&url_info.port);
163 }
164 else{
165 url_info.port=80;
166 }
167
168 if(url[i]!=0){
169 for(j=0;url[i]!=0;){
170 file[j++]=url[i++];
171 }
172 }
173 else{
174 file[0]='/';
175 }
176
177 strcpy(url_info.schema,schema);
178 strcpy(url_info.file,file);
179 strcpy(url_info.host_name,host);
180 hptr=gethostbyname(host);
181
182 if(hptr!=NULL){
183 strcpy(url_info.host,
184 inet_ntop(hptr->h_addrtype,*(hptr->h_addr_list),IP,sizeof(IP))
185 );
186 }
187 //printf("%s\n",url_info.host);
188 return url_info;
189 }
190 Connection open_url(const char* url){
191 Connection conn;
192 struct sockaddr_in remote_addr,local_addr;
193
194 conn.avaliable=0;
195 conn.url_info=parse_url(url);
196
197 local_addr.sin_family=AF_INET;
198 local_addr.sin_addr.s_addr=htonl(INADDR_ANY);
199 local_addr.sin_port=htons(0);
200 remote_addr.sin_family=AF_INET;
201 remote_addr.sin_addr.s_addr=inet_addr(conn.url_info.host);
202 remote_addr.sin_port=htons(conn.url_info.port);
203
204 conn.sock=socket(AF_INET,SOCK_STREAM,0);
205 if(bind(conn.sock,
206 (struct sockaddr*)&local_addr,
207 sizeof(local_addr))<0){
208 printf("bind error\n");
209 }
210
211
212
213 if(conn.sock){
214 if(
215 connect(conn.sock,(struct sockaddr*)&remote_addr,sizeof(remote_addr))!=-1
216 ){
217 conn.avaliable=1;
218 }
219 }
220
221 return conn;
222 }
223
224 Resource get_resource(const char* url){
225 char pack[1024]={0};
226 char buf[1024]={0};
227 char redirect[256]={0},new_url[256]={0},old_url[256]={0};
228 static int redirect_count=0;
229 char* i;
230 char* j;
231 char* z;
232 Resource res;
233
234 Connection conn=open_url(url);
235 if(!conn.avaliable){
236 return res;
237 }
238 sprintf(pack,"GET %s HTTP/1.1\nHost: %s\nAccept: */*\nReferer: http://%s\nUser-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\nPragma: no-cache\nCache-Control: no-cache\nConnection: close\n\n",conn.url_info.file,conn.url_info.host_name,conn.url_info.host_name);
239 send(conn.sock,pack,strlen(pack),0);
240 recv(conn.sock,buf,sizeof(buf),0);
241 //printf("%s\n",buf);
242 if(strstr(buf,"HTTP/1.1 404")!=NULL || strstr(buf,"HTTP/1.0 404")!=NULL){
243 return res;
244 }
245 i=(char *)strstr(buf,"Location:");
246 if(i!=NULL && redirect_count<5){
247 sscanf(i,"Location: %s",redirect);
248 sprintf(old_url,"%s://%s:%d%s",conn.url_info.schema,conn.url_info.host_name,conn.url_info.port,conn.url_info.file);
249 join_url(old_url,redirect,new_url);
250 //printf("@#%s\n",new_url);
251 redirect_count++;
252 return get_resource(new_url);
253 }
254 i=(char *)strstr(buf,"Content-Length:");
255 if(i!=NULL){
256 sscanf(i,"Content-Length: %d",&res.file_size);
257 }
258 strcpy(res.file_url,url);
259 //printf("#%d\n",res.file_size);
260 for(z=(char*)url;(j=strstr(z,"/"))!=NULL;){
261 z=j+sizeof(char);
262 }
263 strcpy(res.file_name,z);
264 close(conn.sock);
265 return res;
266 }
267
268 void* download_part(void * args)
269 {
270 ThreadArg* targ=(ThreadArg*)args;
271 Connection conn;
272 FILE* f=NULL;
273 char pack[1024]={0};
274 char buf[1024]={0};
275 int i=0,ct=0;
276 char* body=NULL;
277 //printf("%s,%d-%d\n",targ->res->file_url, targ->start_pos,targ->limit);
278 conn=open_url(targ->res->file_url);
279 while(!conn.avaliable){
280 sleep(1);
281 conn=open_url(targ->res->file_url);
282 }
283 if(conn.avaliable){
284
285 f=fopen(targ->res->file_name,"rb+");
286 fseek(f,targ->start_pos,0);
287 sprintf(pack,"GET %s HTTP/1.1\nHost: %s\nAccept: */*\nReferer: http://%s\nUser-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\nRange: bytes=%d-%d\nPragma: no-cache\nCache-Control: no-cache\n\n",conn.url_info.file,conn.url_info.host_name,conn.url_info.host_name,targ->start_pos,targ->start_pos+targ->limit-1);
288 //printf("%s",pack);
289 begin_down:
290 send(conn.sock,pack,strlen(pack),0);
291 i=recv(conn.sock,buf,sizeof(buf),0);
292
293 if(strstr(buf,"HTTP/1.1 206")==NULL && strstr(buf,"HTTP/1.0 206")==NULL && strstr(buf,"HTTP/1.1 200")==NULL && strstr(buf,"HTTP/1.0 200")==NULL){
294 sleep(2);
295 memset(buf,0,sizeof(buf));
296 conn=open_url(targ->res->file_url);
297 goto begin_down;
298 }
299 //printf("##%s\n",body);
300 body=strstr(buf,"\r\n\r\n")+4;
301 if(body!=NULL){
302 i=i-(body-buf);
303 fwrite(body,sizeof(char),i,f);
304 //printf("@@@@%x\n",buf);
305 fflush(f);
306 ct+=i;
307 pthread_mutex_lock(&g_mut);
308 g_downloaded+=i;
309 pthread_mutex_unlock(&g_mut);
310
311 while(ct< targ->limit){
312 i=recv(conn.sock,buf,sizeof(buf),0);
313 if(i==0){
314 fclose(f);
315 conn.avaliable=0;
316 while(!conn.avaliable){
317 sleep(2);
318 //printf("waiting...\n");
319 conn=open_url(targ->res->file_url);
320 }
321 memset(pack,0,sizeof(pack));
322 memset(buf,0,sizeof(buf));
323 sprintf(pack,"GET %s HTTP/1.1\nHost: %s\nAccept: */*\nReferer: http://%s\nUser-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\nRange: bytes=%d-%d\nPragma: no-cache\nCache-Control: no-cache\n\n",conn.url_info.file,conn.url_info.host_name,conn.url_info.host_name,targ->start_pos+ct,targ->start_pos+targ->limit-1);
324 f=fopen(targ->res->file_name,"rb+");
325 fseek(f,targ->start_pos+ct,0);
326 goto begin_down;
327 }
328
329 fwrite(buf,sizeof(char),i,f);
330 fflush(f);
331 ct+=i;
332 pthread_mutex_lock(&g_mut);
333 g_downloaded+=i;
334 g_breakpoint.tasks[targ->no][0]=targ->start_pos+ct;
335 g_breakpoint.tasks[targ->no][1]=targ->limit-ct;
336 g_breakpoint.downloaded=g_downloaded;
337 pthread_mutex_unlock(&g_mut);
338 }
339 fclose(f);
340 g_breakpoint.downloaded=g_downloaded;
341 close(conn.sock);
342 }
343 }
344 pthread_exit(NULL);
345 }
346 void* monitor(void* args){
347 float p;
348 int i,j,z,old;
349 FILE* f;
350 char cfgName[256];
351 strcpy(cfgName,(char*)args);
352 strcat(cfgName,".cfg");
353
354 while(1){
355 p=g_downloaded/(g_total+0.0);
356 if(g_downloaded>=g_total)
357 break;
358 i=p*100/10;
359 if(old!=g_downloaded){
360
361
362 printf("\r");
363 for(j=0;j<i;j++){
364 printf("==");
365 }
366 printf("%2.0f%%",p*100);
367 fflush(stdout);
368
369 store_breakpoint(cfgName);
370 old=g_downloaded;
371 }
372 }
373 printf("\r====================100%%\n");
374 remove(cfgName);
375 pthread_exit(NULL);
376 }
377
378
379 void download(const char* url,int thread_amount,const char* file_name)
380 {
381 ThreadArg targs[MAX_THREAD];
382 pthread_attr_t * thAttr = NULL;
383 pthread_t tids[MAX_THREAD],monitor_id,controler_id;
384 Resource res;
385 int i,block_size,t_start_pos,t_limit;
386 FILE* f;
387 char cfgName[256]={0};
388
389 if(thread_amount>MAX_THREAD)
390 return;
391 res=get_resource(url);
392
393 if(!strcmp(res.file_url,""))
394 return;
395
396 if(strcmp(file_name,""))
397 strcpy(res.file_name,file_name);
398
399 if(!strcmp(res.file_name,""))
400 strcpy(res.file_name,"default_down");
401
402 if(res.file_size<1000000)
403 thread_amount=1;
404
405 block_size=res.file_size/thread_amount;
406 pthread_mutex_init(&g_mut,NULL);
407
408 strcpy(cfgName,res.file_name);
409 strcat(cfgName,".cfg");
410 printf("downloading %s,%d bytes... \n",res.file_name,res.file_size);
411
412 if(fopen(cfgName,"r")==NULL){
413 new_task:
414 f=fopen(res.file_name,"wb");
415 if(f==NULL){
416 strcpy(res.file_name,"default_down");
417 f=fopen(res.file_name,"wb");
418 }
419 fclose(f);
420 g_total=res.file_size;
421
422 for(i=0;i<thread_amount;i++){
423 targs[i].res=&res;
424 targs[i].start_pos=block_size*i;
425 targs[i].limit=block_size;
426 if(i==thread_amount-1)
427 targs[i].limit+= (res.file_size%thread_amount);
428
429 targs[i].no=i;
430 g_breakpoint.tasks[i][0]=targs[i].start_pos;
431 g_breakpoint.tasks[i][1]=block_size;
432 pthread_create(&tids[i], thAttr, download_part, (void *)&targs[i]);
433 }
434
435 }
436 else{
437 f=fopen(cfgName,"r");
438 if(fscanf(f,"%d",&g_downloaded)==-1)
439 goto new_task;
440 //printf("#%d\n",g_downloaded);
441 g_total=res.file_size;
442 fscanf(f,"%d",&thread_amount);
443 for(i=0;i<thread_amount;i++){
444 fscanf(f,"%d-%d",&t_start_pos,&t_limit);
445 targs[i].res=&res;
446 targs[i].start_pos=t_start_pos;
447 targs[i].limit=t_limit;
448 targs[i].no=i;
449 g_breakpoint.tasks[i][0]=targs[i].start_pos;
450 g_breakpoint.tasks[i][1]=t_limit;
451 pthread_create(&tids[i], thAttr, download_part, (void *)&targs[i]);
452 }
453 fclose(f);
454 }
455
456 pthread_create(&monitor_id,NULL,monitor,(void *)res.file_name);
457 g_breakpoint.thread_amount=thread_amount;
458 g_breakpoint.downloaded=g_downloaded;
459 //printf("#%d\n",g_downloaded);
460 /*for(i=0;i<thread_amount;i++){
461 pthread_join(tids[i],NULL);
462 }*/
463
464 pthread_join(monitor_id,NULL);
465 }
466
467
468
469 int main (int ac, char * av[])
470 {
471 int thread_amount=5;
472 char file_name[256]={0};
473 if(ac<2){
474 printf("usage: qdown URL [thread_amount] [save as]\n");
475 printf("example: qdown http://www.baidu.com/img/logo.gif 5 /home/sunjoy/log.gif\n");
476 }
477 else{
478 if(ac>=3)
479 sscanf(av[2],"%d",&thread_amount);
480 if(ac>=4){
481 strcpy(file_name,av[3]);
482 }
483 download(av[1],thread_amount,file_name);
484
485 }
486
487 return 0;
488 }