## page was renamed from MicroProj/2008-03-11 ##language:zh #pragma section-numbers on ::-- ZoomQuiet [<>] <> ## 默许导航,请保留 <> = 断点续传&多线程下载 = [[http://groups.google.com/group/python-cn/t/f5cd716e5d8c258f|CPyUG:42890~ python 实现断点续传和多线程下载]] == C实现的 == {{{unyi sun reply-to python-cn@googlegroups.com, to python-cn@googlegroups.com, date Tue, Mar 11, 2008 at 1:20 PM subject [CPyUG:42994] Re: python 实现断点续传和多线程下载 }}}我用C写的一个下载器,支持断点续传了多线程以及服务器Redirect,用Python写的话,应该更简洁 {{{#!cplusplus /** ** description:qdown is a multithread downloader ** author:Sunjoy ** from:ICT.CAS. ** date:2007-9-10 ** */ #include #include #include #include #include #include #include #include #include #include #define MAX_THREAD 100 typedef struct URLInfo { char schema[8]; char host[256]; char host_name[256]; unsigned int port; char file[256]; }URLInfo; typedef struct Connection { int sock; URLInfo url_info; int avaliable; }Connection; typedef struct Resource { char file_url[256]; int file_size; char file_name[256]; }Resource; typedef struct ThreadArg { Resource* res; int start_pos; int limit; int no; }ThreadArg; typedef struct BreakPoint { int downloaded; int thread_amount; int tasks[MAX_THREAD][2]; }BreakPoint; pthread_mutex_t g_mut; int g_total=0; int g_downloaded=0; BreakPoint g_breakpoint; URLInfo parse_url(const char *url); Connection open_url(const char * url); Resource get_resource(const char *url); void join_url(const char* old_url,const char* redirect,char * new_url); void download(const char* url,int thread_amount,const char* file_name); void* download_part(void* args); void* monitor(void *args); void store_breakpoint(char * cfgName); void store_breakpoint(char * cfgName) { int z; FILE* f; f=fopen(cfgName,"w"); fprintf(f,"%d\n",g_breakpoint.downloaded); fprintf(f,"%d\n",g_breakpoint.thread_amount); for(z=0;z-1) p2--; else if(strcmp(stack1[i],".")){ strcpy(stack2[p2++],stack1[i]); } } //printf("##%s\n",stack2[0]); if(!strcmp(stack2[0],"")){ temp_urlinfo=parse_url(old_url); sprintf(new_url,"%s://%s:%d/",temp_urlinfo.schema,temp_urlinfo.host,temp_urlinfo.port); } else{ i=strlen(old_url)-1; while(old_url[i]!='/') i--; //printf("##%c\n",old_url[i]); strncpy(new_url,old_url,i+1); new_url[i+1]=0; } //printf("##%s\n",new_url); for(j=0;jh_addrtype,*(hptr->h_addr_list),IP,sizeof(IP)) ); } //printf("%s\n",url_info.host); return url_info; } Connection open_url(const char* url){ Connection conn; struct sockaddr_in remote_addr,local_addr; conn.avaliable=0; conn.url_info=parse_url(url); local_addr.sin_family=AF_INET; local_addr.sin_addr.s_addr=htonl(INADDR_ANY); local_addr.sin_port=htons(0); remote_addr.sin_family=AF_INET; remote_addr.sin_addr.s_addr=inet_addr(conn.url_info.host); remote_addr.sin_port=htons(conn.url_info.port); conn.sock=socket(AF_INET,SOCK_STREAM,0); if(bind(conn.sock, (struct sockaddr*)&local_addr, sizeof(local_addr))<0){ printf("bind error\n"); } if(conn.sock){ if( connect(conn.sock,(struct sockaddr*)&remote_addr,sizeof(remote_addr))!=-1 ){ conn.avaliable=1; } } return conn; } Resource get_resource(const char* url){ char pack[1024]={0}; char buf[1024]={0}; char redirect[256]={0},new_url[256]={0},old_url[256]={0}; static int redirect_count=0; char* i; char* j; char* z; Resource res; Connection conn=open_url(url); if(!conn.avaliable){ return res; } sprintf(pack,"GET %s HTTP/1.1\nHost: %s\nAccept: */*\nReferer: http://%s\nUser-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\nPragma: no-cache\nCache-Control: no-cache\nConnection: close\n\n",conn.url_info.file,conn.url_info.host_name,conn.url_info.host_name); send(conn.sock,pack,strlen(pack),0); recv(conn.sock,buf,sizeof(buf),0); //printf("%s\n",buf); if(strstr(buf,"HTTP/1.1 404")!=NULL || strstr(buf,"HTTP/1.0 404")!=NULL){ return res; } i=(char *)strstr(buf,"Location:"); if(i!=NULL && redirect_count<5){ sscanf(i,"Location: %s",redirect); sprintf(old_url,"%s://%s:%d%s",conn.url_info.schema,conn.url_info.host_name,conn.url_info.port,conn.url_info.file); join_url(old_url,redirect,new_url); //printf("@#%s\n",new_url); redirect_count++; return get_resource(new_url); } i=(char *)strstr(buf,"Content-Length:"); if(i!=NULL){ sscanf(i,"Content-Length: %d",&res.file_size); } strcpy(res.file_url,url); //printf("#%d\n",res.file_size); for(z=(char*)url;(j=strstr(z,"/"))!=NULL;){ z=j+sizeof(char); } strcpy(res.file_name,z); close(conn.sock); return res; } void* download_part(void * args) { ThreadArg* targ=(ThreadArg*)args; Connection conn; FILE* f=NULL; char pack[1024]={0}; char buf[1024]={0}; int i=0,ct=0; char* body=NULL; //printf("%s,%d-%d\n",targ->res->file_url, targ->start_pos,targ->limit); conn=open_url(targ->res->file_url); while(!conn.avaliable){ sleep(1); conn=open_url(targ->res->file_url); } if(conn.avaliable){ f=fopen(targ->res->file_name,"rb+"); fseek(f,targ->start_pos,0); sprintf(pack,"GET %s HTTP/1.1\nHost: %s\nAccept: */*\nReferer: http://%s\nUser-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\nRange: bytes=%d-%d\nPragma: no-cache\nCache-Control: no-cache\n\n",conn.url_info.file,conn.url_info.host_name,conn.url_info.host_name,targ->start_pos,targ->start_pos+targ->limit-1); //printf("%s",pack); begin_down: send(conn.sock,pack,strlen(pack),0); i=recv(conn.sock,buf,sizeof(buf),0); if(strstr(buf,"HTTP/1.1 206")==NULL && strstr(buf,"HTTP/1.0 206")==NULL && strstr(buf,"HTTP/1.1 200")==NULL && strstr(buf,"HTTP/1.0 200")==NULL){ sleep(2); memset(buf,0,sizeof(buf)); conn=open_url(targ->res->file_url); goto begin_down; } //printf("##%s\n",body); body=strstr(buf,"\r\n\r\n")+4; if(body!=NULL){ i=i-(body-buf); fwrite(body,sizeof(char),i,f); //printf("@@@@%x\n",buf); fflush(f); ct+=i; pthread_mutex_lock(&g_mut); g_downloaded+=i; pthread_mutex_unlock(&g_mut); while(ct< targ->limit){ i=recv(conn.sock,buf,sizeof(buf),0); if(i==0){ fclose(f); conn.avaliable=0; while(!conn.avaliable){ sleep(2); //printf("waiting...\n"); conn=open_url(targ->res->file_url); } memset(pack,0,sizeof(pack)); memset(buf,0,sizeof(buf)); sprintf(pack,"GET %s HTTP/1.1\nHost: %s\nAccept: */*\nReferer: http://%s\nUser-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\nRange: bytes=%d-%d\nPragma: no-cache\nCache-Control: no-cache\n\n",conn.url_info.file,conn.url_info.host_name,conn.url_info.host_name,targ->start_pos+ct,targ->start_pos+targ->limit-1); f=fopen(targ->res->file_name,"rb+"); fseek(f,targ->start_pos+ct,0); goto begin_down; } fwrite(buf,sizeof(char),i,f); fflush(f); ct+=i; pthread_mutex_lock(&g_mut); g_downloaded+=i; g_breakpoint.tasks[targ->no][0]=targ->start_pos+ct; g_breakpoint.tasks[targ->no][1]=targ->limit-ct; g_breakpoint.downloaded=g_downloaded; pthread_mutex_unlock(&g_mut); } fclose(f); g_breakpoint.downloaded=g_downloaded; close(conn.sock); } } pthread_exit(NULL); } void* monitor(void* args){ float p; int i,j,z,old; FILE* f; char cfgName[256]; strcpy(cfgName,(char*)args); strcat(cfgName,".cfg"); while(1){ p=g_downloaded/(g_total+0.0); if(g_downloaded>=g_total) break; i=p*100/10; if(old!=g_downloaded){ printf("\r"); for(j=0;jMAX_THREAD) return; res=get_resource(url); if(!strcmp(res.file_url,"")) return; if(strcmp(file_name,"")) strcpy(res.file_name,file_name); if(!strcmp(res.file_name,"")) strcpy(res.file_name,"default_down"); if(res.file_size<1000000) thread_amount=1; block_size=res.file_size/thread_amount; pthread_mutex_init(&g_mut,NULL); strcpy(cfgName,res.file_name); strcat(cfgName,".cfg"); printf("downloading %s,%d bytes... \n",res.file_name,res.file_size); if(fopen(cfgName,"r")==NULL){ new_task: f=fopen(res.file_name,"wb"); if(f==NULL){ strcpy(res.file_name,"default_down"); f=fopen(res.file_name,"wb"); } fclose(f); g_total=res.file_size; for(i=0;i=3) sscanf(av[2],"%d",&thread_amount); if(ac>=4){ strcpy(file_name,av[3]); } download(av[1],thread_amount,file_name); } return 0; } }}} ##= 反馈 = [[/PageCommentData|PageCommentData]]