用Python下載檔案
6 min read · May 5, 2019
使用requests
import requests

# Fetch the image; the whole response body is held in memory as .content.
url = 'https://www.python.org/static/img/python-logo@2x.png'
myfile = requests.get(url)

# 'wb' because the payload is binary image data; the with-block makes sure
# the file handle is closed once the bytes are written.
with open('c:/users/LikeGeeks/downloads/PythonImage.png', 'wb') as f:
    f.write(myfile.content)
使用wget
# Install the package first (shell command, not Python): pip install wget
import wget

url = "https://www.python.org/static/img/python-logo@2x.png"
# The second argument is the output path for the downloaded file.
wget.download(url, 'c:/users/LikeGeeks/downloads/pythonLogo.png')
下載重新導向的檔案
import requests

url = 'insert url'  # placeholder: replace with the address to download
# allow_redirects=True makes requests follow the redirect chain to the file.
response = requests.get(url, allow_redirects=True)
# The file must be opened for binary writing: response.content is bytes, and
# open() without a mode is read-only text mode.
with open('filename.pdf', 'wb') as pypdf:
    pypdf.write(response.content)
區塊下載大檔案
import requests

url = 'insert url'  # placeholder: replace with the address to download
# stream=True (lowercase keyword) defers fetching the body so it can be read
# piece by piece instead of being loaded into memory all at once.
r = requests.get(url, stream=True)
with open("filename.pdf", 'wb') as pypdf:
    # Write the body in 1024-byte chunks.
    for chunk in r.iter_content(chunk_size=1024):
        if chunk:  # skip empty chunks
            pypdf.write(chunk)
下載多個檔案 (平行/大量下載)
import os
import requests
from time import time
from multiprocessing.pool import ThreadPool


def url_response(url):
    """Download a single file.

    Args:
        url: A ``(path, url)`` tuple - the local path to write to and the
            address to fetch. It is a single argument so the function can be
            mapped directly over the ``urls`` list by the thread pool.
    """
    path, url = url
    r = requests.get(url, stream=True)
    with open(path, 'wb') as f:
        # Iterating the response yields the body in small chunks.
        for ch in r:
            f.write(ch)


urls = [
    ("Event1", "https://www.python.org/events/python-events/805/"),
    ("Event2", "https://www.python.org/events/python-events/801/"),
    ("Event3", "https://www.python.org/events/python-events/790/"),
    ("Event4", "https://www.python.org/events/python-events/798/"),
    ("Event5", "https://www.python.org/events/python-events/807/"),
    ("Event6", "https://www.python.org/events/python-events/807/"),
    ("Event7", "https://www.python.org/events/python-events/757/"),
    ("Event8", "https://www.python.org/events/python-user-group/816/"),
]

start = time()
# imap_unordered() returns a lazy iterator and does not wait for the workers.
# Draining it inside the with-block guarantees every download has finished
# before the pool is shut down and before the elapsed time is reported.
with ThreadPool(9) as pool:
    for _ in pool.imap_unordered(url_response, urls):
        pass
print(f"Time to download: {time() - start}")
下載時顯示進度條(tqdm)
### pip install tqdm
import requests as req
from tqdm import tqdm

url = 'http://swf.com.tw/scrap/img/IR.png'


def download(url):
    """Download ``url`` into the current directory, showing a progress bar.

    Args:
        url: Address of the file to fetch. The text after the last ``/`` is
            used as the local file name.

    Returns:
        The name of the file that was written.
    """
    filename = url.split('/')[-1]
    r = req.get(url, stream=True)
    with open(filename, 'wb') as f:
        # tqdm wraps the chunk iterator and advances once per 1024-byte chunk.
        for data in tqdm(r.iter_content(1024)):
            f.write(data)
    return filename


download(url)
用urllib下載網站
import urllib.request

# General form: urllib.request.urlretrieve('url', 'path')
# Concrete example - save the python.org front page as a local HTML file:
urllib.request.urlretrieve('https://www.python.org/', 'c:/users/LikeGeeks/documents/PythonOrganization.html')
透過proxy下載
import urllib.request

myProxy = urllib.request.ProxyHandler({'http': '127.0.0.2'})
openProxy = urllib.request.build_opener(myProxy)
# Building an opener is not enough: urlretrieve() goes through the globally
# installed opener, so the proxy-aware one has to be installed first.
urllib.request.install_opener(openProxy)
# NOTE(review): the mapping above only names the 'http' scheme while this URL
# is https - add an 'https' entry if this request must use the proxy too.
urllib.request.urlretrieve('https://www.python.org/')
import requests

# Proxy mapping: URL scheme -> proxy address.
# NOTE(review): only 'http' is listed while the request below is https - add
# an 'https' entry if this request must go through the proxy.
myProxy = {'http': 'http://127.0.0.2:3001'}
requests.get("https://www.python.org/", proxies=myProxy)
下載Google drive檔案
# Install the package first (shell command, not Python):
#   pip install googledrivedownloader
from google_drive_downloader import GoogleDriveDownloader as gd

# file_id identifies the shared file; unzip=True asks the downloader to
# extract the archive after saving it to dest_path.
gd.download_file_from_google_drive(
    file_id='0B7XV2PwnZyfNalJ6cFd6dXBrckE',
    dest_path='./data/2ndHalfJava.zip',
    unzip=True,
)
下載Youtube影片
# Install pytube first (shell command, not Python): pip install pytube
from pytube import YouTube

# Keep the YouTube object in a variable so it can be reused below.
yt = YouTube('https://www.youtube.com/watch?v=himEMfYQJ1w')

# Download the first available stream of the video.
yt.streams.first().download()

# List every stream available for the video.
streams = yt.streams.all()