Python爬蟲學習筆記(十) — 儲存資料(JSON, CSV, MySQL, MongoDB)

JSON

將 python object轉JSON
json.dumps()
將JSON轉為python object
json.loads()
#案例
import json
jsondata = '''
{
"Uin":0,
"UserName":"@c482d142bc698bc3971d9f8c26335c5c",
"NickName":"小帥b",
"HeadImgUrl":"/cgi-bin/mmwebwx-bin/webwxgeticon?seq=500080&username=@c482d142bc698bc3971d9f8c26335c5c&skey=@crypt_b0f5e54e_b80a5e6dffebd14896dc9c72049712bf",
...此處省略一些.."DisplayName":"",
"ChatRoomId":0,
"KeyWord":"che",
"EncryChatRoomId":"",
"IsOwner":0
}
'''
myfriend = json.loads(jsondata)
memberList = myfriend.get('MemberList')

CSV

# 以字典的方式寫入數據到 xiaoshuaib.csv
import csv
with open('xiaoshuaib.csv', mode='w') as csv_file:
fieldnames = ['你是谁', '你几岁', '你多高']
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader()
writer.writerow({'你是谁': '小帅b', '你几岁': '18岁', '你多高': '18cm'})
writer.writerow({'你是谁': '小帅c', '你几岁': '19岁', '你多高': '17cm'})
writer.writerow({'你是谁': '小帅d', '你几岁': '20岁', '你多高': '16cm'})
#Pandas讀取csv
pip install pandas
import pandas as pd
xiaoshuaib = pd.read_csv('xiaoshuaib.csv')
#Pandas存成csv
import pandas as pd
b = ['小帥b', '小帥c', '小帥d']
c = ['18歲', '19歲', '20歲']
d = ['18cm', '17cm', '16cm']
df = pd.DataFrame({'你是誰' : b, '你幾歲' : c, '你多高' : d})
df.to_csv("xsb.csv", index=False, sep=',')

MySQL

安裝MySQL#建立資料庫mysql> create database if not exists Idol;Query OK, 1 row affected (0.01 sec)#安裝pymysql
pip install pymysql
#開始寫程式
import pymysql
# 使用 connect 方法,傳入數據庫地址,賬號密碼,數據庫名就可以得到你的數據庫對象db = pymysql.connect("你的數據庫地址", "數據庫賬號",
"數據庫密碼", "Idol")
# 接著我們獲取 cursor 來操作我們的 Idol 這個數據庫
cursor = db.cursor()
# 比如我們來創建一張數據表
sql = """create table beautyGirls (
name char(20) not null,
age int)"""
cursor.execute(sql)
# 最後我們關閉這個數據庫的連接
db.close()
#插入數據資料
import pymysql
# 使用 connect 方法,傳入數據庫地址,賬號密碼,數據庫名就可以得到你的數據庫對象
db = pymysql.connect("localhost", "root", "root", "Idol")
# 接著我們獲取 cursor 來操作我們的 avIdol 這個數據庫
cursor = db.cursor()
# 插入一條記錄
sql = "insert into beautyGirls(name, age) values ('Mrs.cang', 18)"
try:
cursor.execute(sql)
db.commit()
except:
# 回滾
db.rollback()
# 最後我們關閉這個數據庫的連接
db.close()
#刪除數據資料sql = "delete from beautyGirls where age = '%d'" % (18)
try:
cursor.execute(sql)
db.commit()
except:
db.rollback()
------------------------------------------------------------------
#安裝mysqlclient
pip install mysqlclient
#建立資料庫
mysql> CREATE DATABASE xsb CHARACTER SET utf8 COLLATE utf8_general_ci;
#建立表格
mysql> CREATE TABLE xsb (name char(20), age char(20), length char(20));
Query OK, 0 rows affected (0.34 sec)
#將CSV存到資料庫
import pandas as pd
from sqlalchemy import create_engine
df = pd.read_csv('xsb.csv')# 當engine連接的時候我們就插入數據
engine = create_engine('mysql://root@localhost/xsb?charset=utf8')
with engine.connect() as conn, conn.begin():
df.to_sql('xsb', conn, if_exists='replace')

MongoDB

安裝MongoDB
# 輸入 mongo 就可以操作 MongoDB 了
from pymongo import MongoClient
conn = MongoClient('mongodb://localhost:27017/')#27017是你MongoDB的默認port
db = conn.Idol #創建一個 Idol 數據庫,如果 mongodb 沒有會自行創建
db.col.insert({"name": '王曉明', 'bwh': '{ "b": 90, "w": 59, "h": 85}' , 'age': 30})#批量插入資料
db.col.insert([
{"name": '王曉明', 'bwh': '{ "b": 90, "w": 59, "h": 85}' , 'age': 30},
{"name": '大頭', 'bwh': '{ "b": 86, "w": 58, "h": 86}' , 'age': 35},
{"name": '路易莎', 'bwh': '{ "b": 80, "w": 54, "h": 80}' , 'age': 22},
{"name": '黃河入海流', 'bwh': '{ "b": 85, "w": 56, "h": 86}' , 'age': 22},
{"name": '床前明月光', 'bwh': '{ "b": 88, "w": 57, "h": 86}' , 'age': 28}
])
#用For查詢
for item in db.col.find():
print(item)
#刪除某筆資料
db.col.remove({"name": "王曉明"})
#刪除全部
db.col.remove()
#把大頭換成小頭
db.col.update({'name': '大頭'}, {'$set': {'name': '小頭'}})

Written by

Machine Learning / Deep Learning / Python / Flutter cakeresume.com/yanwei-liu

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store