用Python分析元旦旅游景点,告诉你哪些地方性价比较高
元旦去哪里玩
数据获取
去哪儿网数据采集相对简单,找到真实URL后,构造参数拼接,用requests库请求到JSON数据,以追加模式将数据存储为CSV文件即可。
import csv
import random
from time import sleep

import pandas as pd
import requests
from fake_useragent import UserAgent


def get_data(keyword, page):
    """Fetch one page of Qunar ticket search results and print the sight list.

    Sends a GET request to the ticket-list JSON endpoint with a random
    User-Agent, pauses 1-2 seconds, then prints the value found under
    ``data -> sightList`` in the JSON response.

    Args:
        keyword: Search keyword placed in the ``keyword`` query parameter.
        page: Page number placed in the ``page`` query parameter.

    Returns:
        None. The sight list is printed; on a failed request or an
        unexpected response a message is printed and the page is skipped.
    """
    ua = UserAgent(verify_ssl=False)
    headers = {"User-Agent": ua.random}
    url = "http://piao.qunar.com/ticket/list.json"
    # Passing the query as a dict lets requests do the URL encoding and
    # restores the "region" parameter, which had been garbled into "®ion".
    params = {
        "keyword": keyword,
        "region": "",
        "from": "mpl_search_suggest",
        "page": page,
    }

    try:
        # A timeout keeps one stalled connection from hanging the whole crawl.
        res = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"第{page}页请求失败,已跳过: {exc}")
        return

    # Random pause between requests.
    sleep(random.uniform(1, 2))

    try:
        sight_list = res.json()["data"]["sightList"]
    except (ValueError, KeyError, TypeError) as exc:
        # ValueError: body is not JSON; KeyError/TypeError: unexpected shape.
        # Report the problem instead of swallowing it silently.
        print(f"第{page}页数据解析失败,已跳过: {exc!r}")
        return

    print(sight_list)


if __name__ == '__main__':
    keyword = "厦门"
    for page in range(1, 100):  # controls how many pages are fetched
        print(f"正在提取第{page}页")
        sleep(random.uniform(1, 2))
        get_data(keyword, page)

数据处理
导入相关包
首先导入数据处理和数据可视化相关第三方库,便于后续操作。
import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns %matplotlib inline plt.rcParams['font.sans-serif']