您现在的位置是:首页 本地
经典数据分析案例3-全国热门旅游景点数据分析
207人已围观
简介:国庆节马上结束了,本节通过全国各地区景点门票的售卖情况,简单分析一下全国比较热门的景点分布和...
国庆节马上结束了,本节通过去全国各地区景点门票的售卖情况,简单分析一下全国比较热门的景点分布和国民出游情况,看看哪些景点比较受欢迎。
涉及到的内容:Pandas — 数据处理、Pyecharts — 数据可视化、jieba — 分词、collections — 数据统计。
1、导入模块
import jieba
import pandas as pd
from collections import Counter
from pyecharts.charts import Line,Pie,Scatter,Bar,Map,Grid
from pyecharts.charts import WordCloud
from pyecharts import options as opts
from pyecharts.globals import ThemeType
from pyecharts.globals import SymbolType
from pyecharts.commons.utils import JsCode
2、读取数据
# Load the scenic-spot ticket data from the Excel workbook in the working
# directory. The path must be a quoted string literal.
df = pd.read_excel("./旅游景点.xlsx")
df.head()
3、查看索引、数据类型和内存信息
# Print the index range, column dtypes, non-null counts and memory usage.
df.info()
4、查看数值型列汇总统计
# Summary statistics for the numeric columns.
df.describe()

# Inspect rows with zero sales, then drop them. `.copy()` detaches the
# filtered frame from the original so later writes act on `df` itself rather
# than on a view/temporary (avoids SettingWithCopy problems).
df.loc[df["销量"] == 0, :].head()
df = df[df["销量"] != 0].copy()
df.shape

# Missing values per column before filling.
df.isnull().sum()

# Assign the filled column back instead of calling fillna(inplace=True) on a
# column selection: the chained in-place form can operate on a temporary and
# silently leave `df` unchanged.
df["星级"] = df["星级"].fillna("未知")
df.isnull().sum()

# Fill whatever is still missing in the remaining columns with the same marker.
df.fillna("未知", inplace=True)
df.isnull().sum()

# Best-selling attractions first.
df.sort_values("销量", ascending=False).head()
5、数据可视化
销量前20热门景点数据
# Linear gradient (left to right) used as the bar fill colour. The colour
# values must be quoted inside the JavaScript snippet.
color_js = """new echarts.graphic.LinearGradient(0, 0, 1, 0,
[{offset: 0, color: '#009ad6'}, {offset: 1, color: '#ed1941'}], false)"""

# Sort ascending and keep the last 20 rows: after reversal_axis() the largest
# value ends up at the top of the horizontal bar chart.
sort_info = df.sort_values(by="销量", ascending=True)
b1 = (
    Bar()
    .add_xaxis(list(sort_info["名称"])[-20:])
    .add_yaxis(
        "热门景点销量",
        sort_info["销量"].values.tolist()[-20:],
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_js)),
    )
    .reversal_axis()
    .set_global_opts(
        title_opts=opts.TitleOpts(title="热门景点销量数据"),
        yaxis_opts=opts.AxisOpts(name="景点名称"),
        xaxis_opts=opts.AxisOpts(name="销量"),
    )
    .set_series_opts(label_opts=opts.LabelOpts(position="right"))
)

# Shift the plot area to the right so long attraction names are not clipped.
g1 = (
    Grid()
    .add(b1, grid_opts=opts.GridOpts(pos_left="20%", pos_right="5%"))
)
g1.render_notebook()
假期出行数据全国地图分布
华东、华南、华中等地区属于国民出游热点地区,尤其是北京、上海、江苏、广东、四川、陕西等地区出行比较密集
# Total ticket sales per region. Selecting the "销量" column before summing
# yields a Series of scalars, so every map entry is [name, value]; summing the
# two-column frame and calling .values.tolist() would give [name, [value]].
df_tmp1 = df[["城市", "销量"]]
df_counts = df_tmp1.groupby("城市")["销量"].sum()
m1 = (
    Map()
    .add(
        "假期出行分布",
        [list(z) for z in zip(df_counts.index.tolist(), df_counts.tolist())],
        "china",
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="假期出行数据地图分布"),
        visualmap_opts=opts.VisualMapOpts(
            max_=100000,
            is_piecewise=False,
            range_color=["white", "#fa8072", "#ed1941"],
        ),
    )
)
m1.render_notebook()
各省市4A-5A景区数量柱状图
# Linear gradient (bottom to top) used as the bar fill colour.
color_js = """new echarts.graphic.LinearGradient(0, 1, 0, 0,
[{offset: 0, color: '#009ad6'}, {offset: 1, color: '#ed1941'}], false)"""

# Number of 4A/5A-rated attractions per region.
df_tmp2 = df[df["星级"].isin(["4A", "5A"])]
df_counts = df_tmp2.groupby("城市").count()["星级"]
b2 = (
    Bar()
    .add_xaxis(df_counts.index.values.tolist())
    .add_yaxis(
        "4A-5A景区数量",
        df_counts.values.tolist(),
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_js)),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="各省市4A-5A景区数量"),
        # Slider zoom plus mouse-wheel ("inside") zoom on the category axis.
        datazoom_opts=[opts.DataZoomOpts(), opts.DataZoomOpts(type_="inside")],
    )
)
b2.render_notebook()
各省市4A-5A景区数量玫瑰图
# Rose (Nightingale) chart of the per-region 4A/5A counts, largest first.
# sort_values returns a new Series, so df_counts itself is left untouched.
df0 = df_counts.sort_values(ascending=False)
c1 = (
    Pie()
    .add(
        "",
        [list(z) for z in zip(df0.index.values.tolist(), df0.values.tolist())],
        radius=["30%", "100%"],
        center=["50%", "60%"],
        rosetype="area",
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="地区景点数量"),
        legend_opts=opts.LegendOpts(is_show=False),
        toolbox_opts=opts.ToolboxOpts(),
    )
    .set_series_opts(
        label_opts=opts.LabelOpts(
            is_show=True,
            position="inside",
            font_size=12,
            formatter="{b}: {c}",
            font_style="italic",
            font_weight="bold",
            font_family="Microsoft YaHei",
        )
    )
)
c1.render_notebook()
各省市4A-5A景区数量阴影散点图
# Raw ECharts item style adding a drop shadow to every scatter symbol.
item_style = {
    "normal": {
        "shadowColor": "#000000",
        "shadowBlur": 20,
        "shadowOffsetX": 5,
        "shadowOffsetY": 15,
    }
}
s1 = (
    Scatter()
    .add_xaxis(df_counts.index.values.tolist())
    .add_yaxis(
        "4A-5A景区数量",
        df_counts.values.tolist(),
        symbol_size=50,
        itemstyle_opts=item_style,
    )
    .set_global_opts(
        # Hidden visual map that scales symbol size with the value.
        visualmap_opts=opts.VisualMapOpts(
            is_show=False,
            type_="size",
            range_size=[5, 50],
        )
    )
)
s1.render_notebook()
各省市4A-5A景区地图分布
# Number of 4A/5A-rated attractions per region, drawn on the China map.
df_tmp3 = df[df["星级"].isin(["4A", "5A"])]
df_counts = df_tmp3.groupby("城市").count()["星级"]
m2 = (
    Map()
    .add(
        "4A-5A景区分布",
        [list(z) for z in zip(df_counts.index.values.tolist(), df_counts.values.tolist())],
        "china",
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="地图数据分布"),
        visualmap_opts=opts.VisualMapOpts(max_=50, is_piecewise=True),
    )
)
m2.render_notebook()
门票价格区间占比玫瑰图
# Share of attractions per ticket-price bracket.
# pd.cut uses right-closed bins by default, so a price of exactly 0 and any
# price above 500 fall outside every bin and are not counted.
price_level = [0, 50, 100, 150, 200, 250, 300, 350, 400, 500]
label_level = [
    "0-50", "50-100", "100-150", "150-200", "200-250",
    "250-300", "300-350", "350-400", "400-500",
]
jzmj_cut = pd.cut(df["价格"], price_level, labels=label_level)
df_price = jzmj_cut.value_counts()
df_price

p1 = (
    Pie(init_opts=opts.InitOpts(width="800px", height="600px"))
    .add(
        "",
        [list(z) for z in zip(df_price.index.tolist(), df_price.values.tolist())],
        radius=["20%", "60%"],
        center=["40%", "50%"],
        rosetype="radius",
        label_opts=opts.LabelOpts(is_show=True),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="门票价格占比", pos_left="33%", pos_top="5%"),
        legend_opts=opts.LegendOpts(
            type_="scroll", pos_left="80%", pos_top="25%", orient="vertical"
        ),
    )
    # `position` is a label option, so it belongs inside LabelOpts; passing it
    # to set_series_opts directly would set an unrelated series-level key.
    .set_series_opts(
        label_opts=opts.LabelOpts(formatter="{b}: {c} ({d}%)", position="outside")
    )
)
p1.render_notebook()
门票价格区间数量散点图
# Radial gradient used as the fill colour of each scatter symbol.
color_js = """new echarts.graphic.RadialGradient(
                    0.5, 0.5, 1,
                    [{offset: 0,
                      color: '#009ad6'},
                     {offset: 1,
                      color: '#ed1941'}
                    ])"""
s2 = (
    Scatter()
    .add_xaxis(df_price.index.tolist())
    .add_yaxis(
        "门票价格区间",
        df_price.values.tolist(),
        symbol_size=50,
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_js)),
    )
    # One set_global_opts call carries both the axis names and the visual map.
    .set_global_opts(
        yaxis_opts=opts.AxisOpts(name="数量"),
        xaxis_opts=opts.AxisOpts(name="价格区间(元)"),
        visualmap_opts=opts.VisualMapOpts(
            is_show=False,
            # Encode the value as symbol size.
            type_="size",
            # Range of symbol sizes the values are mapped onto.
            range_size=[5, 50],
        ),
    )
)
s2.render_notebook()
景点简介词云
# Concatenate every attraction description into one string; str() guards
# against non-string cells such as numbers.
contents = "".join(str(text) for text in df["简介"].values.tolist())

# Tokenise with jieba and count token frequencies.
contents_list = jieba.cut(contents)
ac = Counter(contents_list)

# Load the stop-word list, one word per line.
with open("./stopwords.txt", "r", encoding="utf-8") as f:
    stopwords = f.read().split("\n")

# Drop stop words from the counts. Counter's `del` ignores missing keys, so no
# membership check is needed.
for word in stopwords:
    del ac[word]

w1 = (
    WordCloud()
    .add(
        "",
        ac.most_common(150),
        word_size_range=[5, 100],
        textstyle_opts=opts.TextStyleOpts(font_family="cursive"),
        shape="star",
    )
    .set_global_opts(title_opts=opts.TitleOpts(title="景点简介词云"))
)
w1.render_notebook()
景点简介词云-自定义模板
# Word cloud of the 200 most frequent tokens, shaped by a custom mask image.
w2 = (
    WordCloud()
    .add(
        "",
        ac.most_common(200),
        word_size_range=[5, 80],
        textstyle_opts=opts.TextStyleOpts(font_family="cursive"),
        # Path to the mask image; must be a quoted string.
        mask_image="./1.jpg",
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="自定义样式词云图"),
    )
)
w2.render_notebook()
6、总结
- 华东、华南、华中等地区属于国民出游热点地区,尤其是北京、上海、江苏、广东、四川、陕西等地区出行比较密集。
- 江苏、安徽、河南、北京、湖北等地区4A、5A级景区数量比较多。
- 门票价格100以内居多,大概占比70%,还是比较实惠的,而且一般景区还存在学生优惠等。
- 其他待定。。。