実践データ分析100本ノック(第3章-顧客の全体像を把握する-21~24)
2020年12月21日
実践データ分析100本ノック(第3章-顧客の全体像を把握する-21~24)
# %%
import pandas as pd
uselog = pd.read_csv('./samples/3/use_log.csv')
print(len(uselog))
uselog.head()
# 197428
# log_id customer_id usedate
# 0 L00000049012330 AS009373 2018-04-01
# 1 L00000049012331 AS015315 2018-04-01
# 2 L00000049012332 AS040841 2018-04-01
# 3 L00000049012333 AS046594 2018-04-01
# 4 L00000049012334 AS073285 2018-04-01
# %%
customer = pd.read_csv('./samples/3/customer_master.csv')
print(len(customer))
customer.head()
# 4192
# customer_id name class gender start_date end_date campaign_id is_deleted
# 0 OA832399 XXXX C01 F 2015-05-01 00:00:00 NaN CA1 0
# 1 PL270116 XXXXX C01 M 2015-05-01 00:00:00 NaN CA1 0
# 2 OA974876 XXXXX C01 M 2015-05-01 00:00:00 NaN CA1 0
# 3 HD024127 XXXXX C01 F 2015-05-01 00:00:00 NaN CA1 0
# 4 HD661448 XXXXX C03 F 2015-05-01 00:00:00 NaN CA1 0
# %%
class_master = pd.read_csv('./samples/3/class_master.csv')
print(len(class_master))
class_master.head()
# 3
# class class_name price
# 0 C01 オールタイム 10500
# 1 C02 デイタイム 7500
# 2 C03 ナイト 6000
# %%
campaign_master = pd.read_csv('./samples/3/campaign_master.csv')
print(len(campaign_master))
campaign_master.head()
# 3
# campaign_id campaign_name
# 0 CA1 通常
# 1 CA2 入会費半額
# 2 CA3 入会費無料
# %%
customer_join = pd.merge(customer, class_master, on="class", how="left")
customer_join = pd.merge(customer_join, campaign_master, on="campaign_id", how="left")
customer_join.head()
# customer_id name class gender start_date end_date campaign_id is_deleted class_name price campaign_name
# 0 OA832399 XXXX C01 F 2015-05-01 00:00:00 NaN CA1 0 オールタイム 10500 通常
# 1 PL270116 XXXXX C01 M 2015-05-01 00:00:00 NaN CA1 0 オールタイム 10500 通常
# 2 OA974876 XXXXX C01 M 2015-05-01 00:00:00 NaN CA1 0 オールタイム 10500 通常
# 3 HD024127 XXXXX C01 F 2015-05-01 00:00:00 NaN CA1 0 オールタイム 10500 通常
# 4 HD661448 XXXXX C03 F 2015-05-01 00:00:00 NaN CA1 0 ナイト 6000 通常
# %%
customer_join.isnull().sum()
# customer_id 0
# name 0
# class 0
# gender 0
# start_date 0
# end_date 2842
# campaign_id 0
# is_deleted 0
# class_name 0
# price 0
# campaign_name 0
# dtype: int64
# %%
customer_join.groupby("class_name").count()["customer_id"]
# class_name
# オールタイム 2045
# デイタイム 1019
# ナイト 1128
# Name: customer_id, dtype: int64
# %%
customer_join.groupby("campaign_name").count()["customer_id"]
# campaign_name
# 入会費半額 650
# 入会費無料 492
# 通常 3050
# Name: customer_id, dtype: int64
# %%
customer_join.groupby("gender").count()["customer_id"]
# gender
# F 1983
# M 2209
# Name: customer_id, dtype: int64
# %%
customer_join.groupby("is_deleted").count()["customer_id"]
# is_deleted
# 0 2842
# 1 1350
# Name: customer_id, dtype: int64
# %%
# datetime型に変換
customer_join["start_date"] = pd.to_datetime(customer_join["start_date"])
# 2018年4月1日移行に入会した人数を確認
customer_start = customer_join.loc[customer_join["start_date"] > pd.to_datetime("20180401")]
print(len(customer_start))
# 1361
# %%
# datetime型に変換
customer_join["end_date"] = pd.to_datetime(customer_join["end_date"])
# 2019年3月31日以降に退会したユーザー、もしくは在籍中のユーザーに絞る
customer_newer = customer_join.loc[(customer_join["end_date"] >= pd.to_datetime("20190331")) | (customer_join["end_date"].isna())]
# 人数を確認
print(len(customer_newer))
# 欠損値(退会していないユーザー)と2019年3月31日に退会したユーザーのみ該当しているか確認
customer_newer["end_date"].unique()
# 2953
# array(['NaT', '2019-03-31T00:00:00.000000000'],
# dtype='datetime64[ns]')
# %%
customer_newer.groupby("class_name").count()["customer_id"]
# class_name
# オールタイム 1444
# デイタイム 696
# ナイト 813
# Name: customer_id, dtype: int64
# %%
customer_newer.groupby("campaign_name").count()["customer_id"]
# campaign_name
# 入会費半額 311
# 入会費無料 242
# 通常 2400
# Name: customer_id, dtype: int64
# %%
customer_newer.groupby("gender").count()["customer_id"]
# gender
# F 1400
# M 1553
# Name: customer_id, dtype: int64
# %%
ディスカッション
コメント一覧
まだ、コメントがありません