netkeibaからデータを取ってきたのでpandasをいじってみる(Part01)
netkeibaからデータを取ってきたのでpandasをいじってみる
メモ程度です。
#%%
import mysql.connector as mariadb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
mariadb_connection = mariadb.connect(host='localhost', user='root', password='', database='netkeiba')
cursor = mariadb_connection.cursor(dictionary=True)
query = "SELECT race_number, rank, horse_number, horse_name, horse_sex, horse_age, jockey_weight, jockey_name, race_time, popularity, odds, passage, trainer, horse_weight, horse_weight_changes FROM horse_datas WHERE url='https://race.netkeiba.com/race/result.html?race_id=202004030109&rf=race_list' OR url='https://race.netkeiba.com/race/result.html?race_id=202004030110&rf=race_list' OR url='https://race.netkeiba.com/race/result.html?race_id=202004030112&rf=race_list'"
cursor.execute(query)
datas = cursor
df = pd.DataFrame(datas)
# %%
df
# %%
df.dtypes
# 型を確認
# print(df.dtypes)
# race_number object
# rank object
# horse_number object
# horse_name object
# horse_sex object
# horse_age object
# jockey_weight object
# jockey_name object
# race_time object
# popularity object
# odds object
# passage object
# trainer object
# horse_weight object
# horse_weight_changes object
# dtype: object
# %%
df['odds'] = df['odds'].astype(float)
# オッズのカラムの型をfloatに変更
# %%
df.dtypes
# 型を確認
# print(df.dtypes)
# race_number object
# rank object
# horse_number object
# horse_name object
# horse_sex object
# horse_age object
# jockey_weight object
# jockey_name object
# race_time object
# popularity object
# odds float64
# passage object
# trainer object
# horse_weight object
# horse_weight_changes object
# dtype: object
# %%
df['odds'].max()
# 一番高いオッズ
# print(df['odds'].max())
# 318.2
# %%
df.groupby('race_number').max()['odds']
# レースごとに一番高いオッズ
# print(df.groupby('race_number').max()['odds'])
# race_number
# 10 318.2
# 12 301.4
# 9 198.5
# %%
df['cost'] = 100
# 新しいカラムをcostを追加
# %%
df['earn'] = df['cost'] * df['odds']
# 新しいカラムearnを追加
# %%
df['profit'] = df['earn'] - df['cost']
# 新しいカラムprofitを追加
# %%
df.groupby('race_number').min()['profit']
# レース毎の利益の最小値
# print(df.groupby("race_number").min()['profit'])
# race_number
# 10 240.0
# 12 220.0
# 9 280.0
# %%
df.groupby('race_number').max()['profit']
# レース毎の利益の最大値
# print(df.groupby("race_number").max()['profit'])
# race_number
# 10 31720.0
# 12 30040.0
# 9 19750.0
# %%
df.groupby('race_number').max()['profit'].sum()
# レース毎の利益の最大値の合計
# print(df.groupby('race_number').max()['profit'].sum())
# 81510.0
# %%
df[(df['popularity'])=='1']
# 一番人気の馬を出力
# print(df.loc[(df['popularity'])=='1'])
# race_number rank horse_number horse_name horse_sex horse_age jockey_weight jockey_name race_time popularity odds passage trainer horse_weight horse_weight_changes cost earn profit
# 0 9 1 1 テルツェット 牝 3 52.0 三浦 1:33.3 1 3.8 8-9 和田郎 428 +12 100 380.0 280.0
# 23 10 7 4 ルガーサント 牝 3 52.0 戸崎圭 1:21.4 1 3.4 5-5 西村 428 -2 100 340.0 240.0
# 35 12 1 2 ブルーノジュネス 牡 3 54.0 田中勝 1:53.9 1 3.2 5-3-4-4 宗像 484 +4 100 320.0 220.0
# %%
df[(df['popularity'] == '1') & (df['rank'] == '1')]
# 一番人気且つ着順一位の馬を出力
# print(df[(df['popularity'] == '1') & (df['rank'] == '1')])
# race_number rank horse_number horse_name horse_sex horse_age jockey_weight jockey_name race_time popularity odds passage trainer horse_weight horse_weight_changes cost earn profit
# 0 9 1 1 テルツェット 牝 3 52.0 三浦 1:33.3 1 3.8 8-9 和田郎 428 +12 100 380.0 280.0
# 35 12 1 2 ブルーノジュネス 牡 3 54.0 田中勝 1:53.9 1 3.2 5-3-4-4 宗像 484 +4 100 320.0 220.0
ディスカッション
コメント一覧
まだ、コメントがありません