"""Streamlit app for browsing NHL games and predicting outcomes with CatBoost.

The script loads a per-team game log from CSV, derives season-weight columns,
loads a previously saved CatBoost classifier, and renders two pages: a
filterable game list with an on-demand prediction button per row, and a page
of pre-rendered charts.
"""
# NOTE(review): in the reviewed copy of this file every non-ASCII literal was
# mis-decoded (UTF-8 read through a Greek/Latin code page). The literals below
# are restored to the intended Russian text / emoji; the file must be saved as
# UTF-8.

import base64
import datetime
import os

import pandas as pd
import streamlit as st
from catboost import CatBoostClassifier, Pool

# Must be the first Streamlit command executed by the script.
st.set_page_config(
    page_title="Hockey Match Prediction",
    page_icon="🏒",
    layout="wide",
)


@st.cache_data
def load_data():
    """Load the game log and add date- and season-derived columns.

    Reads ``rink_master_47816_wteams.csv``, parses ``gameDate`` and adds
    ``Year``, ``Month``, ``Season``, ``SeasonWeight``, ``NormalizedWeight``
    and ``Weights``.

    Returns:
        pandas.DataFrame: the enriched game log.
    """
    df = pd.read_csv("rink_master_47816_wteams.csv")
    df["gameDate"] = pd.to_datetime(df["gameDate"])

    # Season label is built from the calendar year only ("YYYY-YYYY+1").
    # NOTE(review): "Month" is not used for the label, so games played
    # January-June land in the *next* season's bucket. Left as is because the
    # saved model presumably expects "Weights" computed this way - confirm
    # before changing.
    df["Year"] = df["gameDate"].dt.year
    df["Month"] = df["gameDate"].dt.month
    df["Season"] = df["Year"].astype(str) + "-" + (df["Year"] + 1).astype(str)

    # Rank seasons chronologically (1..N), then min-max scale the rank to [0, 1].
    seasons = df["Season"].unique()
    season_weights = {season: i + 1 for i, season in enumerate(sorted(seasons))}
    max_season_weight = max(season_weights.values())
    min_season_weight = min(season_weights.values())
    df["SeasonWeight"] = df["Season"].map(season_weights)
    # NOTE(review): with a single season the denominator is 0 and the result
    # is NaN for every row.
    df["NormalizedWeight"] = (df["SeasonWeight"] - min_season_weight) / (
        max_season_weight - min_season_weight
    )
    # NormalizedWeight is constant within a season, so the mean equals it.
    df["Weights"] = df.groupby("Season")["NormalizedWeight"].transform("mean")
    return df


data = load_data()


def determine_result(row):
    """Classify one game-log row as win (1), loss (0) or undetermined (-1).

    Args:
        row: mapping/Series with the columns ``Win``, ``regulationWins``,
            ``regulationAndOtWins``, ``shootoutWins``, ``Loss``, ``OTLoss``.

    Returns:
        int: 1 if any win column is non-zero, else 0 if ``Loss`` or
        ``OTLoss`` is non-zero, else -1.
    """
    win_columns = ("Win", "regulationWins", "regulationAndOtWins", "shootoutWins")
    if any(row[column] != 0 for column in win_columns):
        return 1  # win
    if row["Loss"] != 0 or row["OTLoss"] != 0:
        return 0  # loss
    return -1  # undetermined


data["Result"] = data.apply(determine_result, axis=1)

# Team full name -> numeric code used in the "Team" / "Opponent" columns.
fullname_to_code = {
    "New Jersey Devils": 1,
    "New York Islanders": 2,
    "New York Rangers": 3,
    "Philadelphia Flyers": 4,
    "Pittsburgh Penguins": 5,
    "Boston Bruins": 6,
    "Buffalo Sabres": 7,
    "Montréal Canadiens": 8,
    "Ottawa Senators": 9,
    "Toronto Maple Leafs": 10,
    "Carolina Hurricanes": 11,
    "Florida Panthers": 12,
    "Tampa Bay Lightning": 13,
    "Washington Capitals": 14,
    "Chicago Blackhawks": 15,
    "Detroit Red Wings": 16,
    "Nashville Predators": 17,
    "St. Louis Blues": 18,
    "Calgary Flames": 19,
    "Colorado Avalanche": 20,
    "Edmonton Oilers": 21,
    "Vancouver Canucks": 22,
    "Anaheim Ducks": 23,
    "Dallas Stars": 24,
    "Los Angeles Kings": 25,
    "San Jose Sharks": 26,
    "Columbus Blue Jackets": 27,
    "Minnesota Wild": 28,
    "Winnipeg Jets": 29,
    "Arizona Coyotes": 30,
    "Vegas Golden Knights": 31,
    "Seattle Kraken": 32,
}
# Names missing from the mapping become NaN in these columns.
data["Team"] = data["Team"].map(fullname_to_code)
data["Opponent"] = data["Opponent"].map(fullname_to_code)

# Chronological train / test split.
train = data[data["gameDate"] < "2023-10-10"]
test = data[data["gameDate"] >= "2023-10-10"]

# Columns excluded from the model's feature set.
features_to_drop = [
    "Result", "gameDate", "gameID", "gamesPlayed", "Win", "Loss", "Tie",
    "OTLoss", "points", "pointPct", "regulationWins", "regulationAndOtWins",
    "shootoutWins", "goalsFor", "goalsAgainst", "goalsForPerGame",
    "goalsAgainstPerGame", "powerPlayPct", "penaltyKillPct",
    "powerPlayNetPct", "penaltyKillNetPct", "shotsForPerGame",
    "shotsAgainstPerGame", "faceoffWinPct", "Year", "Month", "Season",
    "NonRegulationTime", "SeasonWeight", "NormalizedWeight",
]

# Reverse mapping: numeric code -> team full name.
code_to_fullname = {v: k for k, v in fullname_to_code.items()}

# Only drop columns that are actually present in the data.
features_to_drop = [col for col in features_to_drop if col in train.columns]

# Feature matrices; "Weights" is not in the drop list and stays a feature.
# Only X_train.columns is consumed below (by predict_winner).
X_train = train.drop(columns=features_to_drop)
y_train = train["Result"]
X_test = test.drop(columns=features_to_drop)
y_test = test["Result"]


@st.cache_resource
def load_catboost_model(file_path):
    """Load a saved CatBoost classifier.

    Args:
        file_path: path to the serialized CatBoost model.

    Returns:
        CatBoostClassifier | None: the loaded model, or ``None`` if loading
        failed (the error is written to the page).
    """
    try:
        model = CatBoostClassifier()
        model.load_model(file_path)
        return model
    except Exception as e:
        st.write(f"Ошибка при загрузке модели CatBoost: {e}")
        return None


model_path = "catboost_model.cb"
model = load_catboost_model(model_path)

# Display labels for the "homeRoad" column and the reverse lookup used by the
# sidebar filter.
home_road_mapping = {
    1: "На выезде",
    0: "Дома",
}
home_road_codes = {label: code for code, label in home_road_mapping.items()}

# Display labels for the "Win" column.
win_mapping = {
    1: "Победа",
    0: "Не победа: Поражение или Ничья",
}

# "All" sentinel shared by the sidebar select boxes.
ALL_OPTION = "Все"
# Fallback label for values that have no mapping entry.
UNKNOWN_LABEL = "Неизвестно"


def predict_winner(row, model):
    """Predict the outcome of one game.

    Args:
        row: dict of column name -> value for a single game; only the columns
            present in ``X_train`` are used, missing values are filled with 0.
        model: a loaded ``CatBoostClassifier``.

    Returns:
        tuple: ``(label, probability)`` where ``probability`` is the model's
        probability for the predicted class. On any failure (including
        ``model`` being ``None``) returns ``("Ошибка", None)`` so that callers
        can always unpack two values.
    """
    try:
        features = pd.DataFrame([row], columns=X_train.columns).fillna(0)
        pool = Pool(data=features, feature_names=X_train.columns.tolist())

        prediction = model.predict(pool)
        prediction_proba = model.predict_proba(pool)

        if prediction[0] == 1:
            result = "Победа с вероятностью"
            probability = prediction_proba[0][1]
        else:
            result = "Не победа: Поражение или Ничья с вероятностью"
            probability = prediction_proba[0][0]
        return result, probability
    except Exception as e:
        print(f"Ошибка при предсказании: {e}")
        return "Ошибка", None


# NOTE(review): the markup inside this call is empty in the reviewed copy;
# presumably a <style> block was lost - confirm against the original file.
st.markdown(
    """ """,
    unsafe_allow_html=True,
)

# NOTE(review): the title presumably had HTML tags around it that were lost;
# only the text content is visible in the reviewed copy.
st.markdown(
    "\n\nПредсказание исходов хоккейных матчей NHL 🏒🥅🏆\n\n",
    unsafe_allow_html=True,
)

# Page navigation.
PAGE_MAIN = "Основная"
PAGE_CHARTS = "Графики"
st.sidebar.markdown("## Навигация")
page = st.sidebar.selectbox("Выберите страницу", [PAGE_MAIN, PAGE_CHARTS])

if page == PAGE_MAIN:
    st.sidebar.title("Поиск по фильтрам")
    team_options = [ALL_OPTION] + list(fullname_to_code.keys())
    selected_date = st.sidebar.date_input(
        "Выберите дату", value=datetime.date(2023, 10, 8), key="date_input"
    )
    selected_team = st.sidebar.selectbox(
        "Выберите команду", options=team_options, key="team_select"
    )
    selected_opponent = st.sidebar.selectbox(
        "Выберите оппонента", options=team_options, key="opponent_select"
    )
    selected_home_road = st.sidebar.selectbox(
        "Где играет команда?",
        options=[ALL_OPTION, home_road_mapping[0], home_road_mapping[1]],
        key="home_road_select",
    )

    # Narrow the game log down to the selected criteria.
    filtered_data = data[data["gameDate"] == pd.to_datetime(selected_date)]
    if selected_team != ALL_OPTION:
        filtered_data = filtered_data[
            filtered_data["Team"] == fullname_to_code[selected_team]
        ]
    if selected_opponent != ALL_OPTION:
        filtered_data = filtered_data[
            filtered_data["Opponent"] == fullname_to_code[selected_opponent]
        ]
    if selected_home_road != ALL_OPTION:
        # Translate the chosen label back to its homeRoad code. The previous
        # comparison against "Да" never matched any option, so the filter
        # always selected homeRoad == 0 regardless of the choice.
        filtered_data = filtered_data[
            filtered_data["homeRoad"] == home_road_codes[selected_home_road]
        ]

    if not filtered_data.empty:
        st.write(f"Игры на {selected_date}:")
        col1, col2, col3, col4, col5 = st.columns(5)
        col1.write("Команда")
        col2.write("Оппонент")
        col3.write("Где играет команда?")
        col4.write("Актуальный исход матча")
        col5.write("Предсказание")

        for index, row in filtered_data.iterrows():
            # .get: unmapped team names are NaN here and must not raise.
            team_name = code_to_fullname.get(row["Team"], UNKNOWN_LABEL)
            opponent_name = code_to_fullname.get(row["Opponent"], UNKNOWN_LABEL)

            col1, col2, col3, col4, col5 = st.columns(5)
            col1.write(team_name)
            col2.write(opponent_name)
            col3.write(home_road_mapping.get(row["homeRoad"], UNKNOWN_LABEL))
            col4.write(win_mapping.get(row["Win"], "Нет"))
            if col5.button("Предсказание", key=index):
                prediction, probability = predict_winner(row.to_dict(), model)
                if probability is None:
                    # Prediction failed (details are printed by predict_winner).
                    st.error(f"{team_name} vs {opponent_name}: {prediction}")
                else:
                    st.write(
                        f"Предсказание для игры {team_name} vs {opponent_name}: "
                        f"{prediction} {probability:.2f}"
                    )
    else:
        st.write("Нет игр на выбранную дату.")

    # Background image.
    background_image_path = "7.jpeg"
    if os.path.exists(background_image_path):
        with open(background_image_path, "rb") as image_file:
            encoded_image = base64.b64encode(image_file.read()).decode()
        # NOTE(review): the markup is empty in the reviewed copy, so
        # encoded_image is unused here; presumably a <style> block embedding
        # the base64 image was lost - confirm against the original file.
        st.markdown(
            f""" """,
            unsafe_allow_html=True,
        )
    else:
        st.error(f"Изображение не найдено по пути: {background_image_path}")

elif page == PAGE_CHARTS:
    st.title("Графики и Анализ")

    # Plotting libraries; not referenced by the visible code, kept as in the
    # original.
    import matplotlib.pyplot as plt  # noqa: F401
    import seaborn as sns  # noqa: F401

    # Pre-rendered charts stored next to the app.
    for chart_path in ("graphs/1.png", "graphs/2.png", "graphs/3.png"):
        st.image(chart_path, use_column_width=True)

    st.write("Процент побед в домашних играх: 54.55%")
    st.write("Процент побед в выездных играх: 45.45%")
    st.write("Домашняя арена увеличивает вероятность победы на: 9.10%")

    for chart_path in ("graphs/4.png", "graphs/5.png"):
        st.image(chart_path, use_column_width=True)