import pandas as pd import datetime import os import base64 from catboost import CatBoostClassifier, Pool import streamlit as st st.set_page_config( page_title="Hockey Match Prediction", page_icon="π", layout="wide" ) # Π€ΡΠ½ΠΊΡΠΈΡ Π·Π°Π³ΡΡΠ·ΠΊΠΈ Π΄Π°Π½Π½ΡΡ @st.cache_data def load_data(): df = pd.read_csv("rink_master_47816_wteams.csv") df['gameDate'] = pd.to_datetime(df['gameDate']) # ΠΠ·Π²Π»Π΅ΡΠ΅Π½ΠΈΠ΅ Π³ΠΎΠ΄Π° ΠΈ ΠΌΠ΅ΡΡΡΠ°, ΠΈ ΡΠΎΠ·Π΄Π°Π½ΠΈΠ΅ Π½ΠΎΠ²ΠΎΠ³ΠΎ ΡΡΠΎΠ»Π±ΡΠ° Season df["Year"] = df["gameDate"].dt.year df["Month"] = df["gameDate"].dt.month df["Season"] = df["Year"].astype(str) + "-" + (df["Year"] + 1).astype(str) # Π‘ΠΎΠ·Π΄Π°Π½ΠΈΠ΅ SeasonWeight ΠΈ NormalizedWeight seasons = df["Season"].unique() season_weights = {season: i + 1 for i, season in enumerate(sorted(seasons))} max_season_weight = max(season_weights.values()) min_season_weight = min(season_weights.values()) df["SeasonWeight"] = df["Season"].map(season_weights) df["NormalizedWeight"] = (df["SeasonWeight"] - min_season_weight) / ( max_season_weight - min_season_weight ) df["Weights"] = df.groupby("Season")["NormalizedWeight"].transform("mean") return df data = load_data() # ΠΠΏΡΠ΅Π΄Π΅Π»Π΅Π½ΠΈΠ΅ ΡΠ΅Π·ΡΠ»ΡΡΠ°ΡΠ° def determine_result(row): if ( row["Win"] != 0 or row["regulationWins"] != 0 or row["regulationAndOtWins"] != 0 or row["shootoutWins"] != 0 ): return 1 # ΠΠΎΠ±Π΅Π΄Π° elif row["Loss"] != 0 or row["OTLoss"] != 0: return 0 # ΠΠΎΡΠ°ΠΆΠ΅Π½ΠΈΠ΅ else: return -1 # ΠΠ΅ΠΎΠΏΡΠ΅Π΄Π΅Π»Π΅Π½ΠΎ data["Result"] = data.apply(determine_result, axis=1) # ΠΠ°ΠΏΠΏΠΈΠ½Π³ ΠΊΠΎΠΌΠ°Π½Π΄ Π½Π° ΡΠΈΡΠ»ΠΎΠ²ΡΠ΅ Π·Π½Π°ΡΠ΅Π½ΠΈΡ fullname_to_code = { "New Jersey Devils": 1, "New York Islanders": 2, "New York Rangers": 3, "Philadelphia Flyers": 4, "Pittsburgh Penguins": 5, "Boston Bruins": 6, "Buffalo Sabres": 7, "MontrΓ©al Canadiens": 8, "Ottawa Senators": 9, "Toronto Maple Leafs": 10, "Carolina Hurricanes": 11, "Florida Panthers": 12, "Tampa Bay Lightning": 13, "Washington Capitals": 14, "Chicago Blackhawks": 15, "Detroit Red Wings": 16, "Nashville Predators": 17, "St. Louis Blues": 18, "Calgary Flames": 19, "Colorado Avalanche": 20, "Edmonton Oilers": 21, "Vancouver Canucks": 22, "Anaheim Ducks": 23, "Dallas Stars": 24, "Los Angeles Kings": 25, "San Jose Sharks": 26, "Columbus Blue Jackets": 27, "Minnesota Wild": 28, "Winnipeg Jets": 29, "Arizona Coyotes": 30, "Vegas Golden Knights": 31, "Seattle Kraken": 32, } data["Team"] = data["Team"].map(fullname_to_code) data["Opponent"] = data["Opponent"].map(fullname_to_code) # Π Π°Π·Π΄Π΅Π»Π΅Π½ΠΈΠ΅ Π΄Π°Π½Π½ΡΡ Π½Π° ΠΎΠ±ΡΡΠ°ΡΡΡΡ ΠΈ ΡΠ΅ΡΡΠΎΠ²ΡΡ Π²ΡΠ±ΠΎΡΠΊΠΈ train = data[data["gameDate"] < "2023-10-10"] test = data[data["gameDate"] >= "2023-10-10"] # ΠΠΏΡΠ΅Π΄Π΅Π»Π΅Π½ΠΈΠ΅ ΠΊΠΎΠ»ΠΎΠ½ΠΎΠΊ, ΠΊΠΎΡΠΎΡΡΠ΅ Π±ΡΠ΄ΡΡ ΡΠ΄Π°Π»Π΅Π½Ρ features_to_drop = [ "Result", "gameDate", "gameID", "gamesPlayed", "Win", "Loss", "Tie", "OTLoss", "points", "pointPct", "regulationWins", "regulationAndOtWins", "shootoutWins", "goalsFor", "goalsAgainst", "goalsForPerGame", "goalsAgainstPerGame", "powerPlayPct", "penaltyKillPct", "powerPlayNetPct", "penaltyKillNetPct", "shotsForPerGame", "shotsAgainstPerGame", "faceoffWinPct", "Year", "Month", "Season", "NonRegulationTime", "SeasonWeight", "NormalizedWeight" ] code_to_fullname = {v: k for k, v in fullname_to_code.items()} # Π‘ΠΎΠ·Π΄Π°Π½ΠΈΠ΅ ΠΎΠ±ΡΠ°ΡΠ½ΠΎΠ³ΠΎ ΠΌΠ°ΠΏΠΏΠΈΠ½Π³Π° # Π£Π±Π΅Π΄ΠΈΡΠ΅ΡΡ, ΡΡΠΎ ΠΊΠΎΠ»ΠΎΠ½ΠΊΠΈ Π΄Π»Ρ ΡΠ΄Π°Π»Π΅Π½ΠΈΡ ΡΡΡΠ΅ΡΡΠ²ΡΡΡ Π² Π΄Π°Π½Π½ΡΡ features_to_drop = [col for col in features_to_drop if col in train.columns] # ΠΠ±Π½ΠΎΠ²Π»Π΅Π½ΠΈΠ΅ ΠΏΡΠΈΠ·Π½Π°ΠΊΠΎΠ², Π²ΠΊΠ»ΡΡΠ°Ρ Weight X_train = train.drop(columns=features_to_drop) y_train = train["Result"] X_test = test.drop(columns=features_to_drop) y_test = test["Result"] # Π€ΡΠ½ΠΊΡΠΈΡ Π΄Π»Ρ Π·Π°Π³ΡΡΠ·ΠΊΠΈ ΠΌΠΎΠ΄Π΅Π»ΠΈ CatBoost @st.cache_resource def load_catboost_model(file_path): try: model = CatBoostClassifier() model.load_model(file_path) # st.write(f"Π’ΠΈΠΏ Π·Π°Π³ΡΡΠΆΠ΅Π½Π½ΠΎΠΉ ΠΌΠΎΠ΄Π΅Π»ΠΈ: {type(model)}") # ΠΠ»Ρ ΠΎΡΠ»Π°Π΄ΠΊΠΈ return model except Exception as e: st.write(f"ΠΡΠΈΠ±ΠΊΠ° ΠΏΡΠΈ Π·Π°Π³ΡΡΠ·ΠΊΠ΅ ΠΌΠΎΠ΄Π΅Π»ΠΈ CatBoost: {e}") return None model_path = "catboost_model.cb" model = load_catboost_model(model_path) # # ΠΡΠΎΠ²Π΅ΡΠΊΠ° ΠΏΡΠΈΠ·Π½Π°ΠΊΠΎΠ² # model_feature_names = model.feature_names_ # st.write("ΠΡΠΈΠ·Π½Π°ΠΊΠΈ ΠΌΠΎΠ΄Π΅Π»ΠΈ:", model_feature_names) # st.write("ΠΡΠΈΠ·Π½Π°ΠΊΠΈ Π² Π΄Π°Π½Π½ΡΡ Π΄Π»Ρ ΠΎΠ±ΡΡΠ΅Π½ΠΈΡ:", X_train.columns.tolist()) # ΠΠ°ΠΏΠΏΠΈΠ½Π³ Π΄Π»Ρ homeRoad home_road_mapping = { 1: "ΠΠ° Π²ΡΠ΅Π·Π΄Π΅", 0: "ΠΠΎΠΌΠ°" } win_mapping = { 1: "ΠΠΎΠ±Π΅Π΄Π°", 0: "ΠΠ΅ ΠΏΠΎΠ±Π΅Π΄Π°: ΠΠΎΡΠ°ΠΆΠ΅Π½ΠΈΠ΅ ΠΈΠ»ΠΈ ΠΠΈΡΡΡ" } # Π€ΡΠ½ΠΊΡΠΈΡ Π΄Π»Ρ ΠΏΡΠ΅Π΄ΡΠΊΠ°Π·Π°Π½ΠΈΡ ΠΈΡΡ ΠΎΠ΄Π° def predict_winner(row, model): # print(f"Π’ΠΈΠΏ ΠΌΠΎΠ΄Π΅Π»ΠΈ Π² predict_winner: {type(model)}") # ΠΠ»Ρ ΠΎΡΠ»Π°Π΄ΠΊΠΈ try: # ΠΠΎΠ΄Π³ΠΎΡΠΎΠ²ΠΊΠ° Π²Ρ ΠΎΠ΄Π½ΡΡ Π΄Π°Π½Π½ΡΡ features = pd.DataFrame([row], columns=X_train.columns).fillna(0) # Π‘ΠΎΠ·Π΄Π°Π½ΠΈΠ΅ ΠΎΠ±ΡΠ΅ΠΊΡΠ° Pool Π΄Π»Ρ CatBoost pool = Pool(data=features, feature_names=X_train.columns.tolist()) # Π‘Π΄Π΅Π»Π°ΠΉΡΠ΅ ΠΏΡΠ΅Π΄ΡΠΊΠ°Π·Π°Π½ΠΈΠ΅ prediction = model.predict(pool) prediction_proba = model.predict_proba(pool) # st.write(f"ΠΡΠ΅Π΄ΡΠΊΠ°Π·Π°Π½ΠΈΠ΅: {prediction}, Π²Π΅ΡΠΎΡΡΠ½ΠΎΡΡΡ: {prediction_proba}") # ΠΠ»Ρ ΠΎΡΠ»Π°Π΄ΠΊΠΈ # ΠΠ΅ΡΠ½ΠΈΡΠ΅ ΡΠ΅Π·ΡΠ»ΡΡΠ°Ρ ΠΈ Π²Π΅ΡΠΎΡΡΠ½ΠΎΡΡΡ result = 'ΠΠΎΠ±Π΅Π΄Π° Ρ Π²Π΅ΡΠΎΡΡΠ½ΠΎΡΡΡΡ' if prediction[0] == 1 else 'ΠΠ΅ ΠΏΠΎΠ±Π΅Π΄Π°: ΠΠΎΡΠ°ΠΆΠ΅Π½ΠΈΠ΅ ΠΈΠ»ΠΈ ΠΠΈΡΡΡ Ρ Π²Π΅ΡΠΎΡΡΠ½ΠΎΡΡΡΡ' probability = prediction_proba[0][1] if prediction[0] == 1 else prediction_proba[0][0] return result, probability except Exception as e: print(f"ΠΡΠΈΠ±ΠΊΠ° ΠΏΡΠΈ ΠΏΡΠ΅Π΄ΡΠΊΠ°Π·Π°Π½ΠΈΠΈ: {e}") return "ΠΡΠΈΠ±ΠΊΠ°" st.markdown( """ """, unsafe_allow_html=True ) st.markdown('