Spaces:
Sleeping
Sleeping
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
MLflow Logging for Ultralytics YOLO.

This module enables MLflow logging for Ultralytics YOLO. It logs metrics, parameters, and model artifacts.
For setting up, a tracking URI should be specified. The logging can be customized using environment variables.

Commands:
    1. To set a project name:
        `export MLFLOW_EXPERIMENT_NAME=<your_experiment_name>` or use the project=<project> argument

    2. To set a run name:
        `export MLFLOW_RUN=<your_run_name>` or use the name=<name> argument

    3. To start a local MLflow server:
        mlflow server --backend-store-uri runs/mlflow
       It will by default start a local server at http://127.0.0.1:5000.
       To specify a different URI, set the MLFLOW_TRACKING_URI environment variable.

    4. To kill all running MLflow server instances:
        ps aux | grep 'mlflow' | grep -v 'grep' | awk '{print $2}' | xargs kill -9
"""
from ultralytics.utils import LOGGER, RUNS_DIR, SETTINGS, TESTS_RUNNING, colorstr | |
# Optional-dependency guard: `mlflow` is left as None whenever the integration must stay off
# (running under unrelated pytest tests, disabled in settings, or the package is unavailable).
try:
    import os

    # Explicit checks instead of `assert`: assert statements are removed when Python runs with -O,
    # which would silently bypass these guards. AssertionError is still raised so the handler
    # below keeps catching exactly the same exception types as before.
    if TESTS_RUNNING and "test_mlflow" not in os.environ.get("PYTEST_CURRENT_TEST", ""):
        raise AssertionError("MLflow logging is skipped under pytest")  # do not log pytest
    if SETTINGS["mlflow"] is not True:
        raise AssertionError("MLflow integration is not enabled in settings")  # verify integration is enabled

    import mlflow

    if not hasattr(mlflow, "__version__"):
        raise AssertionError("'mlflow' has no __version__ attribute")  # verify package is not directory

    from pathlib import Path

    PREFIX = colorstr("MLflow: ")

    def SANITIZE(x):
        """Return a new dict from mapping `x` with '(' and ')' removed from keys and values cast to float."""
        return {k.replace("(", "").replace(")", ""): float(v) for k, v in x.items()}

except (ImportError, AssertionError):
    mlflow = None
def on_pretrain_routine_end(trainer):
    """
    Start (or reuse) an MLflow run and log the trainer arguments as parameters.

    The function sets the tracking URI, selects the experiment, enables `mlflow.autolog()`, and then reuses the
    currently active run or starts a new one before logging `dict(trainer.args)` with `mlflow.log_params`.
    Any exception raised while starting the run or logging the parameters is caught and reported as a warning,
    so the caller is not interrupted.

    Args:
        trainer (ultralytics.engine.trainer.BaseTrainer): The training object with arguments and parameters to log.
            Reads `trainer.args.project`, `trainer.args.name` and `trainer.args`.

    Environment Variables:
        MLFLOW_TRACKING_URI: The URI for MLflow tracking. If unset or empty, `str(RUNS_DIR / "mlflow")` is used.
        MLFLOW_EXPERIMENT_NAME: The name of the MLflow experiment. If unset or empty, falls back to
            `trainer.args.project`, then to "/Shared/YOLOv8".
        MLFLOW_RUN: The name of the MLflow run. If unset or empty, falls back to `trainer.args.name`.
    """
    uri = os.environ.get("MLFLOW_TRACKING_URI") or str(RUNS_DIR / "mlflow")
    LOGGER.debug(f"{PREFIX} tracking uri: {uri}")
    mlflow.set_tracking_uri(uri)

    # Set experiment and run names
    experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME") or trainer.args.project or "/Shared/YOLOv8"
    run_name = os.environ.get("MLFLOW_RUN") or trainer.args.name
    mlflow.set_experiment(experiment_name)

    mlflow.autolog()
    try:
        # Reuse a run that is already active; only start a new one when there is none.
        active_run = mlflow.active_run() or mlflow.start_run(run_name=run_name)
        LOGGER.info(f"{PREFIX}logging run_id({active_run.info.run_id}) to {uri}")
        if Path(uri).is_dir():
            # The server hint is only printed when the URI is an existing local directory.
            LOGGER.info(f"{PREFIX}view at http://127.0.0.1:5000 with 'mlflow server --backend-store-uri {uri}'")
        LOGGER.info(f"{PREFIX}disable with 'yolo settings mlflow=False'")
        mlflow.log_params(dict(trainer.args))
    except Exception as e:
        # Deliberate best-effort: a tracking failure is reported but never propagated.
        LOGGER.warning(
            f"{PREFIX}WARNING ⚠️ Failed to initialize: {e}\n"
            f"{PREFIX}WARNING ⚠️ Not tracking this run"
        )
def on_train_epoch_end(trainer):
    """
    Send the sanitized `trainer.lr` values and labelled training loss items to MLflow for the current epoch.

    Does nothing when the module-level `mlflow` is falsy.

    Args:
        trainer: Training object; reads `trainer.lr`, `trainer.tloss`, `trainer.epoch` and calls
            `trainer.label_loss_items(trainer.tloss, prefix="train")`.
    """
    if not mlflow:
        return

    lr_metrics = SANITIZE(trainer.lr)
    loss_metrics = SANITIZE(trainer.label_loss_items(trainer.tloss, prefix="train"))
    # Loss entries are merged last, so they win if a key appears in both mappings.
    mlflow.log_metrics(metrics={**lr_metrics, **loss_metrics}, step=trainer.epoch)
def on_fit_epoch_end(trainer):
    """
    Send the sanitized contents of `trainer.metrics` to MLflow, using `trainer.epoch` as the step.

    Does nothing when the module-level `mlflow` is falsy.

    Args:
        trainer: Training object; reads `trainer.metrics` and `trainer.epoch`.
    """
    if not mlflow:
        return

    epoch_metrics = SANITIZE(trainer.metrics)
    mlflow.log_metrics(metrics=epoch_metrics, step=trainer.epoch)
def on_train_end(trainer):
    """
    Log model artifacts at the end of the training and close the MLflow run unless asked to keep it open.

    Uploads the directory containing `trainer.best`, then every entry directly inside `trainer.save_dir` whose
    suffix is one of .png, .jpg, .csv, .pt or .yaml. Does nothing when the module-level `mlflow` is falsy.

    Args:
        trainer: Training object; reads `trainer.best` and `trainer.save_dir`.

    Environment Variables:
        MLFLOW_KEEP_RUN_ACTIVE: When equal to "true" (case-insensitive) the run is left active and the caller is
            reminded to call `mlflow.end_run()`; any other value, or no value, ends the run here.
    """
    if not mlflow:
        return

    mlflow.log_artifact(str(trainer.best.parent))  # log save_dir/weights directory with best.pt and last.pt
    for f in trainer.save_dir.glob("*"):  # log all other files in save_dir
        if f.suffix in {".png", ".jpg", ".csv", ".pt", ".yaml"}:
            mlflow.log_artifact(str(f))

    # Exact comparison: the previous `in ("true")` was a substring test against the string "true",
    # so values such as "" or "t" were wrongly treated as enabling the flag.
    keep_run_active = os.environ.get("MLFLOW_KEEP_RUN_ACTIVE", "False").lower() == "true"
    if keep_run_active:
        LOGGER.info(f"{PREFIX}mlflow run still alive, remember to close it using mlflow.end_run()")
    else:
        mlflow.end_run()
        LOGGER.debug(f"{PREFIX}mlflow run ended")

    LOGGER.info(
        f"{PREFIX}results logged to {mlflow.get_tracking_uri()}\n"
        f"{PREFIX}disable with 'yolo settings mlflow=False'"
    )
# Callback registry keyed by event name; it stays empty when the module-level `mlflow` is falsy.
callbacks = {}
if mlflow:
    callbacks.update(
        on_pretrain_routine_end=on_pretrain_routine_end,
        on_train_epoch_end=on_train_epoch_end,
        on_fit_epoch_end=on_fit_epoch_end,
        on_train_end=on_train_end,
    )