回传数据解析，兼容v5和v10

2025-04-18 14:41:53 +08:00
commit 010f5c445a
888 changed files with 93632 additions and 0 deletions
--- a/ultralytics/utils/callbacks/mlflow.py
+++ b/ultralytics/utils/callbacks/mlflow.py
@ -0,0 +1,133 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+"""
+MLflow Logging for Ultralytics YOLO.
+
+This module enables MLflow logging for Ultralytics YOLO. It logs metrics, parameters, and model artifacts.
+For setting up, a tracking URI should be specified. The logging can be customized using environment variables.
+
+Commands:
+    1. To set a project name:
+        `export MLFLOW_EXPERIMENT_NAME=<your_experiment_name>` or use the project=<project> argument
+
+    2. To set a run name:
+        `export MLFLOW_RUN=<your_run_name>` or use the name=<name> argument
+
+    3. To start a local MLflow server:
+        mlflow server --backend-store-uri runs/mlflow
+       It will by default start a local server at http://127.0.0.1:5000.
+       To specify a different URI, set the MLFLOW_TRACKING_URI environment variable.
+
+    4. To kill all running MLflow server instances:
+        ps aux | grep 'mlflow' | grep -v 'grep' | awk '{print $2}' | xargs kill -9
+"""
+
+from ultralytics.utils import LOGGER, RUNS_DIR, SETTINGS, TESTS_RUNNING, colorstr
+
+try:
+    import os
+
+    assert not TESTS_RUNNING or "test_mlflow" in os.environ.get("PYTEST_CURRENT_TEST", "")  # do not log pytest
+    assert SETTINGS["mlflow"] is True  # verify integration is enabled
+    import mlflow
+
+    assert hasattr(mlflow, "__version__")  # verify package is not directory
+    from pathlib import Path
+
+    PREFIX = colorstr("MLflow: ")
+    SANITIZE = lambda x: {k.replace("(", "").replace(")", ""): float(v) for k, v in x.items()}
+
+except (ImportError, AssertionError):
+    mlflow = None
+
+
+def on_pretrain_routine_end(trainer):
+    """
+    Log training parameters to MLflow at the end of the pretraining routine.
+
+    This function sets up MLflow logging based on environment variables and trainer arguments. It sets the tracking URI,
+    experiment name, and run name, then starts the MLflow run if not already active. It finally logs the parameters
+    from the trainer.
+
+    Args:
+        trainer (ultralytics.engine.trainer.BaseTrainer): The training object with arguments and parameters to log.
+
+    Global:
+        mlflow: The imported mlflow module to use for logging.
+
+    Environment Variables:
+        MLFLOW_TRACKING_URI: The URI for MLflow tracking. If not set, defaults to 'runs/mlflow'.
+        MLFLOW_EXPERIMENT_NAME: The name of the MLflow experiment. If not set, defaults to trainer.args.project.
+        MLFLOW_RUN: The name of the MLflow run. If not set, defaults to trainer.args.name.
+        MLFLOW_KEEP_RUN_ACTIVE: Boolean indicating whether to keep the MLflow run active after the end of the training phase.
+    """
+    global mlflow
+
+    uri = os.environ.get("MLFLOW_TRACKING_URI") or str(RUNS_DIR / "mlflow")
+    LOGGER.debug(f"{PREFIX} tracking uri: {uri}")
+    mlflow.set_tracking_uri(uri)
+
+    # Set experiment and run names
+    experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME") or trainer.args.project or "/Shared/YOLOv8"
+    run_name = os.environ.get("MLFLOW_RUN") or trainer.args.name
+    mlflow.set_experiment(experiment_name)
+
+    mlflow.autolog()
+    try:
+        active_run = mlflow.active_run() or mlflow.start_run(run_name=run_name)
+        LOGGER.info(f"{PREFIX}logging run_id({active_run.info.run_id}) to {uri}")
+        if Path(uri).is_dir():
+            LOGGER.info(f"{PREFIX}view at http://127.0.0.1:5000 with 'mlflow server --backend-store-uri {uri}'")
+        LOGGER.info(f"{PREFIX}disable with 'yolo settings mlflow=False'")
+        mlflow.log_params(dict(trainer.args))
+    except Exception as e:
+        LOGGER.warning(f"{PREFIX}WARNING ⚠️ Failed to initialize: {e}\n" f"{PREFIX}WARNING ⚠️ Not tracking this run")
+
+
+def on_train_epoch_end(trainer):
+    """Log training metrics at the end of each train epoch to MLflow."""
+    if mlflow:
+        mlflow.log_metrics(
+            metrics={
+                **SANITIZE(trainer.lr),
+                **SANITIZE(trainer.label_loss_items(trainer.tloss, prefix="train")),
+            },
+            step=trainer.epoch,
+        )
+
+
+def on_fit_epoch_end(trainer):
+    """Log training metrics at the end of each fit epoch to MLflow."""
+    if mlflow:
+        mlflow.log_metrics(metrics=SANITIZE(trainer.metrics), step=trainer.epoch)
+
+
+def on_train_end(trainer):
+    """Log model artifacts at the end of the training."""
+    if mlflow:
+        mlflow.log_artifact(str(trainer.best.parent))  # log save_dir/weights directory with best.pt and last.pt
+        for f in trainer.save_dir.glob("*"):  # log all other files in save_dir
+            if f.suffix in {".png", ".jpg", ".csv", ".pt", ".yaml"}:
+                mlflow.log_artifact(str(f))
+        keep_run_active = os.environ.get("MLFLOW_KEEP_RUN_ACTIVE", "False").lower() in ("true")
+        if keep_run_active:
+            LOGGER.info(f"{PREFIX}mlflow run still alive, remember to close it using mlflow.end_run()")
+        else:
+            mlflow.end_run()
+            LOGGER.debug(f"{PREFIX}mlflow run ended")
+
+        LOGGER.info(
+            f"{PREFIX}results logged to {mlflow.get_tracking_uri()}\n"
+            f"{PREFIX}disable with 'yolo settings mlflow=False'"
+        )
+
+
+callbacks = (
+    {
+        "on_pretrain_routine_end": on_pretrain_routine_end,
+        "on_train_epoch_end": on_train_epoch_end,
+        "on_fit_epoch_end": on_fit_epoch_end,
+        "on_train_end": on_train_end,
+    }
+    if mlflow
+    else {}
+)