Example (gpu_snapshot.py)

This is the source code for 06_gpu_and_ml.gpu_snapshot.

import modal

image = modal.Image.debian_slim().uv_pip_install("sentence-transformers<6")
app_name = "example-gpu-snapshot"
app = modal.App(app_name, image=image)

snapshot_key = "v1"  # change this to invalidate the snapshot cache

with image.imports():  # import in the global scope so imports can be snapshot
    from sentence_transformers import SentenceTransformer


@app.cls(
    gpu="a10",
    enable_memory_snapshot=True,
    experimental_options={"enable_gpu_snapshot": True},
)
class SnapshotEmbedder:
    @modal.enter(snap=True)
    def load(self):
        # during enter phase of container lifecycle,
        # load the model onto the GPU so it can be snapshot
        print("loading model")
        self.model = SentenceTransformer("BAAI/bge-small-en-v1.5", device="cuda")
        print(f"snapshotting {snapshot_key}")

    @modal.method()
    def run(self, sentences: list[str]) -> list[list[float]]:
        # later invocations of the Function will start here
        embeddings = self.model.encode(sentences, normalize_embeddings=True)
        return embeddings.tolist()


if __name__ == "__main__":
    # after deployment, we can use the class from anywhere
    SnapshotEmbedder = modal.Cls.from_name(app_name, "SnapshotEmbedder")
    embedder = SnapshotEmbedder()
    try:
        print("calling Modal Function")
        print(embedder.run.remote(sentences=["what is the meaning of life?"]))
    except modal.exception.NotFoundError:
        raise Exception(
            f"To take advantage of GPU snapshots, deploy first with modal deploy {__file__}"
        )