Example (gpu_snapshot.py)
This is the source code for 06_gpu_and_ml.gpu_snapshot.
import modal
# Container image: Debian slim base with sentence-transformers (< 6) installed via uv.
image = modal.Image.debian_slim().uv_pip_install("sentence-transformers<6")
# The same name is used to create the App here and to look the deployed class up
# again in the __main__ block below.
app_name = "example-gpu-snapshot"
app = modal.App(app_name, image=image)
# Change this to invalidate the snapshot cache.
# The value is referenced inside the snapshotted `load` step of SnapshotEmbedder.
snapshot_key = "v1"
# Import in the global scope so the imported modules can be included in the snapshot.
# NOTE(review): `image.imports()` presumably tolerates the package being absent on
# the local machine -- confirm against the Modal `Image.imports` documentation.
with image.imports():
    from sentence_transformers import SentenceTransformer
@app.cls(
    gpu="a10",
    enable_memory_snapshot=True,
    experimental_options={"enable_gpu_snapshot": True},
)
class SnapshotEmbedder:
    """Sentence embedder whose model is loaded on the GPU in a snapshotted enter step."""

    @modal.enter(snap=True)
    def load(self):
        """Place the embedding model on the GPU so it can be captured in the snapshot."""
        # This hook runs in the enter phase of the container lifecycle and is
        # registered with snap=True, so the state it builds can be snapshotted.
        print("loading model")
        checkpoint = "BAAI/bge-small-en-v1.5"
        self.model = SentenceTransformer(checkpoint, device="cuda")
        print(f"snapshotting {snapshot_key}")

    @modal.method()
    def run(self, sentences: list[str]) -> list[list[float]]:
        """Encode `sentences` with normalized embeddings and return them as nested lists."""
        # Later invocations of the Function start here, using the model
        # that `load` stored on the instance.
        return self.model.encode(sentences, normalize_embeddings=True).tolist()
if __name__ == "__main__":
    # Script entry point: call the deployed SnapshotEmbedder class remotely.
    # After deployment, we can use the class from anywhere by looking it up
    # by app name and class name.
    SnapshotEmbedder = modal.Cls.from_name(app_name, "SnapshotEmbedder")
    embedder = SnapshotEmbedder()
    try:
        print("calling Modal Function")
        print(embedder.run.remote(sentences=["what is the meaning of life?"]))
    except modal.exception.NotFoundError as err:
        # Chain explicitly so the traceback reports the failed lookup as the
        # direct cause of this hint rather than as an unrelated error that
        # happened "during handling" of another exception.
        raise Exception(
            f"To take advantage of GPU snapshots, deploy first with modal deploy {__file__}"
        ) from err