{"id":"phenompeople-bert-server-gpu","name":"bert-server-gpu","homepage":"https://hub.docker.com/r/phenompeople/bert-server-gpu","repo_url":"https://hub.docker.com/r/phenompeople/bert-server-gpu","category":"ai-ml","subcategories":[],"tags":["ai-ml","nlp","bert","inference","gpu","self-hosted","server"],"what_it_does":"bert-server-gpu appears to be a self-hosted server for running BERT models with GPU acceleration, exposing model inference via some server interface (details not provided in available documentation).","use_cases":["Local/air-gapped BERT inference for classification, similarity, or embeddings","Low-latency NLP inference using GPUs","Building an internal API around BERT models"],"not_for":["No-code usage without deployment effort","Scenarios needing a managed hosted service with guaranteed uptime/SLA"],"best_when":null,"avoid_when":null,"alternatives":["Hugging Face Inference Endpoints","TorchServe","Triton Inference Server","vLLM (for compatible transformer serving workflows)","SageMaker / Vertex AI custom training+serving"],"af_score":22.8,"security_score":28.0,"reliability_score":25.0,"package_type":"mcp_server","discovery_source":["docker_mcp"],"priority":"low","status":"evaluated","version_evaluated":null,"last_evaluated":"2026-04-04T21:33:08.929024+00:00","interface":{"has_rest_api":false,"has_graphql":false,"has_grpc":false,"has_mcp_server":false,"mcp_server_url":null,"has_sdk":false,"sdk_languages":[],"openapi_spec_url":null,"webhooks":false},"auth":{"methods":[],"oauth":false,"scopes":false,"notes":"Authentication/interface security cannot be determined from the provided information."},"pricing":{"model":null,"free_tier_exists":false,"free_tier_limits":null,"paid_tiers":[],"requires_credit_card":false,"estimated_workload_costs":null,"notes":"Cost depends on infrastructure (GPU, hosting, bandwidth); no pricing information provided."},"requirements":{"requires_signup":false,"requires_credit_card":false,"domain_verification":false,"data_residency":[],"compliance":[],"min_contract":null},"agent_readiness":{"af_score":22.8,"security_score":28.0,"reliability_score":25.0,"mcp_server_quality":0.0,"documentation_accuracy":0.0,"error_message_quality":0.0,"error_message_notes":null,"auth_complexity":50.0,"rate_limit_clarity":0.0,"tls_enforcement":30.0,"auth_strength":30.0,"scope_granularity":20.0,"dependency_hygiene":30.0,"secret_handling":30.0,"security_notes":"No security documentation provided. As a self-hosted inference server, deployers should ensure TLS termination, authentication/authorization, secret handling (env vars/vault), and patching of dependencies/ML runtime.","uptime_documented":0.0,"version_stability":40.0,"breaking_changes_history":40.0,"error_recovery":20.0,"idempotency_support":false,"idempotency_notes":null,"pagination_style":"none","retry_guidance_documented":false,"known_agent_gotchas":["Server inference endpoints often require careful batching, request size limits, and GPU memory management.","Idempotency/retry behavior is typically endpoint-specific (not known here)."]}}