{"id":"together-api","name":"Together AI API","homepage":"https://docs.together.ai","repo_url":null,"category":"ai-ml","subcategories":["llm","inference","embedding","fine-tuning"],"tags":["together","llm","inference","embedding","fine-tuning","rest-api","sdk","open-source-models","gpu-cloud"],"what_it_does":"Together AI's inference API for running open-source LLMs (Llama, Mistral, Mixtral, etc.) with OpenAI-compatible endpoints for chat, completion, and embedding tasks.","use_cases":["Running open-source LLMs with OpenAI-compatible API format","Generating embeddings for semantic search and RAG pipelines","Fine-tuning open-source models on custom datasets","High-throughput inference with open-source models at lower cost","Building agents that need diverse model options without vendor lock-in"],"not_for":["Teams needing only proprietary frontier models (use OpenAI/Anthropic directly)","Sub-10ms inference requirements (for those, use Groq)","Teams without technical knowledge to evaluate open-source models","Applications requiring model output guarantees and SLAs"],"best_when":"An agent needs access to open-source LLMs via an OpenAI-compatible API at competitive pricing, especially for high-throughput or fine-tuned model inference.","avoid_when":"You need the absolute fastest inference (Groq), or are limited to frontier proprietary models.","alternatives":["groq-api","replicate-api","fireworks-api"],"af_score":79.0,"security_score":72.0,"reliability_score":null,"package_type":"mcp_server","discovery_source":["github"],"priority":"low","status":"evaluated","version_evaluated":"current","last_evaluated":"2026-03-01T09:50:06.300870+00:00","performance":{"latency_p50_ms":500,"latency_p99_ms":3000,"uptime_sla_percent":99.5,"rate_limits":"Varies by plan; typically 600 requests/minute","data_source":"llm_estimated","measured_on":null}}