
Cloud
Learning Level
Beyond basic Cloud Run deployments, enterprise applications require optimization across performance, cost, security, and scalability. This lesson covers advanced patterns used in production systems.
By the end of this lesson, you'll understand:
Deploy your Cloud Run services across multiple regions for global availability and low latency:
# Deploy to us-central1
gcloud run deploy my-api \
--region us-central1 \
--image gcr.io/my-project/my-api:v1.0.0
# Deploy same service to europe-west1
gcloud run deploy my-api \
--region europe-west1 \
--image gcr.io/my-project/my-api:v1.0.0
# Deploy to asia-northeast1
gcloud run deploy my-api \
--region asia-northeast1 \
--image gcr.io/my-project/my-api:v1.0.0
# View all deployments
gcloud run services list --platform managed
Global Load Balancing Setup:
# Create a Network Endpoint Group for each region
gcloud compute network-endpoint-groups create my-api-us \
--region=us-central1 \
--network-endpoint-type=SERVERLESS \
--cloud-run-service=my-api \
--cloud-run-region=us-central1
gcloud compute network-endpoint-groups create my-api-eu \
--region=europe-west1 \
--network-endpoint-type=SERVERLESS \
--cloud-run-service=my-api \
--cloud-run-region=europe-west1
# Create backend services pointing to NEGs
gcloud compute backend-services create my-api-backend \
--global \
--protocol=HTTPS
# Note: backend services backed by serverless NEGs do not support load
# balancer health checks; Cloud Run manages container health itself
# Add NEGs to backend service
gcloud compute backend-services add-backend my-api-backend \
--global \
--network-endpoint-group=my-api-us \
--network-endpoint-group-region=us-central1
gcloud compute backend-services add-backend my-api-backend \
--global \
--network-endpoint-group=my-api-eu \
--network-endpoint-group-region=europe-west1
# Create URL map for routing
gcloud compute url-maps create my-api-lb \
--default-service=my-api-backend
# Create HTTPS proxy
gcloud compute target-https-proxies create my-api-proxy \
--url-map=my-api-lb \
--ssl-certificates=my-certificate
# Create forwarding rule
gcloud compute forwarding-rules create my-api-lb-rule \
--global \
--target-https-proxy=my-api-proxy \
--address=my-static-ip \
--ports=443
Multi-Region Traffic Management with Cloud CDN:
# Enable Cloud CDN on the backend service
gcloud compute backend-services update my-api-backend \
--global \
--enable-cdn
# Configure cache settings
gcloud compute backend-services update my-api-backend \
--global \
--cache-mode=CACHE_ALL_STATIC \
--default-ttl=3600 \
--max-ttl=86400
# Set client affinity for session management
gcloud compute backend-services update my-api-backend \
--global \
--session-affinity=CLIENT_IP \
--timeout=30s
#### Using Committed Use Discounts
# Estimate Cloud Run costs
gcloud compute project-info describe \
--project=my-project \
--format='value(metrics)' | grep -i "cloud-run"
# Create a committed use discount (CUD) recommendation
# Check Cloud Console Recommendations > Commitments
# Example: Commit to 100 vCPU-months in a region
gcloud compute commitments create my-api-commitment \
--region=us-central1 \
--plan=one-year \
--resources=vcpu=100,memory=400
# View active commitments
gcloud compute commitments list --filter="region:us-central1"
#### Request-Based Scaling with Predictive Metrics
# Configure autoscaling for predictable traffic patterns
gcloud run deploy my-api \
--region=us-central1 \
--max-instances=1000 \
--min-instances=10 \
--memory=2Gi \
--cpu=2 \
--concurrency=80
# Monitor scaling behavior
gcloud monitoring read-time-series \
--filter='resource.type="cloud_run_revision"' \
--filter='metric.type="run.googleapis.com/request_count"' \
--start-time="-1h"
# Create alerting policy for scaling issues
gcloud alpha monitoring policies create \
--notification-channels=CHANNEL_ID \
--display-name="Cloud Run High Latency Alert" \
--condition-display-name="P99 Latency > 1s" \
--condition-threshold-value=1000 \
--condition-threshold-duration=300s
#### VPC Connector Configuration for Private Resources
# Create a VPC connector for accessing private databases
gcloud compute networks vpc-access connectors create my-connector \
--network=my-vpc \
--region=us-central1 \
--range=10.8.0.0/28 \
--max-throughput=300
# Deploy Cloud Run with VPC connector
gcloud run deploy my-api \
--region=us-central1 \
--image=gcr.io/my-project/my-api:v1.0.0 \
--vpc-connector=my-connector \
--vpc-egress=all-traffic
# Example Node.js application using private Cloud SQL
Application code using VPC Connector:
// server.js - Accessing private Cloud SQL database
const express = require('express');
const mysql = require('mysql2/promise');
const app = express();
// Connection pool for Cloud SQL (private via VPC connector)
const pool = mysql.createPool({
  host: '10.45.0.2', // Private IP of Cloud SQL instance
  port: 3306,
  user: 'api-user',
  password: process.env.DB_PASSWORD,
  database: 'production',
  waitForConnections: true,
  connectionLimit: 10,
  queueLimit: 0
});
// Endpoint with database connection.
// Returns 200 with {id, email, created_at}, 404 when no row matches,
// or 500 on any database failure.
app.get('/users/:id', async (req, res) => {
  let connection;
  try {
    // Acquire the connection INSIDE try: if the pool is exhausted or the
    // VPC connector is down, getConnection() rejects, and Express 4 does
    // not catch async errors — previously this produced an unhandled
    // rejection and a request that never responded.
    connection = await pool.getConnection();
    const [rows] = await connection.execute(
      'SELECT id, email, created_at FROM users WHERE id = ?',
      [req.params.id]
    );
    if (rows.length === 0) {
      return res.status(404).json({ error: 'User not found' });
    }
    res.json(rows[0]);
  } catch (error) {
    console.error('Database error:', error);
    res.status(500).json({ error: 'Internal server error' });
  } finally {
    // Release only if acquisition succeeded.
    if (connection) connection.release();
  }
});
app.listen(process.env.PORT || 3000);
#### Network Policy and Private Service Connection
# Create Shared VPC setup for enterprise deployments
gcloud compute shared-vpc host-projects create my-host-project
# Attach service project
gcloud compute shared-vpc associated-projects attach my-service-project \
--host-project=my-host-project
# Deploy Cloud Run in shared VPC with Private Service Connection
gcloud run deploy my-api \
--region=us-central1 \
--image=gcr.io/my-project/my-api:v1.0.0 \
--network=projects/my-host-project/global/networks/my-vpc \
--subnet=projects/my-host-project/regions/us-central1/subnetworks/my-subnet \
--vpc-egress=private-ranges-only
#### Using Service Account for Authentication
# Create service account for inter-service communication
gcloud iam service-accounts create cloud-run-invoker \
--display-name="Cloud Run Service Invoker"
# Grant service account permission to invoke Cloud Run services
gcloud run services add-iam-policy-binding my-api \
--region=us-central1 \
--member=serviceAccount:cloud-run-invoker@my-project.iam.gserviceaccount.com \
--role=roles/run.invoker
# Deploy dependent service with service account
gcloud run deploy my-worker \
--region=us-central1 \
--image=gcr.io/my-project/my-worker:v1.0.0 \
--service-account=cloud-run-invoker@my-project.iam.gserviceaccount.com
# Get service account key for local testing
gcloud iam service-accounts keys create key.json \
--iam-account=cloud-run-invoker@my-project.iam.gserviceaccount.com
Application code for service-to-service calls:
// worker.js - Calling another Cloud Run service
// NOTE(review): axios is required but never used — the google-auth-library
// client performs the HTTP request itself via client.request().
const axios = require('axios');
const {GoogleAuth} = require('google-auth-library');
const auth = new GoogleAuth();

/**
 * POST the current job payload to another Cloud Run service, authenticating
 * with an identity token minted for this service's service account.
 * @param {string} serviceUrl - Full HTTPS URL of the target endpoint; also
 *   used as the token audience.
 * @returns {Promise<*>} the response body from the target service.
 * @throws re-throws any request/auth failure after logging it.
 */
async function callCloudRunService(serviceUrl) {
  // Get identity token for current service account
  const client = await auth.getIdTokenClient(serviceUrl);
  try {
    const response = await client.request({
      url: serviceUrl,
      method: 'POST',
      data: {
        jobId: process.env.JOB_ID,
        timestamp: new Date().toISOString()
      }
    });
    return response.data;
  } catch (error) {
    console.error('Service call failed:', error.message);
    throw error;
  }
}

/**
 * Process one job by calling the API service, retrying on failure.
 * Previously this retried forever every 5s; retries are now bounded.
 * @param {string} jobId
 * @param {number} [attempt=0] - internal retry counter (new optional
 *   parameter; existing processJob(jobId) calls are unchanged).
 */
async function processJob(jobId, attempt = 0) {
  const MAX_RETRIES = 3;
  const apiUrl = 'https://my-api-abc123-uc.a.run.app/jobs/process';
  try {
    const result = await callCloudRunService(apiUrl);
    console.log('Job processed:', result);
  } catch (error) {
    if (attempt >= MAX_RETRIES) {
      console.error(`Job ${jobId} failed after ${MAX_RETRIES} retries`);
      return;
    }
    // Fire-and-forget retry; deliberately not awaited.
    setTimeout(() => void processJob(jobId, attempt + 1), 5000);
  }
}
processJob('job-12345');
#### Custom Metrics and Dashboards
# Create a Cloud Monitoring dashboard for Cloud Run metrics
gcloud monitoring dashboards create --config-from-file=dashboard.json
# Example dashboard configuration
cat > dashboard.json << 'EOF'
{
"displayName": "Cloud Run Production Dashboard",
"mosaicLayout": {
"columns": 12,
"tiles": [
{
"width": 6,
"height": 4,
"widget": {
"title": "Request Count by Region",
"xyChart": {
"dataSets": [{
"timeSeriesQuery": {
"timeSeriesFilter": {
"filter": "resource.type=cloud_run_revision AND metric.type=run.googleapis.com/request_count",
"aggregation": {
"alignmentPeriod": "60s",
"perSeriesAligner": "ALIGN_RATE"
}
}
}
}]
}
}
},
{
"xPos": 6,
"width": 6,
"height": 4,
"widget": {
"title": "P99 Request Latency",
"xyChart": {
"dataSets": [{
"timeSeriesQuery": {
"timeSeriesFilter": {
"filter": "resource.type=cloud_run_revision AND metric.type=run.googleapis.com/request_latencies",
"aggregation": {
"alignmentPeriod": "60s",
"perSeriesAligner": "ALIGN_PERCENTILE_99"
}
}
}
}]
}
}
}
]
}
}
EOF
gcloud monitoring dashboards create --config-from-file=dashboard.json
#### Advanced Logging with Log-based Metrics
# Create log-based metric for errors
gcloud logging metrics create error_count \
--description="Count of error log entries" \
--log-filter='severity=ERROR AND resource.type=cloud_run_revision'
# Create metric for slow requests
gcloud logging metrics create slow_requests \
--description="Requests taking > 1 second" \
--log-filter='resource.type=cloud_run_revision AND jsonPayload.latency_ms > 1000'
# Set up alerting on log-based metrics
gcloud alpha monitoring policies create \
--notification-channels=CHANNEL_ID \
--display-name="High Error Rate Alert" \
--condition-display-name="Error count > 10/min" \
--condition-threshold-value=10 \
--condition-threshold-duration=300s \
--condition-threshold-filter='metric.type="logging.googleapis.com/user/error_count"'
Structured logging in application:
// logging.js - Structured logging for Cloud Run
const logging = require('@google-cloud/logging');
const loggingClient = new logging.Logging({
projectId: process.env.GCP_PROJECT
});
const logger = loggingClient.log('cloud-run-app');
function logStructured(severity, message, metadata = {}) {
const entry = logger.entry(
{ severity, ...metadata },
{
timestamp: new Date().toISOString(),
message,
trace: process.env.CLOUD_TRACE_CONTEXT,
...metadata
}
);
return logger.write(entry);
}
// Usage in request handler
app.use((req, res, next) => {
const startTime = Date.now();
res.on('finish', () => {
const latency = Date.now() - startTime;
const severity = res.statusCode >= 400 ? 'ERROR' : 'INFO';
logStructured(severity, `HTTP ${req.method} ${req.path}`, {
httpRequest: {
requestMethod: req.method,
requestUrl: req.url,
status: res.statusCode,
latency: `${latency}ms`,
userAgent: req.get('user-agent')
},
latency_ms: latency,
path: req.path
});
});
next();
});#### From Compute Engine to Cloud Run
# Step 1: Containerize existing Compute Engine application
# Create Dockerfile from source code
# Step 2: Build and test locally
docker build -t my-api:v1.0.0 .
docker run -p 8080:3000 my-api:v1.0.0
# Step 3: Push to Container Registry
docker tag my-api:v1.0.0 gcr.io/my-project/my-api:v1.0.0
docker push gcr.io/my-project/my-api:v1.0.0
# Step 4: Deploy to Cloud Run
gcloud run deploy my-api \
--region=us-central1 \
--image=gcr.io/my-project/my-api:v1.0.0 \
--max-instances=100 \
--memory=2Gi \
--cpu=2 \
--timeout=900
# Step 5: Setup traffic splitting for gradual migration
gcloud run services update-traffic my-api \
--region=us-central1 \
--to-revisions=my-api-cloud-run-revision-v1=50,my-api-compute-engine=50
# Step 6: Monitor error rates and latency
# Once satisfied, switch 100% traffic to Cloud Run
gcloud run services update-traffic my-api \
--region=us-central1 \
--to-revisions=my-api-cloud-run-revision-v1=100
# Step 7: Shut down Compute Engine instance
gcloud compute instances delete my-api-instance --zone=us-central1-a
#### From App Engine to Cloud Run
# App Engine to Cloud Run migration script
#!/bin/bash
# 1. Export App Engine app to container
gcloud app describe --format='value(runtime)'
# 2. For standard runtime, create Dockerfile
cat > Dockerfile << 'EOF'
FROM google-appengine/debian9
# Copy app engine runtime files
COPY . /app
WORKDIR /app
# Install dependencies
RUN apt-get update && apt-get install -y nodejs npm
RUN npm install
# Expose the same port App Engine uses
EXPOSE 8080
# Run the app
CMD ["npm", "start"]
EOF
# 3. Build and push image
docker build -t gcr.io/my-project/my-app:v1.0.0 .
docker push gcr.io/my-project/my-app:v1.0.0
# 4. Deploy to Cloud Run
gcloud run deploy my-app \
--region=us-central1 \
--image=gcr.io/my-project/my-app:v1.0.0 \
--set-env-vars DATASTORE_MODE=DISABLED,FIRESTORE_ENABLED=true
# 5. Update routing via App Engine dispatch.yaml
cat > dispatch.yaml << 'EOF'
dispatch:
- service: "api"
domain: "api.example.com"
- service: "web"
domain: "example.com"
EOF
gcloud app deploy dispatch.yaml
#### Secret Management at Scale
# Create secrets in Cloud Secret Manager
echo -n "prod-db-password" | gcloud secrets create prod-db-password \
--replication-policy="automatic" \
--data-file=-
# Grant Cloud Run service account access to secret
gcloud secrets add-iam-policy-binding prod-db-password \
--member=serviceAccount:my-api@my-project.iam.gserviceaccount.com \
--role=roles/secretmanager.secretAccessor
# Deploy Cloud Run with secret reference
gcloud run deploy my-api \
--region=us-central1 \
--image=gcr.io/my-project/my-api:v1.0.0 \
--set-secrets DB_PASSWORD=prod-db-password:latest \
--service-account=my-api@my-project.iam.gserviceaccount.com
Application accessing secrets securely:
// secrets.js - Secure secret access
const {SecretManagerServiceClient} = require('@google-cloud/secret-manager');
const secretManager = new SecretManagerServiceClient();

/**
 * Fetch the latest version of a secret from Secret Manager.
 * The project is taken from the GCP_PROJECT environment variable.
 * @param {string} secretName - short secret id (not a full resource name).
 * @returns {Promise<string>} the secret payload decoded as UTF-8.
 * @throws re-throws any access failure after logging it.
 */
async function getSecret(secretName) {
  const projectId = process.env.GCP_PROJECT;
  const name = `projects/${projectId}/secrets/${secretName}/versions/latest`;
  try {
    const [version] = await secretManager.accessSecretVersion({name});
    return version.payload.data.toString('utf8');
  } catch (error) {
    console.error(`Error accessing secret ${secretName}:`, error);
    throw error;
  }
}

// In-memory cache so repeated reads don't hit the Secret Manager API.
const secretCache = new Map();
const CACHE_TTL = 3600000; // 1 hour

/**
 * Like getSecret, but serves values from a per-process cache for up to
 * CACHE_TTL milliseconds before refetching.
 * @param {string} secretName
 * @returns {Promise<string>}
 */
async function getSecretCached(secretName) {
  const cached = secretCache.get(secretName);
  if (cached !== undefined && Date.now() - cached.timestamp < CACHE_TTL) {
    return cached.value;
  }
  const value = await getSecret(secretName);
  secretCache.set(secretName, { value, timestamp: Date.now() });
  return value;
}
module.exports = { getSecret, getSecretCached };
#### Container Image Scanning and Vulnerability Management
# Enable container vulnerability scanning
gcloud container images scan LOCATION-docker.pkg.dev/PROJECT/REPO/IMAGE:TAG
# Create policy to enforce scanning before deployment
cat > image-policy.yaml << 'EOF'
apiVersion: binaryauthorization.grafeas.io/v1beta1
kind: Policy
metadata:
name: cloud-run-image-policy
spec:
requireAttestationsBy:
- projects/PROJECT/attestors/prod-attestor
enforcementMode: ENFORCING_ALL_IMAGES
EOF
# Deploy with image verification
gcloud container binauthz policy import image-policy.yaml
# Create attestor
gcloud container binauthz attestors create prod-attestor \
--project=my-project \
--attestation-authority-note=prod-attestor-note
#### IAM and Access Control
# Principle of least privilege: Create custom roles
gcloud iam roles create cloudRunDeployer \
--title="Cloud Run Deployer" \
--description="Deploy and manage Cloud Run services" \
--permissions=run.services.create,run.services.update,run.services.get,run.services.list
# Grant to CI/CD pipeline
gcloud projects add-iam-policy-binding my-project \
--member=serviceAccount:ci-cd@my-project.iam.gserviceaccount.com \
--role=projects/my-project/roles/cloudRunDeployer
# Restrict service account capabilities
gcloud run deploy my-api \
--region=us-central1 \
--image=gcr.io/my-project/my-api:v1.0.0 \
--service-account=my-api-limited@my-project.iam.gserviceaccount.com \
--no-allow-unauthenticated
# Audit access with Cloud Audit Logs
gcloud logging read \
"resource.type=cloud_run_service AND protoPayload.methodName=google.cloud.run.v1.Services.CreateService" \
--format=json \
--limit=50
Optimize concurrency settings based on application behavior:
# CPU-bound applications: Lower concurrency
gcloud run deploy cpu-intensive-api \
--region=us-central1 \
--image=gcr.io/my-project/cpu-api:v1.0.0 \
--concurrency=10 \
--cpu=4 \
--memory=4Gi
# I/O-bound applications: Higher concurrency
gcloud run deploy io-intensive-api \
--region=us-central1 \
--image=gcr.io/my-project/io-api:v1.0.0 \
--concurrency=1000 \
--cpu=1 \
--memory=512Mi
# Mixed workload: Tune based on metrics
gcloud monitoring read-time-series \
--filter='resource.type="cloud_run_revision" AND metric.type="run.googleapis.com/request_latencies"' \
--format=json | jq '.timeSeries[] | {revision: .resource.labels.revision_name, p99_latency: .points[0].value.double_value}'
# Use minimum instances to keep service warm
gcloud run deploy my-api \
--region=us-central1 \
--image=gcr.io/my-project/my-api:v1.0.0 \
--min-instances=5 \
--max-instances=100
# Implement efficient initialization
Optimized initialization code:
// Defer heavy initialization until first request
let dbPoolInitialized = false;
let dbPool;
async function initializeDbPool() {
if (dbPoolInitialized) return;
dbPool = mysql.createPool({
host: process.env.DB_HOST,
user: process.env.DB_USER,
password: process.env.DB_PASSWORD,
connectionLimit: 10
});
dbPoolInitialized = true;
}
app.get('/health', (req, res) => {
res.json({ status: 'healthy' });
});
app.get('/data', async (req, res) => {
await initializeDbPool();
const connection = await dbPool.getConnection();
// ... database operations
});# Export usage data
gcloud billing export-data \
--billing-account=BILLING_ID \
--dataset=cloud_billing_export \
--overwrite
# Query costs by service
bq query --use_legacy_sql=false << 'EOF'
SELECT
service.description as service,
SUM(usage.amount) as total_usage,
SUM(cost) as total_cost
FROM `project.cloud_billing_export.gcp_billing_export_*`
WHERE service.description LIKE '%Cloud Run%'
GROUP BY service.description
ORDER BY total_cost DESC
EOF
# Cost breakdown by region
bq query --use_legacy_sql=false << 'EOF'
SELECT
location.region,
sku.description,
SUM(usage.amount) as usage,
SUM(cost) as cost
FROM `project.cloud_billing_export.gcp_billing_export_*`
WHERE service.description = 'Cloud Run'
GROUP BY location.region, sku.description
ORDER BY cost DESC
EOF
# Monitor actual CPU and memory usage
gcloud monitoring read-time-series \
--filter='resource.type="cloud_run_revision" AND metric.type="run.googleapis.com/container_cpu_allocations"' \
--format='table(resource.labels.service_name, points[0].value.double_value)' \
--start-time="-7d"
# Analyze metric data to determine optimal allocation
gcloud monitoring time-series list \
--filter='resource.type="cloud_run_revision"' \
--format='json' > metrics.json
# Script to find underutilized services
python3 << 'EOF'
import json

def find_underutilized(metrics, threshold=0.1):
    """Return (service_name, avg_cpu) pairs for Cloud Run services whose
    mean CPU metric is below `threshold`, sorted ascending by utilization.

    `metrics` is the parsed `gcloud monitoring time-series list` JSON.
    Series with no points are skipped (the original divided by
    len(points) and raised ZeroDivisionError on empty series).
    """
    underutilized = []
    for ts in metrics.get('timeSeries', []):
        service = ts['resource']['labels']['service_name']
        points = ts.get('points', [])
        if not points:
            continue
        avg_cpu = sum(p['value']['doubleValue'] for p in points) / len(points)
        if avg_cpu < threshold:  # Less than 10% utilization by default
            underutilized.append((service, avg_cpu))
    return sorted(underutilized, key=lambda x: x[1])

if __name__ == '__main__':
    # Heredoc invocation runs as __main__, so behavior is unchanged.
    with open('metrics.json') as f:
        metrics = json.load(f)
    for service, cpu in find_underutilized(metrics):
        print(f"{service}: {cpu*100:.1f}% CPU utilization")
EOF
Continue learning with Firebase Functions for serverless event processing and GCP Monitoring for comprehensive observability across all services.
Resources
Ojasa Mirai
Master AI-powered development skills through structured learning, real projects, and verified credentials. Whether you're upskilling your team or launching your career, we deliver the skills companies actually need.
Learn Deep • Build Real • Verify Skills • Launch Forward