Skip to main content

Source-Specific Metrics

Access performance metrics for specific data sources to analyze performance patterns and optimize your usage.

Source Metrics Endpoint

HTTP Request

GET /metrics/{source}
Authorization: Bearer YOUR_API_KEY
Content-Type: application/json

Path Parameters

  • source: The data source identifier (e.g., twitter, x, reddit)

Response Format

{
  "source": "twitter",
  "window_hours": 24,
  "count": {
    "source": 5420,
    "internal": 5420
  },
  "source_latency_ms": {
    "p50": 180,
    "p95": 520,
    "p99": 1350
  },
  "internal_latency_ms": {
    "p50": 28,
    "p95": 82,
    "p99": 165
  },
  "timestamp": "2024-01-15T10:30:00Z"
}

Available Sources

Twitter/X

  • Source ID: twitter or x
  • Data Type: Social media posts, user profiles, trends
  • Rate Limits: Inherits the upstream Twitter/X API rate limits
  • Data Volume: High volume, real-time streaming

Reddit

  • Source ID: reddit
  • Data Type: Posts, comments, user activity
  • Rate Limits: Reddit API rate limits
  • Data Volume: Medium volume, periodic updates

News Sources

  • Source ID: news
  • Data Type: News articles, headlines
  • Rate Limits: Varies by news provider
  • Data Volume: Medium volume, continuous updates

Implementation Examples

JavaScript (Node.js)

const axios = require("axios");

/**
 * Fetch performance metrics for a single data source.
 *
 * @param {string} apiKey - API key, sent as a Bearer token.
 * @param {string} source - Source identifier (e.g. "twitter", "x", "reddit").
 * @returns {Promise<object>} Metrics payload from GET /metrics/{source}.
 * @throws Re-throws any request error after logging it.
 */
async function getSourceMetrics(apiKey, source) {
  try {
    const response = await axios.get(`https://scrape.st/metrics/${source}`, {
      headers: {
        // Use Bearer auth to match the documented endpoint (the HTTP
        // request sample above sends `Authorization: Bearer YOUR_API_KEY`)
        // and the other examples on this page — not an x-api-key header.
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
    });

    const metrics = response.data;
    console.log(`${source} Metrics:`, metrics);

    // Analyze source performance
    analyzeSourcePerformance(source, metrics);

    return metrics;
  } catch (error) {
    if (error.response?.status === 404) {
      console.error(`Source '${source}' not found`);
    } else {
      console.error(`Failed to fetch ${source} metrics:`, error.response?.data || error.message);
    }
    throw error;
  }
}

// Print a latency/volume summary for one source, then dispatch to the
// analyzer that matches its identifier (twitter/x, reddit, or news).
function analyzeSourcePerformance(source, metrics) {
  const src = metrics.source_latency_ms;
  const internal = metrics.internal_latency_ms;

  console.log(`\n${source.toUpperCase()} Performance Analysis:`);
  console.log(`Source Latency - P50: ${src.p50}ms, P95: ${src.p95}ms, P99: ${src.p99}ms`);
  console.log(
    `Internal Latency - P50: ${internal.p50}ms, P95: ${internal.p95}ms, P99: ${internal.p99}ms`,
  );
  console.log(`Total Requests: ${metrics.count.source} in ${metrics.window_hours} hours`);

  // Source-specific analysis — unknown sources get no extra analysis.
  if (source === "twitter" || source === "x") {
    analyzeTwitterMetrics(metrics);
  } else if (source === "reddit") {
    analyzeRedditMetrics(metrics);
  } else if (source === "news") {
    analyzeNewsMetrics(metrics);
  }
}

/**
 * Check Twitter/X latency against Twitter-specific thresholds.
 * Warns when P95/P99 exceed their limits and reports success ONLY when
 * no threshold was exceeded — the original printed the "✅" line
 * unconditionally, even right after a warning.
 *
 * @param {object} metrics - Response payload from GET /metrics/twitter.
 */
function analyzeTwitterMetrics(metrics) {
  const sourceLatency = metrics.source_latency_ms;
  let healthy = true;

  // Twitter-specific thresholds
  if (sourceLatency.p95 > 800) {
    console.warn("⚠️ Twitter API latency is high (P95 > 800ms)");
    healthy = false;
  }

  if (sourceLatency.p99 > 2000) {
    console.warn("⚠️ Twitter API experiencing extreme latency (P99 > 2000ms)");
    healthy = false;
  }

  if (healthy) {
    console.log("✅ Twitter API performance is within acceptable range");
  }
}

/**
 * Check Reddit latency against Reddit-specific thresholds.
 * Reports success ONLY when the P95 threshold was not exceeded — the
 * original printed the "✅" line even after warning.
 *
 * @param {object} metrics - Response payload from GET /metrics/reddit.
 */
function analyzeRedditMetrics(metrics) {
  const sourceLatency = metrics.source_latency_ms;

  // Reddit-specific thresholds
  if (sourceLatency.p95 > 600) {
    console.warn("⚠️ Reddit API latency is high (P95 > 600ms)");
    return;
  }

  console.log("✅ Reddit API performance is within acceptable range");
}

/**
 * Check news-source latency against its threshold.
 * Reports success ONLY when the P95 threshold was not exceeded — the
 * original printed the "✅" line even after warning.
 *
 * @param {object} metrics - Response payload from GET /metrics/news.
 */
function analyzeNewsMetrics(metrics) {
  const sourceLatency = metrics.source_latency_ms;

  // News-specific thresholds
  if (sourceLatency.p95 > 1000) {
    console.warn("⚠️ News source latency is high (P95 > 1000ms)");
    return;
  }

  console.log("✅ News source performance is within acceptable range");
}

// Usage
getSourceMetrics("your_api_key_here", "twitter");
getSourceMetrics("your_api_key_here", "reddit");

Python

import requests
import json

def get_source_metrics(api_key, source):
    """Fetch performance metrics for a single data source.

    Args:
        api_key: API key, sent as a Bearer token.
        source: Source identifier (e.g. "twitter", "x", "reddit").

    Returns:
        dict: Parsed JSON metrics payload from GET /metrics/{source}.

    Raises:
        requests.exceptions.RequestException: Re-raised after logging.
    """
    url = f"https://scrape.st/metrics/{source}"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    try:
        # Always pass a timeout: requests.get() blocks forever by default.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        metrics = response.json()

        print(f"{source.upper()} Metrics:")
        print(json.dumps(metrics, indent=2))

        # Analyze source performance
        analyze_source_performance(source, metrics)

        return metrics
    except requests.exceptions.RequestException as error:
        # NOTE: must compare against None. `bool(Response)` is `response.ok`,
        # so a 404 Response is falsy and `if error.response and ...` would
        # never take the 404 branch. error.response is also None for
        # connection errors and timeouts.
        if error.response is not None and error.response.status_code == 404:
            print(f"Source '{source}' not found")
        else:
            print(f"Failed to fetch {source} metrics: {error}")
        raise

def analyze_source_performance(source, metrics):
    """Print a latency/volume summary, then run the source-specific analyzer.

    Only dispatches to analyzers that are actually defined in this example.
    The original also called analyze_reddit_metrics / analyze_news_metrics,
    which are never defined in the Python snippet, so passing "reddit" or
    "news" (as the usage example below does) raised NameError.

    Args:
        source: Source identifier (e.g. "twitter", "x", "reddit", "news").
        metrics: Parsed metrics payload from GET /metrics/{source}.
    """
    source_latency = metrics["source_latency_ms"]
    internal_latency = metrics["internal_latency_ms"]

    print(f"\n{source.upper()} Performance Analysis:")
    print(f"Source Latency - P50: {source_latency['p50']}ms, P95: {source_latency['p95']}ms, P99: {source_latency['p99']}ms")
    print(f"Internal Latency - P50: {internal_latency['p50']}ms, P95: {internal_latency['p95']}ms, P99: {internal_latency['p99']}ms")
    print(f"Total Requests: {metrics['count']['source']} in {metrics['window_hours']} hours")

    # Source-specific analysis: only the Twitter analyzer exists in this
    # example; other sources get the generic summary above.
    if source in ('twitter', 'x'):
        analyze_twitter_metrics(metrics)

def analyze_twitter_metrics(metrics):
    """Warn when Twitter latency exceeds its thresholds.

    Reports success ONLY when every threshold passed — the original
    printed the success line unconditionally, even right after warning.

    Args:
        metrics: Parsed metrics payload from GET /metrics/twitter.
    """
    source_latency = metrics["source_latency_ms"]
    healthy = True

    if source_latency['p95'] > 800:
        print("⚠️ Twitter API latency is high (P95 > 800ms)")
        healthy = False

    if source_latency['p99'] > 2000:
        print("⚠️ Twitter API experiencing extreme latency (P99 > 2000ms)")
        healthy = False

    if healthy:
        print("✅ Twitter API performance is within acceptable range")

# Usage
get_source_metrics("your_api_key_here", "twitter")
get_source_metrics("your_api_key_here", "reddit")

Multi-Source Monitoring

Comparing Sources

/**
 * Fetches metrics for a fixed set of sources and prints a side-by-side
 * performance comparison.
 */
class SourceComparator {
  /**
   * @param {string} apiKey - API key, sent as a Bearer token.
   */
  constructor(apiKey) {
    this.apiKey = apiKey;
    this.sources = ["twitter", "reddit", "news"];
  }

  /**
   * Fetch metrics for every configured source. A failing source is
   * recorded as null rather than aborting the whole comparison.
   * @returns {Promise<object>} Map of source id -> metrics payload (or null).
   */
  async compareAllSources() {
    const metrics = {};

    for (const source of this.sources) {
      try {
        metrics[source] = await this.getSourceMetrics(source);
      } catch (error) {
        console.error(`Failed to get ${source} metrics:`, error.message);
        metrics[source] = null;
      }
    }

    this.generateComparisonReport(metrics);
    return metrics;
  }

  /** GET /metrics/{source} and return the parsed payload. */
  async getSourceMetrics(source) {
    const response = await axios.get(`https://scrape.st/metrics/${source}`, {
      headers: {
        Authorization: `Bearer ${this.apiKey}`,
        "Content-Type": "application/json",
      },
    });

    return response.data;
  }

  /**
   * Print latency and volume comparisons for every source that returned
   * metrics, then name the source with the lowest P95 latency.
   *
   * Handles the all-failed case explicitly: the original called
   * Array.reduce() with no initial value, which throws
   * "TypeError: Reduce of empty array with no initial value" when every
   * source entry is null.
   *
   * @param {object} metrics - Map of source id -> metrics payload (or null).
   */
  generateComparisonReport(metrics) {
    console.log("\n=== Source Performance Comparison ===");

    const sources = Object.keys(metrics).filter((source) => metrics[source]);

    // Nothing usable to compare — bail out before the reduce below.
    if (sources.length === 0) {
      console.log("No source metrics available for comparison");
      return;
    }

    console.log("\nP95 Latency Comparison:");
    sources.forEach((source) => {
      const p95 = metrics[source].source_latency_ms.p95;
      console.log(`${source.padEnd(10)}: ${p95}ms`);
    });

    console.log("\nP99 Latency Comparison:");
    sources.forEach((source) => {
      const p99 = metrics[source].source_latency_ms.p99;
      console.log(`${source.padEnd(10)}: ${p99}ms`);
    });

    console.log("\nRequest Volume Comparison (24h):");
    sources.forEach((source) => {
      const count = metrics[source].count.source;
      console.log(`${source.padEnd(10)}: ${count} requests`);
    });

    // Find best performing source (lowest P95 latency).
    const bestSource = sources.reduce((best, current) => {
      const bestP95 = metrics[best].source_latency_ms.p95;
      const currentP95 = metrics[current].source_latency_ms.p95;
      return currentP95 < bestP95 ? current : best;
    });

    console.log(`\n🏆 Best performing source: ${bestSource}`);
  }
}

// Usage
const comparator = new SourceComparator("your_api_key_here");
comparator.compareAllSources();

Source-Specific Optimization

Twitter/X Optimization

// Produces tuning recommendations and a suggested request cadence from a
// Twitter/X metrics payload.
class TwitterOptimizer {
  constructor(metrics) {
    this.metrics = metrics;
  }

  // Return one { issue, suggestion, priority } entry for each latency
  // threshold the current metrics exceed (P95 > 800ms, P99 > 2000ms).
  getOptimizations() {
    const { p95, p99 } = this.metrics.source_latency_ms;
    const recommendations = [];

    if (p95 > 800) {
      recommendations.push({
        issue: "High Twitter API latency",
        suggestion: "Consider reducing request frequency or implementing caching",
        priority: "high",
      });
    }

    if (p99 > 2000) {
      recommendations.push({
        issue: "Extreme Twitter API latency",
        suggestion: "Implement request batching and retry logic",
        priority: "critical",
      });
    }

    return recommendations;
  }

  // Suggest a request interval: the midpoint of P50 and P95 latency,
  // padded by a 1.5x safety factor and rounded up to a whole millisecond.
  getOptimalRequestPattern() {
    const { p50, p95 } = this.metrics.source_latency_ms;

    const safetyFactor = 1.5;
    const midpointLatency = (p50 + p95) / 2;

    return {
      interval: Math.ceil(midpointLatency * safetyFactor),
      reasoning: `Based on P50(${p50}ms) and P95(${p95}ms) latency with safety factor`,
    };
  }
}

Error Handling

Source-Specific Errors

Source Not Found

{
  "error": "Source 'invalid_source' not found",
  "code": 404,
  "availableSources": ["twitter", "reddit", "news"]
}

Source Unavailable

{
  "error": "Source 'twitter' temporarily unavailable",
  "code": 503,
  "retryAfter": 300
}

Error Handling Implementation

/**
 * Fetch source metrics with retries.
 *
 * - 404: fail immediately (the source id is wrong; retrying cannot help).
 * - 503: wait the server-suggested retryAfter seconds before retrying.
 * - anything else: exponential backoff (2s, 4s, ...).
 *
 * @param {string} apiKey - API key forwarded to getSourceMetrics.
 * @param {string} source - Source identifier.
 * @param {number} [maxRetries=3] - Maximum number of attempts.
 * @returns {Promise<object>} The metrics payload.
 * @throws {Error} When the source is unknown or every attempt fails.
 */
async function getSourceMetricsWithRetry(apiKey, source, maxRetries = 3) {
  let lastError;

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      return await getSourceMetrics(apiKey, source);
    } catch (error) {
      lastError = error;

      if (error.response?.status === 404) {
        throw new Error(`Source '${source}' not found`);
      }

      if (error.response?.status === 503) {
        const retryAfter = error.response.data?.retryAfter || 60;
        console.log(`Source '${source}' unavailable, retrying in ${retryAfter} seconds...`);
        await new Promise((resolve) => setTimeout(resolve, retryAfter * 1000));
        continue;
      }

      if (attempt === maxRetries) {
        throw error;
      }

      const delay = Math.pow(2, attempt) * 1000;
      console.log(`Attempt ${attempt} failed, retrying in ${delay}ms...`);
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }

  // The original fell through here when every attempt hit a 503 and the
  // promise resolved to undefined; surface the failure to the caller.
  throw lastError ?? new Error(`Failed to fetch metrics for '${source}'`);
}

Best Practices

For Source Selection

  • Performance Monitoring: Regularly monitor source-specific metrics
  • Fallback Sources: Implement fallback mechanisms for critical operations
  • Load Balancing: Distribute requests across multiple sources when possible
  • Rate Limiting: Respect source-specific rate limits and quotas

For Optimization

  • Source-Specific Tuning: Optimize request patterns per source
  • Caching Strategy: Implement appropriate caching for each source type
  • Error Recovery: Build source-specific error handling and recovery
  • Performance Baselines: Establish baselines for each source
Next: Health Endpoints