freeleaps-ops/apps/gitea-webhook-ambassador-python/app/services/dedup_service.py

"""
Deduplication service
Implements deduplication strategy based on commit hash + branch
"""
import asyncio
import hashlib
import json
from typing import Optional, Dict, Any
from datetime import datetime, timedelta
import structlog
from redis import asyncio as aioredis
from app.config import get_settings
logger = structlog.get_logger()


class DeduplicationService:
    """Deduplication service backed by Redis."""

    def __init__(self, redis_client: aioredis.Redis):
        self.redis = redis_client
        self.settings = get_settings()
        self.cache_prefix = "webhook:dedup:"

    async def is_duplicate(self, dedup_key: str) -> bool:
        """
        Check whether the event is a duplicate.

        Args:
            dedup_key: deduplication key (commit_hash:branch)

        Returns:
            bool: True if duplicate, False if this is a new event
        """
        if not self.settings.deduplication.enabled:
            return False

        try:
            cache_key = f"{self.cache_prefix}{dedup_key}"

            # Check whether the key is already cached
            exists = await self.redis.exists(cache_key)
            if exists:
                logger.info("Duplicate event detected", dedup_key=dedup_key)
                return True

            # Record the new event
            await self._record_event(cache_key, dedup_key)
            logger.info("New event recorded", dedup_key=dedup_key)
            return False
        except Exception as e:
            logger.error("Error checking duplication",
                         dedup_key=dedup_key, error=str(e))
            # Fail open on errors so webhooks are not blocked
            return False

    async def _record_event(self, cache_key: str, dedup_key: str):
        """Record the event in the cache."""
        try:
            # Cache the event; the TTL is the deduplication cache lifetime
            ttl = self.settings.deduplication.cache_ttl
            await self.redis.setex(cache_key, ttl, json.dumps({
                "dedup_key": dedup_key,
                "timestamp": datetime.utcnow().isoformat(),
                "ttl": ttl
            }))

            # Also record the event in the window cache
            window_key = f"{self.cache_prefix}window:{dedup_key}"
            window_ttl = self.settings.deduplication.window_seconds
            await self.redis.setex(window_key, window_ttl, "1")
        except Exception as e:
            logger.error("Error recording event",
                         cache_key=cache_key, error=str(e))

    async def get_event_info(self, dedup_key: str) -> Optional[Dict[str, Any]]:
        """Get the cached info for an event."""
        try:
            cache_key = f"{self.cache_prefix}{dedup_key}"
            data = await self.redis.get(cache_key)
            if data:
                return json.loads(data)
            return None
        except Exception as e:
            logger.error("Error getting event info",
                         dedup_key=dedup_key, error=str(e))
            return None

    async def clear_event(self, dedup_key: str) -> bool:
        """Clear the record for an event."""
        try:
            cache_key = f"{self.cache_prefix}{dedup_key}"
            window_key = f"{self.cache_prefix}window:{dedup_key}"

            # Delete both cache keys
            await self.redis.delete(cache_key, window_key)
            logger.info("Event cleared", dedup_key=dedup_key)
            return True
        except Exception as e:
            logger.error("Error clearing event",
                         dedup_key=dedup_key, error=str(e))
            return False

    async def get_stats(self) -> Dict[str, Any]:
        """Get deduplication statistics."""
        try:
            # Get all deduplication keys
            pattern = f"{self.cache_prefix}*"
            keys = await self.redis.keys(pattern)

            # Count the different kinds of keys
            total_keys = len(keys)
            window_keys = len([k for k in keys if b"window:" in k])
            event_keys = total_keys - window_keys

            # Collect config info
            config = {
                "enabled": self.settings.deduplication.enabled,
                "window_seconds": self.settings.deduplication.window_seconds,
                "cache_ttl": self.settings.deduplication.cache_ttl,
                "strategy": self.settings.deduplication.strategy
            }

            return {
                "total_keys": total_keys,
                "window_keys": window_keys,
                "event_keys": event_keys,
                "config": config,
                "timestamp": datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error("Error getting deduplication stats", error=str(e))
            return {"error": str(e)}

    async def cleanup_expired_events(self) -> int:
        """Clean up expired events."""
        try:
            pattern = f"{self.cache_prefix}*"
            keys = await self.redis.keys(pattern)

            cleaned_count = 0
            for key in keys:
                # TTL is negative once a key has expired or has no expiry
                ttl = await self.redis.ttl(key)
                if ttl <= 0:
                    await self.redis.delete(key)
                    cleaned_count += 1

            if cleaned_count > 0:
                logger.info("Cleaned up expired events", count=cleaned_count)
            return cleaned_count
        except Exception as e:
            logger.error("Error cleaning up expired events", error=str(e))
            return 0

    def generate_dedup_key(self, commit_hash: str, branch: str) -> str:
        """
        Generate the deduplication key.

        Args:
            commit_hash: commit hash
            branch: branch name

        Returns:
            str: deduplication key
        """
        if self.settings.deduplication.strategy == "commit_branch":
            return f"{commit_hash}:{branch}"
        elif self.settings.deduplication.strategy == "commit_only":
            return commit_hash
        elif self.settings.deduplication.strategy == "branch_only":
            return branch
        else:
            # Default to commit_hash:branch
            return f"{commit_hash}:{branch}"

    async def is_in_window(self, dedup_key: str) -> bool:
        """
        Check whether the key is inside the deduplication time window.

        Args:
            dedup_key: deduplication key

        Returns:
            bool: True if inside the window
        """
        try:
            window_key = f"{self.cache_prefix}window:{dedup_key}"
            exists = await self.redis.exists(window_key)
            return bool(exists)
        except Exception as e:
            logger.error("Error checking window",
                         dedup_key=dedup_key, error=str(e))
            return False


# Global deduplication service instance
_dedup_service: Optional[DeduplicationService] = None


def get_deduplication_service() -> DeduplicationService:
    """Get the deduplication service instance."""
    global _dedup_service
    if _dedup_service is None:
        # The Redis client should be supplied via dependency injection;
        # the instance must be set with set_deduplication_service() first.
        raise RuntimeError("DeduplicationService not initialized")
    return _dedup_service


def set_deduplication_service(service: DeduplicationService):
    """Set the deduplication service instance."""
    global _dedup_service
    _dedup_service = service
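

# Usage sketch (illustrative only, not executed by this module). It assumes
# the application builds the async Redis client itself at startup and that
# the commit hash and branch are parsed from the incoming Gitea push payload;
# the Redis URL and the should_process() helper below are placeholder
# assumptions, not part of this service.
#
#     redis_client = aioredis.from_url("redis://localhost:6379/0")
#     set_deduplication_service(DeduplicationService(redis_client))
#
#     async def should_process(commit_hash: str, branch: str) -> bool:
#         dedup = get_deduplication_service()
#         key = dedup.generate_dedup_key(commit_hash, branch)
#         # is_duplicate() also records the event when it is new
#         return not await dedup.is_duplicate(key)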