213 lines
7.3 KiB
Python
213 lines
7.3 KiB
Python
"""
|
|
Deduplication service
|
|
Implements deduplication strategy based on commit hash + branch
|
|
"""
|
|
|
|
import asyncio
|
|
import hashlib
|
|
import json
|
|
from typing import Optional, Dict, Any
|
|
from datetime import datetime, timedelta
|
|
import structlog
|
|
from redis import asyncio as aioredis
|
|
|
|
from app.config import get_settings
|
|
|
|
logger = structlog.get_logger()
|
|
|
|
class DeduplicationService:
    """Redis-backed deduplication of webhook events.

    Every event is identified by a dedup key (see ``generate_dedup_key``).
    A newly seen event writes two Redis keys under ``cache_prefix``:

    * ``<prefix><dedup_key>`` - a JSON event record that expires after
      ``settings.deduplication.cache_ttl`` seconds.
    * ``<prefix>window:<dedup_key>`` - the marker ``"1"`` that expires after
      ``settings.deduplication.window_seconds`` seconds.

    All public coroutines are best-effort: Redis failures are logged and
    turned into a neutral return value instead of being raised.
    """

    def __init__(self, redis_client: "aioredis.Redis"):
        """Store the Redis client and load the application settings.

        Args:
            redis_client: async Redis client used for every cache operation.
        """
        self.redis = redis_client
        self.settings = get_settings()
        self.cache_prefix = "webhook:dedup:"

    async def is_duplicate(self, dedup_key: str) -> bool:
        """Check whether the event is a duplicate, recording it if it is new.

        The check and the recording are a single atomic Redis operation, so
        concurrent deliveries of the same event cannot both be reported as new.

        Args:
            dedup_key: deduplication key (commit_hash:branch)

        Returns:
            bool: True if duplicate, False if new event. Also False when
            deduplication is disabled or Redis fails (fail open so that
            events are never blocked by a cache outage).
        """
        if not self.settings.deduplication.enabled:
            return False

        try:
            cache_key = f"{self.cache_prefix}{dedup_key}"

            recorded = await self._record_event(cache_key, dedup_key)
            if recorded is False:
                logger.info("Duplicate event detected", dedup_key=dedup_key)
                return True

            # recorded is None when the write failed; the failure has already
            # been logged, so only claim success for a confirmed write.
            if recorded:
                logger.info("New event recorded", dedup_key=dedup_key)
            return False

        except Exception as e:
            logger.error("Error checking duplication",
                         dedup_key=dedup_key, error=str(e))
            # Allow through on error to avoid blocking
            return False

    async def _record_event(self, cache_key: str, dedup_key: str) -> Optional[bool]:
        """Atomically record the event unless it is already recorded.

        Args:
            cache_key: full Redis key of the event record.
            dedup_key: deduplication key the record belongs to.

        Returns:
            True if the event record was created by this call, False if a
            record already existed, None if Redis failed before the outcome
            was known (the error is logged, never raised).
        """
        try:
            # TTL of the event record is the deduplication cache lifetime
            ttl = self.settings.deduplication.cache_ttl
            payload = json.dumps({
                "dedup_key": dedup_key,
                "timestamp": datetime.utcnow().isoformat(),
                "ttl": ttl,
            })
            # SET ... NX EX creates the key only if it is absent, in one
            # round trip; a separate EXISTS + SETEX would leave a race window.
            created = await self.redis.set(cache_key, payload, ex=ttl, nx=True)
        except Exception as e:
            logger.error("Error recording event",
                         cache_key=cache_key, error=str(e))
            return None

        if not created:
            return False

        try:
            # Also record to window cache
            window_key = f"{self.cache_prefix}window:{dedup_key}"
            window_ttl = self.settings.deduplication.window_seconds
            await self.redis.setex(window_key, window_ttl, "1")
        except Exception as e:
            # The event record itself is already stored; a missing window
            # marker only affects is_in_window().
            logger.error("Error recording event",
                         cache_key=cache_key, error=str(e))
        return True

    async def get_event_info(self, dedup_key: str) -> Optional[Dict[str, Any]]:
        """Return the stored event record for ``dedup_key``.

        Returns:
            The decoded JSON record, or None if there is no record or the
            lookup/decoding failed (the error is logged).
        """
        try:
            cache_key = f"{self.cache_prefix}{dedup_key}"
            data = await self.redis.get(cache_key)
            if data:
                return json.loads(data)
            return None
        except Exception as e:
            logger.error("Error getting event info",
                         dedup_key=dedup_key, error=str(e))
            return None

    async def clear_event(self, dedup_key: str) -> bool:
        """Delete the event record and window marker of ``dedup_key``.

        Returns:
            True if the delete command succeeded (whether or not any key
            existed), False if Redis raised.
        """
        try:
            cache_key = f"{self.cache_prefix}{dedup_key}"
            window_key = f"{self.cache_prefix}window:{dedup_key}"

            # Delete both cache keys
            await self.redis.delete(cache_key, window_key)
            logger.info("Event cleared", dedup_key=dedup_key)
            return True

        except Exception as e:
            logger.error("Error clearing event",
                         dedup_key=dedup_key, error=str(e))
            return False

    async def get_stats(self) -> Dict[str, Any]:
        """Return key counts and the active deduplication configuration.

        Returns:
            A dict with ``total_keys``, ``window_keys``, ``event_keys``,
            ``config`` and ``timestamp``; or ``{"error": <message>}`` if the
            statistics could not be collected.
        """
        try:
            window_prefix = f"{self.cache_prefix}window:"
            total_keys = 0
            window_keys = 0

            # SCAN walks the keyspace incrementally; KEYS would block the
            # Redis server for the whole traversal.
            async for key in self.redis.scan_iter(match=f"{self.cache_prefix}*"):
                total_keys += 1
                # Keys are bytes by default and str when the client was
                # created with decode_responses=True; handle both.
                name = key.decode("utf-8", "replace") if isinstance(key, bytes) else key
                # NOTE(review): a dedup key that itself starts with "window:"
                # is indistinguishable from a window marker here.
                if name.startswith(window_prefix):
                    window_keys += 1

            event_keys = total_keys - window_keys

            # Get config info
            dedup_settings = self.settings.deduplication
            config = {
                "enabled": dedup_settings.enabled,
                "window_seconds": dedup_settings.window_seconds,
                "cache_ttl": dedup_settings.cache_ttl,
                "strategy": dedup_settings.strategy,
            }

            return {
                "total_keys": total_keys,
                "window_keys": window_keys,
                "event_keys": event_keys,
                "config": config,
                "timestamp": datetime.utcnow().isoformat(),
            }

        except Exception as e:
            logger.error("Error getting deduplication stats", error=str(e))
            return {"error": str(e)}

    async def cleanup_expired_events(self) -> int:
        """Delete deduplication keys that have no remaining lifetime.

        Returns:
            Number of keys actually deleted, or 0 if the cleanup failed.
        """
        try:
            cleaned_count = 0
            async for key in self.redis.scan_iter(match=f"{self.cache_prefix}*"):
                # Redis TTL: -2 = key is gone, -1 = key has no expiry,
                # 0 = less than a second left.
                ttl = await self.redis.ttl(key)
                if ttl <= 0:
                    # DELETE reports how many keys it removed, so keys that
                    # vanished in the meantime are not counted as cleaned.
                    cleaned_count += int(await self.redis.delete(key) or 0)

            if cleaned_count > 0:
                logger.info("Cleaned up expired events", count=cleaned_count)

            return cleaned_count

        except Exception as e:
            logger.error("Error cleaning up expired events", error=str(e))
            return 0

    def generate_dedup_key(self, commit_hash: str, branch: str) -> str:
        """Generate the deduplication key for the configured strategy.

        Args:
            commit_hash: commit hash
            branch: branch name

        Returns:
            str: ``commit_hash`` for "commit_only", ``branch`` for
            "branch_only", and ``commit_hash:branch`` for "commit_branch"
            or any unrecognised strategy.
        """
        strategy = self.settings.deduplication.strategy
        if strategy == "commit_only":
            return commit_hash
        if strategy == "branch_only":
            return branch
        # "commit_branch" and the default share the same key format
        return f"{commit_hash}:{branch}"

    async def is_in_window(self, dedup_key: str) -> bool:
        """Check whether the window marker of ``dedup_key`` still exists.

        Args:
            dedup_key: deduplication key

        Returns:
            bool: True if in window; False if not, or if Redis failed.
        """
        try:
            window_key = f"{self.cache_prefix}window:{dedup_key}"
            exists = await self.redis.exists(window_key)
            return bool(exists)

        except Exception as e:
            logger.error("Error checking window",
                         dedup_key=dedup_key, error=str(e))
            return False
|
|
|
|
# Module-wide DeduplicationService singleton; stays None until
# set_deduplication_service() registers an instance.
_dedup_service: Optional["DeduplicationService"] = None


def get_deduplication_service() -> "DeduplicationService":
    """Return the registered deduplication service.

    The service is never created lazily here because it needs a Redis
    client; it must have been registered with set_deduplication_service().

    Raises:
        RuntimeError: if no service has been registered yet.
    """
    service = _dedup_service
    if service is not None:
        return service
    raise RuntimeError("DeduplicationService not initialized")


def set_deduplication_service(service: "DeduplicationService"):
    """Register ``service`` as the module-wide deduplication service."""
    global _dedup_service
    _dedup_service = service