Major Changes:
- Updated all code references from hvacknowitall/hvacnkowitall to hkia
- Renamed all existing markdown files to use hkia_ prefix
- Updated configuration files, scrapers, and production scripts
- Modified systemd service descriptions to use HKIA
- Changed NAS sync path to /mnt/nas/hkia

Files Updated:
- 20+ source files updated with new naming convention
- 34 markdown files renamed to hkia_* format
- All ScraperConfig brand_name parameters now use 'hkia'
- Documentation updated to reflect new naming

Rationale:
- Shorter, cleaner filenames
- Consistent branding across all outputs
- Easier to type and reference
- Maintains same functionality with improved naming

Next Steps:
- Deploy updated services to production
- Update any external references to old naming
- Monitor scrapers to ensure proper operation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
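For illustration, a minimal sketch of the ScraperConfig change described above (only brand_name is confirmed by this commit; any other constructor parameters would be assumptions and are omitted):

```python
from src.base_scraper import ScraperConfig

# Before: ScraperConfig(brand_name='hvacknowitall')
# After: every scraper is constructed with the short brand name
config = ScraperConfig(brand_name='hkia')
```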
import os
import time
import random
from typing import Any, Dict, List, Optional
from datetime import datetime, timedelta
from pathlib import Path
import json
import re

from scrapling import StealthyFetcher, Adaptor

from src.base_scraper import BaseScraper, ScraperConfig


class TikTokScraperAdvanced(BaseScraper):
    """TikTok scraper using advanced Scrapling configuration for bot detection avoidance."""

    def __init__(self, config: ScraperConfig):
        super().__init__(config)
        self.target_username = os.getenv('TIKTOK_TARGET', 'hkia')
        self.base_url = f"https://www.tiktok.com/@{self.target_username}"

        # Configure global StealthyFetcher settings
        StealthyFetcher.auto_match = True  # Enable automatic element matching
        StealthyFetcher.huge_tree = True  # Allow large HTML trees

    def _enhanced_typing(self, element, text: str):
        """Realistic typing patterns (30-70 WPM with typos)."""
        for char in text:
            # Variable typing speed
            base_delay = random.uniform(0.08, 0.25)

            # Pause on complex characters
            if char in '@._-':
                base_delay *= random.uniform(1.2, 2.0)

            # Occasional hesitation (10% chance)
            if random.random() < 0.1:
                time.sleep(random.uniform(0.3, 0.8))

            element.type(char)
            time.sleep(base_delay)

            # Typo correction (3% chance)
            if random.random() < 0.03:
                element.press('Backspace')
                time.sleep(random.uniform(0.1, 0.3))
                element.type(char)

    def _advanced_human_simulation(self, page):
        """Natural page reading behavior."""
        try:
            viewport_height = page.viewport_size.get('height', 800)

            # Natural scrolling patterns
            for i in range(random.randint(3, 6)):
                scroll_amount = random.randint(100, viewport_height // 3)
                page.mouse.wheel(0, scroll_amount)
                time.sleep(random.uniform(0.8, 2.5))  # Reading time

                # Occasional back-scroll (re-reading)
                if random.random() < 0.3:
                    page.mouse.wheel(0, -random.randint(50, 150))

            # Random mouse movements
            for _ in range(random.randint(2, 4)):
                x = random.randint(100, page.viewport_size.get('width', 1200) - 100)
                y = random.randint(100, page.viewport_size.get('height', 800) - 100)
                page.mouse.move(x, y)
                time.sleep(random.uniform(0.3, 0.8))
        except Exception as e:
            self.logger.debug(f"Human simulation error (non-critical): {e}")

    def _human_delay(self, min_seconds: float = 2, max_seconds: float = 5) -> None:
        """Add human-like delays between actions."""
        delay = random.uniform(min_seconds, max_seconds)
        self.logger.debug(f"Waiting {delay:.2f} seconds (human-like delay)...")
        time.sleep(delay)
    def fetch_posts(self, max_posts: int = 20, enable_scrolling: bool = True) -> List[Dict[str, Any]]:
        """Fetch posts from TikTok profile using advanced stealth configuration.

        Args:
            max_posts: Maximum number of posts to fetch
            enable_scrolling: Whether to scroll profile page to load more videos
        """
        posts_data = []

        try:
            self.logger.info(f"Fetching TikTok posts from @{self.target_username}")

            # Advanced stealth configuration for TikTok
            self.logger.info(f"Loading {self.base_url} with advanced stealth settings...")
            response = StealthyFetcher.fetch(
                url=self.base_url,

                # Display and stealth settings
                headless=False,  # Visible browser for manual CAPTCHA intervention

                # Network and resource management
                block_webrtc=True,  # Prevent WebRTC IP leaks
                allow_webgl=True,  # CRITICAL: required by modern anti-bot detection
                block_images=False,  # Keep images for CAPTCHA visibility
                disable_ads=True,  # Block ads for a cleaner experience
                disable_resources=False,  # Keep all resources to avoid detection

                # Geographic and fingerprinting
                geoip=True,  # Automatic geolocation spoofing
                os_randomize=True,  # Randomize OS fingerprints
                google_search=True,  # Set Google as referrer

                # Humanization and behavior
                humanize=True,  # Enable human-like mouse movements

                # Performance and timing
                network_idle=True,  # Wait for network idle state
                timeout=120000,  # 2-minute timeout (reduced for testing)
                wait=3000,  # 3-second wait after page load

                # Enhanced headers for better compatibility
                extra_headers={
                    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
                    "Accept-Language": "en-US,en;q=0.9,en-CA;q=0.8",
                    "Accept-Encoding": "gzip, deflate, br",
                    "Cache-Control": "max-age=0",
                    "DNT": "1",
                    "Upgrade-Insecure-Requests": "1",
                    "Sec-Fetch-Dest": "document",
                    "Sec-Fetch-Mode": "navigate",
                    "Sec-Fetch-Site": "none",
                    "Sec-Fetch-User": "?1"
                }
            )

            if not response:
                self.logger.error("Failed to load TikTok profile")
                return posts_data

            self.logger.info("Page loaded successfully, performing human simulation...")

            # Perform advanced human simulation if we have access to the page object
            try:
                # Note: this would need to be adapted based on Scrapling's API
                # self._advanced_human_simulation(page)
                pass
            except Exception as e:
                self.logger.debug(f"Human simulation not available: {e}")

            # Wait for a human-like delay
            self._human_delay(3, 6)

            # Optional: scroll to load more videos
            if enable_scrolling and max_posts > 20:
                self.logger.info(f"Scrolling to load more videos (targeting {max_posts} posts)...")
                # Simulate scrolling to trigger lazy loading
                for scroll_attempt in range(min(5, max_posts // 10)):
                    try:
                        # Scroll down progressively
                        self.logger.debug(f"Scroll attempt {scroll_attempt + 1}")
                        # Note: this would need adaptation based on Scrapling's API
                        # for an actual scrolling implementation
                        self._human_delay(2, 4)
                    except Exception as e:
                        self.logger.debug(f"Scrolling error (non-critical): {e}")
                        break

            # Extract video items using multiple strategies
            video_items = []

            # Strategy 1: Primary TikTok selectors
            video_items = response.css("[data-e2e='user-post-item']")
            self.logger.info(f"Strategy 1 found {len(video_items)} items with user-post-item selector")

            # Strategies 2-3: Alternative selectors
            if not video_items:
                video_items = response.css("div[class*='DivItemContainer']")
                self.logger.info(f"Strategy 2 found {len(video_items)} items with DivItemContainer selector")

            if not video_items:
                video_items = response.css("div[class*='video-feed-item']")
                self.logger.info(f"Strategy 3 found {len(video_items)} items with video-feed-item selector")

            # Strategy 4: Look for video links directly
            if not video_items:
                video_links = response.css("a[href*='/video/']")
                self.logger.info(f"Strategy 4 found {len(video_links)} direct video links")

                for idx, link in enumerate(video_links[:max_posts]):
                    try:
                        href = ""
                        # Extract the href from the matched link element via the ::attr()
                        # pseudo-selector (querying the element directly avoids the
                        # fragile :nth-child() indexing of the full page)
                        href_elements = link.css("::attr(href)")
                        if href_elements:
                            href = href_elements[0]

                        if not href:
                            continue
                        if not href.startswith('http'):
                            href = f"https://www.tiktok.com{href}"

                        video_id_match = re.search(r'/video/(\d+)', href)
                        video_id = video_id_match.group(1) if video_id_match else f"video_{idx}"

                        post_data = {
                            'id': video_id,
                            'type': 'video',
                            'caption': '',
                            'author': self.target_username,
                            'publish_date': datetime.now(self.tz).isoformat(),
                            'link': href,
                            'views': 0,
                            'platform': 'tiktok'
                        }

                        posts_data.append(post_data)

                    except Exception as e:
                        self.logger.error(f"Error processing video link {idx}: {e}")
                        continue

            # Process structured video items (found by strategies 1-3)
            if video_items and not posts_data:
                self.logger.info(f"Processing {len(video_items)} structured video items...")

                for idx, item in enumerate(video_items[:max_posts]):
                    try:
                        # Extract the video URL using the ::attr() selector
                        video_url = ""
                        href_elements = item.css("a[href*='/video/']::attr(href)")
                        if href_elements:
                            video_url = href_elements[0]

                        if not video_url:
                            # Try an alternative approach
                            link_elements = item.css("a")
                            for link_elem in link_elements:
                                href_attrs = link_elem.css("::attr(href)")
                                if href_attrs and '/video/' in str(href_attrs[0]):
                                    video_url = href_attrs[0]
                                    break

                        if not video_url:
                            continue

                        if not video_url.startswith('http'):
                            video_url = f"https://www.tiktok.com{video_url}"

                        # Extract the video ID from the URL
                        video_id_match = re.search(r'/video/(\d+)', video_url)
                        video_id = video_id_match.group(1) if video_id_match else f"video_{idx}"

                        # Extract the caption/description using the ::text selector
                        caption = ""
                        caption_elements = item.css("div[data-e2e='browse-video-desc'] span::text")
                        if caption_elements:
                            caption = caption_elements[0] if isinstance(caption_elements, list) else str(caption_elements)

                        if not caption:
                            caption_elements = item.css("div[class*='DivContainer'] span::text")
                            if caption_elements:
                                caption = caption_elements[0] if isinstance(caption_elements, list) else str(caption_elements)

                        # Extract the view count using the ::text selector
                        views_text = "0"
                        views_elements = item.css("strong[data-e2e='video-views']::text")
                        if views_elements:
                            views_text = views_elements[0] if isinstance(views_elements, list) else str(views_elements)

                        if not views_text or views_text == "0":
                            views_elements = item.css("strong::text")
                            if views_elements:
                                views_text = views_elements[0] if isinstance(views_elements, list) else str(views_elements)

                        views = self._parse_count(views_text)

                        post_data = {
                            'id': video_id,
                            'type': 'video',
                            'caption': caption,
                            'author': self.target_username,
                            'publish_date': datetime.now(self.tz).isoformat(),
                            'link': video_url,
                            'views': views,
                            'platform': 'tiktok'
                        }

                        posts_data.append(post_data)

                        if idx % 5 == 0 and idx > 0:
                            self.logger.info(f"Processed {idx} videos...")

                    except Exception as e:
                        self.logger.error(f"Error processing video item {idx}: {e}")
                        continue

            # Strategy 5: Extract from page scripts as a fallback
            if not posts_data:
                self.logger.info("No posts found via selectors, checking page scripts...")
                scripts = response.css("script")

                for script in scripts:
                    script_text_elements = script.css("::text")
                    if not script_text_elements:
                        continue

                    script_text = script_text_elements[0] if isinstance(script_text_elements, list) else str(script_text_elements)

                    if '__UNIVERSAL_DATA_FOR_REHYDRATION__' in script_text or 'window.__INIT_PROPS__' in script_text:
                        try:
                            # Look for video IDs in the script content
                            urls = re.findall(r'["\']*/video/(\d+)["\']', script_text)
                            unique_ids = list(set(urls))  # Remove duplicates

                            self.logger.info(f"Found {len(unique_ids)} unique video IDs in script data")

                            for video_id in unique_ids[:max_posts]:
                                post_data = {
                                    'id': video_id,
                                    'type': 'video',
                                    'caption': '',
                                    'author': self.target_username,
                                    'publish_date': datetime.now(self.tz).isoformat(),
                                    'link': f"https://www.tiktok.com/@{self.target_username}/video/{video_id}",
                                    'views': 0,
                                    'platform': 'tiktok'
                                }
                                posts_data.append(post_data)

                        except Exception as e:
                            self.logger.debug(f"Could not parse script data: {e}")
                            continue

            self.logger.info(f"Successfully fetched {len(posts_data)} TikTok posts")

        except Exception as e:
            self.logger.error(f"Error fetching TikTok posts: {e}")
            import traceback
            self.logger.error(traceback.format_exc())

        return posts_data

    def _fetch_video_details(self, video_url: str) -> Optional[Dict[str, Any]]:
        """Fetch detailed information from an individual TikTok video page.

        Args:
            video_url: URL of the TikTok video

        Returns:
            Dictionary with caption and additional metadata, or None if failed
        """
        try:
            self.logger.debug(f"Fetching details for: {video_url}")

            # Fetch the individual video page with stealth settings
            video_response = StealthyFetcher.fetch(
                url=video_url,
                headless=False,
                block_webrtc=True,
                allow_webgl=True,
                block_images=False,
                disable_ads=True,
                geoip=True,
                os_randomize=True,
                google_search=True,
                humanize=True,
                network_idle=True,
                timeout=60000,  # 1-minute timeout for individual pages
                wait=2000,
                extra_headers={
                    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                    "Accept-Language": "en-US,en;q=0.9",
                    "Accept-Encoding": "gzip, deflate, br",
                    "DNT": "1",
                    "Upgrade-Insecure-Requests": "1"
                }
            )

            if not video_response:
                self.logger.warning(f"Failed to load video page: {video_url}")
                return None

            details = {}

            # Extract the caption/description from the video page
            caption_selectors = [
                "h1[data-e2e='browse-video-desc']",
                "div[data-e2e='browse-video-desc']",
                "span[data-e2e='browse-video-desc']",
                "div.video-meta-caption",
                "div[class*='DivVideoInfoContainer'] span",
                "h1.video-meta-title",
                "meta[property='og:description']::attr(content)"
            ]

            caption = ""
            for selector in caption_selectors:
                try:
                    caption_elements = video_response.css(f"{selector}::text")
                    if caption_elements:
                        caption = ' '.join(str(elem).strip() for elem in caption_elements if elem)
                        if caption:
                            self.logger.debug(f"Found caption with selector: {selector}")
                            break
                except Exception:
                    continue

            details['caption'] = caption

            # Try to extract additional metadata
            # Likes
            likes_elements = video_response.css("strong[data-e2e='like-count']::text")
            if likes_elements:
                details['likes'] = self._parse_count(str(likes_elements[0]))

            # Comments
            comments_elements = video_response.css("strong[data-e2e='comment-count']::text")
            if comments_elements:
                details['comments'] = self._parse_count(str(comments_elements[0]))

            # Shares
            shares_elements = video_response.css("strong[data-e2e='share-count']::text")
            if shares_elements:
                details['shares'] = self._parse_count(str(shares_elements[0]))

            # Duration
            duration_elements = video_response.css("div[class*='DivSeekBarTimeContainer'] div::text")
            if duration_elements and len(duration_elements) >= 2:
                details['duration'] = str(duration_elements[1])

            return details

        except Exception as e:
            self.logger.error(f"Error fetching video details from {video_url}: {e}")
            return None

    def _parse_count(self, count_str: str) -> int:
        """Parse TikTok view/like counts (e.g., '1.2M' -> 1200000)."""
        if not count_str:
            return 0

        count_str = str(count_str).strip().upper()

        try:
            if 'K' in count_str:
                num = re.search(r'([\d.]+)', count_str)
                if num:
                    return int(float(num.group(1)) * 1000)
            elif 'M' in count_str:
                num = re.search(r'([\d.]+)', count_str)
                if num:
                    return int(float(num.group(1)) * 1000000)
            elif 'B' in count_str:
                num = re.search(r'([\d.]+)', count_str)
                if num:
                    return int(float(num.group(1)) * 1000000000)
            else:
                # Remove any non-numeric characters
                return int(re.sub(r'[^\d]', '', count_str) or 0)
            return 0  # A suffix matched but no number was found
        except Exception:
            return 0
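
    # Illustrative behavior of _parse_count (derived from the rules above):
    #   "1.2K"   -> 1200
    #   "3.4M"   -> 3400000
    #   "1.1B"   -> 1100000000
    #   "12,345" -> 12345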

    def fetch_content(self, max_posts: int = 20, fetch_captions: bool = False,
                      max_caption_fetches: int = 10) -> List[Dict[str, Any]]:
        """Fetch all content from TikTok with optional caption retrieval.

        Args:
            max_posts: Maximum number of posts to fetch
            fetch_captions: Whether to fetch captions from individual video pages
            max_caption_fetches: Maximum number of videos to fetch captions for
        """
        # First, get video IDs and basic info from the profile
        posts_data = self.fetch_posts(max_posts=max_posts, enable_scrolling=(max_posts > 20))

        # Optionally fetch captions from individual video pages
        if fetch_captions and posts_data:
            caption_limit = min(len(posts_data), max_caption_fetches)
            self.logger.info(f"Fetching captions for {caption_limit} videos (this will take time)...")

            successful_fetches = 0
            for i, post in enumerate(posts_data[:caption_limit]):
                try:
                    # Aggressive delay before each fetch to avoid detection
                    self._human_delay(5, 10)

                    # Fetch individual video details
                    video_url = post.get('link', '')
                    if not video_url:
                        continue

                    self.logger.info(f"Fetching caption {i+1}/{caption_limit}: {video_url}")
                    video_details = self._fetch_video_details(video_url)

                    if video_details:
                        # Update the post with the fetched details
                        post.update(video_details)
                        successful_fetches += 1
                        self.logger.info(f"Successfully fetched caption ({successful_fetches}/{caption_limit})")

                    # Extended break every 3 videos to avoid detection
                    if (i + 1) % 3 == 0 and i < caption_limit - 1:
                        break_time = random.uniform(30, 60)
                        self.logger.info(f"Taking extended {break_time:.0f}s break to avoid detection...")
                        time.sleep(break_time)

                except Exception as e:
                    self.logger.warning(f"Failed to fetch details for video {i+1}: {e}")
                    continue

            self.logger.info(f"Caption fetching complete: {successful_fetches}/{caption_limit} successful")

        return posts_data

    def format_markdown(self, items: List[Dict[str, Any]]) -> str:
        """Format TikTok content as markdown."""
        markdown_sections = []

        for item in items:
            section = []

            # ID
            section.append(f"# ID: {item.get('id', 'N/A')}")
            section.append("")

            # Type
            section.append(f"## Type: {item.get('type', 'video')}")
            section.append("")

            # Author
            section.append(f"## Author: @{item.get('author', 'Unknown')}")
            section.append("")

            # Publish Date
            section.append(f"## Publish Date: {item.get('publish_date', '')}")
            section.append("")

            # Link
            section.append(f"## Link: {item.get('link', '')}")
            section.append("")

            # Views
            views = item.get('views', 0)
            section.append(f"## Views: {views:,}")
            section.append("")

            # Likes (if fetched from the individual page)
            likes = item.get('likes')
            if likes is not None:
                section.append(f"## Likes: {likes:,}")
                section.append("")

            # Comments (if fetched from the individual page)
            comments = item.get('comments')
            if comments is not None:
                section.append(f"## Comments: {comments:,}")
                section.append("")

            # Shares (if fetched from the individual page)
            shares = item.get('shares')
            if shares is not None:
                section.append(f"## Shares: {shares:,}")
                section.append("")

            # Duration (if fetched from the individual page)
            duration = item.get('duration')
            if duration:
                section.append(f"## Duration: {duration}")
                section.append("")

            # Caption
            section.append("## Caption:")
            caption = item.get('caption', '')
            if caption:
                section.append(caption)
            else:
                section.append("(No caption available - fetch individual video for details)")
            section.append("")

            # Separator
            section.append("-" * 50)
            section.append("")

            markdown_sections.append('\n'.join(section))

        return '\n'.join(markdown_sections)
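
    # The markdown emitted for each item looks roughly like this
    # (the ID value below is a made-up placeholder):
    #   # ID: 7123456789012345678
    #   ## Type: video
    #   ## Author: @hkia
    #   ## Views: 1,200
    #   ## Caption:
    #   <caption text>
    #   --------------------------------------------------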

    def get_incremental_items(self, items: List[Dict[str, Any]], state: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Get only new videos since the last sync."""
        if not state:
            return items

        last_video_id = state.get('last_video_id')

        if not last_video_id:
            return items

        # Keep only videos newer than the last synced one
        new_items = []
        for item in items:
            if item.get('id') == last_video_id:
                break  # Found the last synced video
            new_items.append(item)

        return new_items
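
    # Example: with items ordered newest-first as [v3, v2, v1] and
    # state['last_video_id'] == v2's id, only [v3] is returned.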

    def update_state(self, state: Dict[str, Any], items: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Update state with the latest video information."""
        if not items:
            return state

        # The first item is the most recent
        latest_item = items[0]

        state['last_video_id'] = latest_item.get('id')
        state['last_video_date'] = latest_item.get('publish_date')
        state['last_sync'] = datetime.now(self.tz).isoformat()
        state['video_count'] = len(items)

        return state
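

# Minimal usage sketch. ScraperConfig is defined in src.base_scraper; beyond
# brand_name (confirmed by the commit message above), its constructor
# parameters are not shown in this file, so treat this as illustrative only:
#
#     config = ScraperConfig(brand_name='hkia')
#     scraper = TikTokScraperAdvanced(config)
#     posts = scraper.fetch_content(max_posts=20, fetch_captions=False)
#     print(scraper.format_markdown(posts))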