Heat transfer is fundamental to HVAC systems...
There are three main types: conduction, convection, and radiation.
import pytest
from unittest.mock import Mock, patch, MagicMock
from datetime import datetime
import json
from pathlib import Path
from src.hvacrschool_scraper import HVACRSchoolScraper
from src.base_scraper import ScraperConfig
class TestHVACRSchoolScraper:
    """Tests for ``HVACRSchoolScraper``."""

    @pytest.fixture
    def config(self):
        """Return the ``ScraperConfig`` used to construct the scraper under test."""
        return ScraperConfig(
            source_name="hvacrschool",
            brand_name="hkia",
            data_dir=Path("test_data"),
            logs_dir=Path("test_logs"),
            timezone="America/Halifax",
        )

    @pytest.fixture
    def mock_scraper(self, config):
        """Yield an ``HVACRSchoolScraper`` whose fetcher is a ``MagicMock``.

        ``src.hvacrschool_scraper.StealthyFetcher`` is patched while the
        scraper is constructed and stays patched for the whole test that
        requests this fixture.  The fixture yields instead of returning
        because a ``return`` inside the ``with`` block would leave the
        context manager, restoring the real ``StealthyFetcher`` before the
        test body runs.

        Args:
            config: The ``ScraperConfig`` supplied by the ``config`` fixture.

        Yields:
            The scraper instance, with ``scraper.scraper`` set to the mock
            that the patched ``StealthyFetcher`` class returns when called.
        """
        with patch('src.hvacrschool_scraper.StealthyFetcher') as fetcher_cls:
            fetcher = MagicMock()
            fetcher_cls.return_value = fetcher
            scraper = HVACRSchoolScraper(config)
            # Assign the mock directly as well, so ``scraper.scraper`` is the
            # mock regardless of what the constructor stored there.
            scraper.scraper = fetcher
            yield scraper
@pytest.fixture
def sample_sitemap_xml(self):
return '''
Heat transfer is fundamental to HVAC systems...
There are three main types: conduction, convection, and radiation.
Heat transfer is fundamental...
Test content
', 'description': 'Test description' } ] markdown = mock_scraper.format_markdown(articles) assert '# ID: test123' in markdown assert '## Title: Test Article' in markdown assert '## Author: Bryan Orr' in markdown assert '## Type: blog_post' in markdown assert '## Word Count: 250' in markdown assert '## Categories: HVAC, Heat Transfer' in markdown assert '## Permalink: http://www.hvacrschool.com/test-article/' in markdown assert '## Description:' in markdown @patch('time.sleep') def test_rate_limiting(self, mock_sleep, mock_scraper): """Test rate limiting functionality.""" mock_scraper.last_request_time = 0 mock_scraper.request_delay = 2.0 # First call should not sleep with patch('time.time', return_value=10.0): mock_scraper._apply_rate_limit() mock_sleep.assert_not_called() # Second call within delay period should sleep with patch('time.time', return_value=11.0): # 1 second later mock_scraper._apply_rate_limit() mock_sleep.assert_called_once_with(1.0) # Should sleep for 1 more second @patch('src.hvacrschool_scraper.HVACRSchoolScraper.fetch_sitemap_urls') @patch('src.hvacrschool_scraper.HVACRSchoolScraper.scrape_article') def test_fetch_content(self, mock_scrape_article, mock_fetch_sitemap, mock_scraper): """Test content fetching with max_items limit.""" # Mock sitemap URLs mock_fetch_sitemap.return_value = [ {'url': 'http://www.hvacrschool.com/article1/', 'lastmod': '2024-01-20T10:00:00Z'}, {'url': 'http://www.hvacrschool.com/article2/', 'lastmod': '2024-01-15T10:00:00Z'}, {'url': 'http://www.hvacrschool.com/article3/', 'lastmod': '2024-01-10T10:00:00Z'}, ] # Mock article scraping mock_scrape_article.side_effect = [ {'title': 'Article 1', 'url': 'http://www.hvacrschool.com/article1/'}, {'title': 'Article 2', 'url': 'http://www.hvacrschool.com/article2/'}, ] # Test with max_items limit articles = mock_scraper.fetch_content(max_items=2) assert len(articles) == 2 assert articles[0]['title'] == 'Article 1' assert articles[1]['title'] == 'Article 2' # Should have called 
scrape_article twice (limited by max_items) assert mock_scrape_article.call_count == 2