@@ -182,7 +182,10 @@ def test_request_headers_merging_and_ua_override(self):
182182 assert sent_headers ["Accept-Language" ] == "fr-FR"
183183 assert sent_headers ["User-Agent" ] == "ua-sync-1" # rotating UA wins
184184
185- @pytest .mark .integration
185+
186+ @pytest .mark .flaky (reruns = 3 , reruns_delay = 5 )
187+ @pytest .mark .integration
188+ class TestLinkContentFetcherIntegration :
186189 def test_link_content_fetcher_html (self ):
187190 """
188191 Test fetching HTML content from a real URL.
@@ -195,7 +198,6 @@ def test_link_content_fetcher_html(self):
195198 assert "url" in first_stream .meta and first_stream .meta ["url" ] == HTML_URL
196199 assert first_stream .mime_type == "text/html"
197200
198- @pytest .mark .integration
199201 def test_link_content_fetcher_text (self ):
200202 """
201203 Test fetching text content from a real URL.
@@ -208,7 +210,6 @@ def test_link_content_fetcher_text(self):
208210 assert "url" in first_stream .meta and first_stream .meta ["url" ] == TEXT_URL
209211 assert first_stream .mime_type == "text/plain"
210212
211- @pytest .mark .integration
212213 def test_link_content_fetcher_multiple_different_content_types (self ):
213214 """
214215 This test is to ensure that the fetcher can handle a list of URLs that contain different content types.
@@ -225,7 +226,6 @@ def test_link_content_fetcher_multiple_different_content_types(self):
225226 assert len (stream .data ) > 0
226227 assert stream .mime_type == "application/pdf"
227228
228- @pytest .mark .integration
229229 def test_link_content_fetcher_multiple_html_streams (self ):
230230 """
231231 This test is to ensure that the fetcher can handle a list of URLs that contain different content types,
@@ -244,7 +244,6 @@ def test_link_content_fetcher_multiple_html_streams(self):
244244 assert len (stream .data ) > 0
245245 assert stream .mime_type == "application/pdf"
246246
247- @pytest .mark .integration
248247 def test_mix_of_good_and_failed_requests (self ):
249248 """
250249 This test is to ensure that the fetcher can handle a list of URLs that contain URLs that fail to be fetched.
@@ -259,8 +258,8 @@ def test_mix_of_good_and_failed_requests(self):
259258 assert first_stream .mime_type == "text/html"
260259
261260
261+ @pytest .mark .asyncio
262262class TestLinkContentFetcherAsync :
263- @pytest .mark .asyncio
264263 async def test_run_async (self ):
265264 """Test basic async fetching with a mocked response"""
266265 with patch ("haystack.components.fetchers.link_content.httpx.AsyncClient.get" ) as mock_get :
@@ -276,7 +275,6 @@ async def test_run_async(self):
276275 assert first_stream .meta ["content_type" ] == "text/plain"
277276 assert first_stream .mime_type == "text/plain"
278277
279- @pytest .mark .asyncio
280278 async def test_run_async_multiple (self ):
281279 """Test async fetching of multiple URLs with mocked responses"""
282280 with patch ("haystack.components.fetchers.link_content.httpx.AsyncClient.get" ) as mock_get :
@@ -295,14 +293,12 @@ async def test_run_async_multiple(self):
295293 assert stream .meta ["content_type" ] == "text/plain"
296294 assert stream .mime_type == "text/plain"
297295
298- @pytest .mark .asyncio
299296 async def test_run_async_empty_urls (self ):
300297 """Test async fetching with empty URL list"""
301298 fetcher = LinkContentFetcher ()
302299 streams = (await fetcher .run_async (urls = []))["streams" ]
303300 assert len (streams ) == 0
304301
305- @pytest .mark .asyncio
306302 async def test_run_async_error_handling (self ):
307303 """Test error handling for async fetching"""
308304 with patch ("haystack.components.fetchers.link_content.httpx.AsyncClient.get" ) as mock_get :
@@ -322,7 +318,6 @@ async def test_run_async_error_handling(self):
322318 with pytest .raises (httpx .HTTPStatusError ):
323319 await fetcher .run_async (urls = ["https://www.example.com" ])
324320
325- @pytest .mark .asyncio
326321 async def test_run_async_user_agent_rotation (self ):
327322 """Test user agent rotation in async fetching"""
328323 with (
@@ -355,34 +350,6 @@ async def test_run_async_user_agent_rotation(self):
355350
356351 mock_sleep .assert_called_once ()
357352
358- @pytest .mark .asyncio
359- @pytest .mark .integration
360- async def test_run_async_multiple_integration (self ):
361- """Test async fetching of multiple URLs with real HTTP requests"""
362- fetcher = LinkContentFetcher ()
363- streams = (await fetcher .run_async ([HTML_URL , TEXT_URL ]))["streams" ]
364- assert len (streams ) == 2
365-
366- for stream in streams :
367- assert "Haystack" in stream .data .decode ("utf-8" )
368-
369- if stream .meta ["url" ] == HTML_URL :
370- assert stream .meta ["content_type" ] == "text/html"
371- assert stream .mime_type == "text/html"
372- elif stream .meta ["url" ] == TEXT_URL :
373- assert stream .meta ["content_type" ] == "text/plain"
374- assert stream .mime_type == "text/plain"
375-
376- @pytest .mark .asyncio
377- @pytest .mark .integration
378- async def test_run_async_with_client_kwargs (self ):
379- """Test async fetching with custom client kwargs"""
380- fetcher = LinkContentFetcher (client_kwargs = {"follow_redirects" : True , "timeout" : 10.0 })
381- streams = (await fetcher .run_async ([HTML_URL ]))["streams" ]
382- assert len (streams ) == 1
383- assert "Haystack" in streams [0 ].data .decode ("utf-8" )
384-
385- @pytest .mark .asyncio
386353 async def test_request_headers_merging_and_ua_override (self ):
387354 # Patch the AsyncClient class to control the instance created by LinkContentFetcher
388355 with patch ("haystack.components.fetchers.link_content.httpx.AsyncClient" ) as AsyncClientMock :
@@ -405,7 +372,6 @@ async def test_request_headers_merging_and_ua_override(self):
405372 assert sent_headers ["Accept-Language" ] == "de-DE"
406373 assert sent_headers ["User-Agent" ] == "ua-async-1" # rotating UA wins
407374
408- @pytest .mark .asyncio
409375 async def test_duplicated_request_headers_merging (self ):
410376 # Patch the AsyncClient class to control the instance created by LinkContentFetcher
411377 with patch ("haystack.components.fetchers.link_content.httpx.AsyncClient" ) as AsyncClientMock :
@@ -439,3 +405,31 @@ async def test_duplicated_request_headers_merging(self):
439405
440406 assert "x-test-header" in existing_keys
441407 assert existing_keys ["x-test-header" ] == "X-TeSt-HeAdEr"
408+
409+
@pytest.mark.flaky(reruns=3, reruns_delay=5)
@pytest.mark.integration
@pytest.mark.asyncio
class TestLinkContentFetcherAsyncIntegration:
    """
    Integration tests for `LinkContentFetcher.run_async` that issue real HTTP requests.

    The marks are applied at class level so that every test method inherits them.
    NOTE(review): the `flaky` mark is presumably the one provided by pytest-rerunfailures
    (rerun a failed test up to `reruns` times, waiting `reruns_delay` seconds in between);
    confirm the plugin is installed in the test environment.
    """

    async def test_run_async_multiple_integration(self):
        """Test async fetching of multiple URLs with real HTTP requests"""
        # Expected content type for each requested URL
        expected_types = {HTML_URL: "text/html", TEXT_URL: "text/plain"}

        fetcher = LinkContentFetcher()
        streams = (await fetcher.run_async([HTML_URL, TEXT_URL]))["streams"]
        assert len(streams) == 2

        for stream in streams:
            assert "Haystack" in stream.data.decode("utf-8")

            # Fail loudly on a stream whose URL is neither of the requested ones; an
            # if/elif chain without an else would silently skip the type checks for it.
            url = stream.meta["url"]
            assert url in expected_types
            assert stream.meta["content_type"] == expected_types[url]
            assert stream.mime_type == expected_types[url]

    async def test_run_async_with_client_kwargs(self):
        """Test async fetching with custom client kwargs"""
        fetcher = LinkContentFetcher(client_kwargs={"follow_redirects": True, "timeout": 10.0})
        streams = (await fetcher.run_async([HTML_URL]))["streams"]
        assert len(streams) == 1
        assert "Haystack" in streams[0].data.decode("utf-8")
0 commit comments