example:
import scrapy
class MySpider(scrapy.Spider):
    """Fetch one page through Playwright and emit its title and cookies.

    Each request asks scrapy-playwright to hand the live Playwright ``Page``
    to the callback (``playwright_include_page``). A page delivered this way
    is owned by the callback, so it is closed explicitly on both the success
    and the failure path; otherwise every request would leak an open page.

    NOTE(review): assumes the project settings already enable
    scrapy-playwright's download handlers and an asyncio-based Twisted
    reactor -- confirm against settings.py.
    """

    name = "my_spider"
    start_urls = ['https://www.basketball-reference.com/leagues/NBA_2022.html']

    async def start(self):
        """Yield one Playwright-backed request per entry in ``start_urls``."""
        for url in self.start_urls:
            yield scrapy.Request(
                url,
                callback=self.parse,
                # Without an errback, a failed request would leave its page open.
                errback=self.errback_close_page,
                meta={'playwright': True, 'playwright_include_page': True},
            )

    async def parse(self, response):
        """Read the page title and the browser-context cookies, then yield them.

        Args:
            response: Response whose ``meta['playwright_page']`` holds the
                Playwright page that rendered it.

        Yields:
            A dict with the keys ``'title'`` and ``'cookies'``.
        """
        page = response.meta['playwright_page']
        try:
            # Page title as reported by the browser.
            title = await page.title()
            self.logger.info("Page Title: %s", title)
            # Cookies are stored on the browser context, not on the page.
            cookies = await page.context.cookies()
            self.logger.info("Cookies: %s", cookies)
        finally:
            # Release the page even if one of the Playwright calls raised.
            await page.close()
        # Continue with the rest of the crawling logic.
        yield {'title': title, 'cookies': cookies}

    async def errback_close_page(self, failure):
        """Close the Playwright page attached to a failed request, if any.

        Args:
            failure: Failure object passed by Scrapy; its ``request.meta``
                may carry the ``'playwright_page'`` entry.
        """
        # The page may be absent when the failure happened before it was created.
        page = failure.request.meta.get('playwright_page')
        if page is not None:
            await page.close()
Version:
scrapy: 2.13.3
scrapy-playwright: 0.0.44
playwright: 1.55.0
I just want to get the cookies from the playwright.Page object, but it doesn't work. It seems that Scrapy's async handling conflicts with Playwright.
Please help, thanks.