AWS Lambda: running a Python Scrapy script
My primary goal is to have a working Lambda function.
Error on AWS Lambda: iocpsupport
from emlakproject.emlakproject.spiders.yokarlilar import YokarlilarSpider
import scrapy
from scrapy.crawler import CrawlerProcess
import gspread
from emlakproject.emlakproject.spiders.yokarlilar import YokarlilarSpider
# Authenticate with the service-account key file (path is relative to the
# process working directory) and open the 'yokarlilar' worksheet by
# spreadsheet key. This runs at import time.
# NOTE(review): `ws` is not referenced by any code visible in this entry
# script, and the spider module opens the same worksheet itself — confirm
# whether this second connection is needed.
gc = gspread.service_account(filename='scrapy-sheets-database.json')
wb = gc.open_by_key('1d1-dIZ1EiOEccScfm4JoRXJcxC4i5hxGfY2o57ZlJkA')
ws = wb.worksheet('yokarlilar')
def main(event, context):
    """Run the YokarlilarSpider crawl to completion.

    Both parameters are accepted but never read by this function.

    Args:
        event: Invocation payload (unused).
        context: Invocation context (unused).
    """
    crawler = CrawlerProcess()
    crawler.crawl(YokarlilarSpider)
    # start() blocks until the scheduled crawl has finished.
    crawler.start()
# Allow running the crawl directly as a script; empty strings stand in for
# the two handler arguments.
if __name__ == "__main__":
    main(event='', context='')
/emlakproject/spiders/yokarlilar.py
import scrapy
import gspread
from datetime import date
import time
# Captured once, when this module is imported.
# NOTE(review): in a long-lived or reused process this value will not advance
# to the next day — confirm that is acceptable.
today = date.today()
# Authenticate with the service-account key file (path is relative to the
# process working directory) and open the spreadsheet by key. These calls run
# at import time.
gc = gspread.service_account(filename='scrapy-sheets-database.json')
wb = gc.open_by_key('1d1-dIZ1EiOEccScfm4JoRXJcxC4i5hxGfY2o57ZlJkA')
# Worksheet that the spider below writes scraped listings to.
ws = wb.worksheet('yokarlilar')
class YokarlilarSpider(scrapy.Spider):
    """Scrape housing listings from hasanyokarli.com into Google Sheets.

    Each listing is yielded as a Scrapy item (all values as scraped strings)
    and also recorded as a worksheet row (numeric fields converted to float).
    All rows for a page are written with a single ``append_rows`` call after
    the page has been processed.
    """

    name = 'yokarlilar'
    allowed_domains = ['hasanyokarli.com']
    start_urls = ['http://hasanyokarli.com/konut/']

    # Site root used to turn listing hrefs into absolute links.
    base_url = 'http://hasanyokarli.com'

    # Characters trimmed from the area text: whitespace plus the "m²" unit.
    _AREA_STRIP_CHARS = ' \t\n\r m²'

    @staticmethod
    def _emlak_data(ilanbilgileri, suffix):
        """Return the text of the ``emlak-data-<suffix>`` span, or None."""
        return ilanbilgileri.css('a div.emlak-data').xpath(
            'span[contains(@class, "emlak-data-%s")]/text()' % suffix
        ).get()

    @staticmethod
    def _to_float(value):
        """Convert *value* to float; return '' (an empty cell) if impossible."""
        try:
            return float(value)
        except (TypeError, ValueError):
            # TypeError: selector matched nothing (None).
            # ValueError: text is present but not numeric.
            return ''

    def parse(self, response):
        """Yield one item per listing and append the listings to the worksheet.

        Args:
            response: The Scrapy response for a listings page.

        Yields:
            dict: Keys ``link``, ``title``, ``fiyat``, ``oda``, ``salon``,
            ``banyo`` and ``alan``; values are the scraped strings (or None
            when the element is missing).
        """
        # Evaluate the date per call so a reused process never writes a stale
        # date.
        scraped_on = date.today().strftime("%d/%m/%Y")
        rows = []

        for emlak in response.css('div.pure-u-1.pure-u-sm-1-2.pure-u-md-1-3'):
            ilanbilgileri = emlak.css('div.l-box.pos-r')

            href = ilanbilgileri.css('a::attr(href)').get()
            if href is None:
                # A grid cell without a link cannot be a usable listing; skip
                # it instead of aborting the whole page with a TypeError.
                self.logger.warning(
                    'Skipping grid cell without a listing link on %s',
                    response.url,
                )
                continue

            # Build the link once so the item and the sheet row always agree,
            # with exactly one slash between host and path.
            link = self.base_url + '/' + href.lstrip('/')
            title = ilanbilgileri.css('a::attr(title)').get()
            fiyat = ilanbilgileri.css('a span.fiyat::attr(data-fiyat)').get()
            oda = self._emlak_data(ilanbilgileri, 'oda')
            salon = self._emlak_data(ilanbilgileri, 'salon')
            banyo = self._emlak_data(ilanbilgileri, 'banyo')
            alan = self._emlak_data(ilanbilgileri, 'alan')
            if alan is not None:
                alan = alan.strip(self._AREA_STRIP_CHARS)

            rows.append([
                scraped_on,
                link,
                title if title is not None else '',
                self._to_float(fiyat),
                self._to_float(oda),
                self._to_float(salon),
                self._to_float(banyo),
                self._to_float(alan),
            ])

            yield {
                'link': link,
                'title': title,
                'fiyat': fiyat,
                'oda': oda,
                'salon': salon,
                'banyo': banyo,
                'alan': alan,
            }

        # One batched write replaces the per-row append_row + time.sleep()
        # pattern: sleeping inside a callback blocks Scrapy's event loop, and a
        # single request avoids issuing one Sheets API call per listing.
        if rows:
            ws.append_rows(rows)