Eray Bartan Blog

Eray Bartan Blog

AWS Lambda: Running a Python Scrapy Script

Subscribe to my newsletter and never miss my upcoming articles

My primary goal is to have a working function.

iocpsupport AWS Lambda error

handler.py

from emlakproject.emlakproject.spiders.yokarlilar import YokarlilarSpider

import scrapy
from scrapy.crawler import CrawlerProcess
import gspread
from emlakproject.emlakproject.spiders.yokarlilar import YokarlilarSpider


# Module-level Google Sheets setup; runs once when this module is imported.
# The service-account key file is given as a relative path, so it is resolved
# against the current working directory.
gc = gspread.service_account(filename='scrapy-sheets-database.json')
# Open the target spreadsheet by its key, then select the 'yokarlilar' tab.
wb = gc.open_by_key('1d1-dIZ1EiOEccScfm4JoRXJcxC4i5hxGfY2o57ZlJkA')
# NOTE(review): `ws` is not referenced by the handler code visible in this
# file; confirm whether this handle is still needed here.
ws = wb.worksheet('yokarlilar')

def main(event, context):
    """Run the Yokarlilar spider to completion (Lambda-style handler).

    Args:
        event: Invocation payload; not read by this function.
        context: Runtime context object; not read by this function.
    """
    crawler_process = CrawlerProcess()
    crawler_process.crawl(YokarlilarSpider)
    # start() blocks until the scheduled crawl has finished.
    crawler_process.start()

# Allow running the handler directly as a script, with placeholder arguments
# standing in for the event and context.
if __name__ == '__main__':
    main(event='', context='')

/emlakproject/spiders/yokarlilar.py

import scrapy
import gspread
from datetime import date
import time
# Captured once at import time; the spider stamps every appended row with
# this date.
today = date.today()

# Module-level Google Sheets setup; runs once when this module is imported.
# The service-account key file is given as a relative path, so it is resolved
# against the current working directory.
gc = gspread.service_account(filename='scrapy-sheets-database.json')
# Open the target spreadsheet by its key, then select the 'yokarlilar' tab.
wb = gc.open_by_key('1d1-dIZ1EiOEccScfm4JoRXJcxC4i5hxGfY2o57ZlJkA')
# Worksheet that the spider appends one row to per scraped listing.
ws = wb.worksheet('yokarlilar')

class YokarlilarSpider(scrapy.Spider):
    """Spider that scrapes the housing listings page of hasanyokarli.com.

    For every listing card found, ``parse`` yields an item dict and appends a
    matching row to the module-level gspread worksheet ``ws``.
    """

    name = 'yokarlilar'
    allowed_domains = ['hasanyokarli.com']
    start_urls = ['http://hasanyokarli.com/konut/']

    @staticmethod
    def _emlak_data(card, field):
        """Return the text of the ``emlak-data-<field>`` span in *card*.

        Returns None when the card has no such span.
        """
        return card.css('a div.emlak-data').xpath(
            'span[contains(@class, "emlak-data-%s")]/text()' % field
        ).get()

    @staticmethod
    def _to_number(text):
        """Convert scraped text to a float for the sheet.

        Returns an empty string (an empty cell) when the value is missing or
        not numeric, so one incomplete listing does not abort the whole page.
        """
        try:
            return float(text)
        except (TypeError, ValueError):
            return ''

    def parse(self, response):
        """Yield one item per listing card and append it to the worksheet.

        Args:
            response: The downloaded listings page.

        Yields:
            dict with the keys ``link``, ``title``, ``fiyat``, ``oda``,
            ``salon``, ``banyo`` and ``alan``. Values are the scraped strings,
            or None where the page did not provide them.
        """
        for emlak in response.css('div.pure-u-1.pure-u-sm-1-2.pure-u-md-1-3'):
            card = emlak.css('div.l-box.pos-r')

            href = card.css('a::attr(href)').get()
            if href is None:
                # A grid cell without a link is not a listing; skip it
                # instead of failing on string concatenation with None.
                continue

            # Resolve the href once so the item and the sheet row carry the
            # same, correctly joined URL (with or without a leading slash).
            link = response.urljoin(href)
            title = card.css('a::attr(title)').get()
            fiyat = card.css('a span.fiyat::attr(data-fiyat)').get()
            oda = self._emlak_data(card, 'oda')
            salon = self._emlak_data(card, 'salon')
            banyo = self._emlak_data(card, 'banyo')
            alan = self._emlak_data(card, 'alan')
            if alan is not None:
                # Drop surrounding whitespace and the trailing "m²" unit.
                alan = alan.strip(' \t\n\r m²')

            yield {
                'link': link,
                'title': title,
                'fiyat': fiyat,
                'oda': oda,
                'salon': salon,
                'banyo': banyo,
                'alan': alan,
            }

            # NOTE(review): the sleeps presumably pace writes to the Sheets
            # API; they are kept as-is. They block the crawl while waiting.
            time.sleep(1)
            ws.append_row([
                today.strftime("%d/%m/%Y"),
                link,
                title or '',
                self._to_number(fiyat),
                self._to_number(oda),
                self._to_number(salon),
                self._to_number(banyo),
                self._to_number(alan),
            ])
            time.sleep(1)
 
Share this

Impressum

Technical Specialties

  • Neo4j Graph Database Beginner Developer
  • Google Data Studio Dashboard Design
  • Microsoft Power BI Dashboard Design

Industry Experience

  • Municipality (IT Department, Data Analyst)
  • Real Estate (Web Design, Social Media Publisher, Mailchimp Automation)
  • Hotels & Restaurants (Web Design, Google Ads, Facebook Ads)
  • Sports (Web Design, Mailchimp Campaign)
Proudly part of