commit 2e7aa9a37ae50bde3809dedab8902a1b546d7e0c Author: Ondřej Hrachovina Date: Fri Feb 21 20:46:50 2025 +0100 upload diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..1dd0b3a --- /dev/null +++ b/.editorconfig @@ -0,0 +1,18 @@ +# EditorConfig is awesome: https://EditorConfig.org + +# top-most EditorConfig file +root = true + +[*] +indent_style = space +indent_size = 4 +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + +[*.yml] +indent_size = 2 + +[Makefile] +indent_style = tab diff --git a/.env b/.env new file mode 100644 index 0000000..1f3b1f2 --- /dev/null +++ b/.env @@ -0,0 +1,8 @@ +DEBUG=0 +FOUND_OFFERS_FILE=found_offers.txt +REFRESH_INTERVAL_DAYTIME_MINUTES=30 +REFRESH_INTERVAL_NIGHTTIME_MINUTES=90 +DISCORD_TOKEN=CREATE_ENV_LOCAL_AND_SET_TOKEN +DISCORD_OFFERS_CHANNEL=1067564052022300672 +DISCORD_DEV_CHANNEL=954732715377311825 +DISPOSITIONS=3+kk,3+1 diff --git a/.env.docker b/.env.docker new file mode 100644 index 0000000..a4703cd --- /dev/null +++ b/.env.docker @@ -0,0 +1 @@ +FOUND_OFFERS_FILE=/data/found_offers.txt diff --git a/.github/workflows/dockerhub.yml b/.github/workflows/dockerhub.yml new file mode 100644 index 0000000..7c336f8 --- /dev/null +++ b/.github/workflows/dockerhub.yml @@ -0,0 +1,26 @@ +name: Build and publish image on Docker Hub + +on: + push: + branches: [master] + +jobs: + build_docker_image: + name: Build and push Docker image to Docker Hub + runs-on: ubuntu-latest + steps: + - name: Check out the repo + uses: actions/checkout@v2 + + - name: Log in to Docker Hub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Build and push Docker image + uses: docker/build-push-action@v2 + with: + context: . 
+ push: true + tags: janch32/web-scraper-nabidek-pronajmu:latest diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e03134d --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +**/__pycache__ +remembered_offers.txt +found_offers.txt +data/ +.env.local diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..c08fad6 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,18 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Launch project", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/src/main.py", + "console": "integratedTerminal", + "env": { + "DEBUG": "1" + } + } + ] +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..bc5f163 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "cSpell.words": [ + "Idnes", + "Realingo", + "sreality" + ] +} diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..d41a209 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,17 @@ +FROM python:3-alpine + +WORKDIR /app +VOLUME ["/data"] + +COPY requirements.txt requirements.txt +RUN apk add --no-cache --virtual .build-deps gcc musl-dev && \ + pip3 install -r requirements.txt && \ + apk del .build-deps + +ENV APP_ENV=docker + +COPY .env* . +COPY src . 
+COPY graphql ./graphql + +CMD ["python3", "main.py"] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..cb06f79 --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +.PHONY: install debug + +install: + python3 -m pip install -r requirements.txt + +run: + python3 src/main.py + +debug: + DEBUG=1 + python3 src/main.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..d399113 --- /dev/null +++ b/README.md @@ -0,0 +1,58 @@ +# Web Scraper Nabídek Pronájmu +Hlídá nové nabídky na populárních realitních serverech. + +[**Docker image (aktuální z master větvě) - `janch32/web-scraper-nabidek-pronajmu`**](https://hub.docker.com/r/janch32/web-scraper-nabidek-pronajmu) + +*Tato aplikace byla vytvořena pro osobní použití, takže obsahuje hardkódované údaje pro hledání pronájmu bytů v Brně (ale nemělo by být zas tak moc těžký to upravit).* + +Nicméně je možné při spuštění aplikace nakonfigurovat, které **dispozice bytu** (počet místností) hledat. + +## Podporované realitní servery +- BRAVIS +- EuroBydlení +- iDNES Reality +- REALCITY +- realingo +- Remax +- Sreality +- UlovDomov +- BezRealitky + +## Spuštění +- Lze spustit lokálně nebo v Dockeru +- **Lokální spuštění** + - Je vyžadován **Python 3.11+** + - Před prvním spuštěním nainstalujte závislosti `make install` + - Vytvořte si lokální soubor `.env.local` a nastavte v něm všechny požadované parametry (minimálně však Discord token, cílovou roomku a požadované dispozice bytu) + - následně je možné spustit `make run` nebo v debug režimu `make debug` +- **Spuštění v Dockeru** + - Přiložená Docker Compose konfigurace souží pro vývoj. Stačí ji spustit příkazem `docker-compose up -d` (má zapnutý debug mód) + - K dispozici je také sestavený Docker obraz v Ducker Hub, vždy aktuální s master větví - [`janch32/web-scraper-nabidek-pronajmu`](https://hub.docker.com/r/janch32/web-scraper-nabidek-pronajmu) + - Kromě toho je možné vytvořit "produkční" Docker image díky `Dockerfile`. 
Při spuštění kontejneru je nutné nastavit všechny požadované env proměnné (ne v .env.local!) + +Aplikace při prvním spuštění nevypíše žádné nabídky, pouze si stáhne seznam těch aktuálních. Poté každých 30 minut (nastavitelné přes env proměnné) kontroluje nové nabídky na realitních serverech a ty přeposílá do Discord kanálu.
+- `REFRESH_INTERVAL_DAYTIME_MINUTES` - interval, po kterém se mají stáhnout nejnovější nabídky. Výchozí 30min, doporučeno minimálně 10min
balconySurfaceFrom: $balconyFrom\n balconySurfaceTo: $balconyTo\n loggiaSurfaceFrom: $loggiaFrom\n loggiaSurfaceTo: $loggiaTo\n terraceSurfaceFrom: $terraceFrom\n terraceSurfaceTo: $terraceTo\n cellarSurfaceFrom: $cellarFrom\n cellarSurfaceTo: $cellarTo\n parking: $parking\n garage: $garage\n newBuilding: $newBuilding\n lift: $lift\n ownership: $ownership\n construction: $construction\n equipped: $equipped\n priceFrom: $priceFrom\n priceTo: $priceTo\n surfaceFrom: $surfaceFrom\n surfaceTo: $surfaceTo\n ids: $advertId\n roommate: $roommate\n includeImports: $includeImports\n boundaryPoints: $boundaryPoints\n discountedOnly: $discountedOnly\n polygonBuffer: $polygonBuffer\n barrierFree: $barrierFree\n availableFrom: $availableFrom\n ) {\n list {\n id\n uri\n estateType\n offerType\n disposition\n imageAltText(locale: $locale)\n mainImage {\n id\n url(filter: RECORD_THUMB)\n __typename\n }\n address(locale: $locale)\n surface\n surfaceLand\n tags(locale: $locale)\n price\n charges\n currency\n petFriendly\n reserved\n highlighted\n roommate\n project {\n id\n __typename\n }\n gps {\n lat\n lng\n __typename\n }\n mortgageData(locale: $locale) {\n rateLow\n rateHigh\n loan\n years\n __typename\n }\n originalPrice\n isDiscounted\n nemoreport {\n id\n status\n timeCreated\n __typename\n }\n isNew\n videos {\n id\n previewUrl\n status\n __typename\n }\n links {\n id\n url\n type\n status\n __typename\n }\n __typename\n }\n totalCount\n __typename\n }\n actionList: listAdverts(\n offerType: $offerType\n estateType: $estateType\n disposition: $disposition\n regionId: $region\n regionOsmIds: $regionOsmIds\n offset: $offset\n order: $order\n petFriendly: $petFriendly\n balconySurfaceFrom: $balconyFrom\n balconySurfaceTo: $balconyTo\n loggiaSurfaceFrom: $loggiaFrom\n loggiaSurfaceTo: $loggiaTo\n terraceSurfaceFrom: $terraceFrom\n terraceSurfaceTo: $terraceTo\n cellarSurfaceFrom: $cellarFrom\n cellarSurfaceTo: $cellarTo\n parking: $parking\n garage: $garage\n newBuilding: 
$newBuilding\n lift: $lift\n ownership: $ownership\n construction: $construction\n equipped: $equipped\n priceFrom: $priceFrom\n priceTo: $priceTo\n surfaceFrom: $surfaceFrom\n surfaceTo: $surfaceTo\n ids: $advertId\n roommate: $roommate\n includeImports: $includeImports\n boundaryPoints: $boundaryPoints\n discountedOnly: true\n limit: 3\n availableFrom: $availableFrom\n ) {\n list {\n id\n uri\n estateType\n offerType\n disposition\n imageAltText(locale: $locale)\n mainImage {\n id\n url(filter: RECORD_THUMB)\n __typename\n }\n address(locale: $locale)\n surface\n surfaceLand\n tags(locale: $locale)\n price\n charges\n currency\n petFriendly\n reserved\n highlighted\n roommate\n project {\n id\n __typename\n }\n gps {\n lat\n lng\n __typename\n }\n mortgageData(locale: $locale) {\n rateLow\n rateHigh\n loan\n years\n __typename\n }\n originalPrice\n isDiscounted\n nemoreport {\n id\n status\n timeCreated\n __typename\n }\n isNew\n videos {\n id\n previewUrl\n status\n __typename\n }\n links {\n id\n url\n type\n status\n __typename\n }\n __typename\n }\n totalCount\n __typename\n }\n}\n" +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f96895f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +requests +beautifulsoup4 +python-dotenv +discord.py +environ-config diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..d0d0fed --- /dev/null +++ b/src/config.py @@ -0,0 +1,55 @@ +import functools +import operator +import os +from pathlib import Path + +import environ +from dotenv import load_dotenv + +from disposition import Disposition + +load_dotenv(".env") + +app_env = os.getenv("APP_ENV") +if app_env: + load_dotenv(".env." 
+ app_env, override=True) + +load_dotenv(".env.local", override=True) + +_str_to_disposition_map = { + "1+kk": Disposition.FLAT_1KK, + "1+1": Disposition.FLAT_1, + "2+kk": Disposition.FLAT_2KK, + "2+1": Disposition.FLAT_2, + "3+kk": Disposition.FLAT_3KK, + "3+1": Disposition.FLAT_3, + "4+kk": Disposition.FLAT_4KK, + "4+1": Disposition.FLAT_4, + "5++": Disposition.FLAT_5_UP, + "others": Disposition.FLAT_OTHERS +} + +def dispositions_converter(raw_disps: str): + return functools.reduce(operator.or_, map(lambda d: _str_to_disposition_map[d], raw_disps.split(",")), Disposition.NONE) + + +@environ.config(prefix="") +class Config: + debug: bool = environ.bool_var() + found_offers_file: Path = environ.var(converter=Path) + refresh_interval_daytime_minutes: int = environ.var(converter=int) + refresh_interval_nighttime_minutes: int = environ.var(converter=int) + dispositions: Disposition = environ.var(converter=dispositions_converter) + + @environ.config() + class Discord: + token = environ.var() + offers_channel_1kk = environ.var(converter=int) + offers_channel_1 = environ.var(converter=int) + offers_channel_2kk = environ.var(converter=int) + offers_channel_2 = environ.var(converter=int) + dev_channel = environ.var(converter=int) + + discord: Discord = environ.group(Discord) + +config: Config = Config.from_environ() diff --git a/src/discord_logger.py b/src/discord_logger.py new file mode 100644 index 0000000..f737d8b --- /dev/null +++ b/src/discord_logger.py @@ -0,0 +1,13 @@ +import logging + + +class DiscordLogger(logging.Handler): + def __init__(self, client, channel, level) -> None: + super().__init__(level) + self.client = client + self.channel = channel + + def emit(self, record: logging.LogRecord): + message = "**{}**\n```\n{}\n```".format(record.levelname, record.getMessage()) + + self.client.loop.create_task(self.channel.send(message)) diff --git a/src/disposition.py b/src/disposition.py new file mode 100644 index 0000000..fb0b530 --- /dev/null +++ 
b/src/disposition.py @@ -0,0 +1,15 @@ +from enum import Flag, auto + + +class Disposition(Flag): + NONE = 0 + FLAT_1KK = auto() # 1kk + FLAT_1 = auto() # 1+1 + FLAT_2KK = auto() # 2kk + FLAT_2 = auto() # 2+1 + FLAT_3KK = auto() # 3kk + FLAT_3 = auto() # 3+1 + FLAT_4KK = auto() # 4kk + FLAT_4 = auto() # 4+1 + FLAT_5_UP = auto() # 5+ + FLAT_OTHERS = auto() # others diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..40b9351 --- /dev/null +++ b/src/main.py @@ -0,0 +1,115 @@ +#!/usr/bin/evn python3 +import logging +from datetime import datetime +from time import time + +import discord +from discord.ext import tasks + +from config import * +from discord_logger import DiscordLogger +from offers_storage import OffersStorage +from scrapers.rental_offer import RentalOffer +from scrapers_manager import create_scrapers, create_scrapers_1_1, create_scrapers_1kk, create_scrapers_2_2, create_scrapers_2kk, fetch_latest_offers + + +def get_current_daytime() -> bool: return datetime.now().hour in range(6, 22) + + +client = discord.Client(intents=discord.Intents.default(), heartbeat_timeout=30) +daytime = get_current_daytime() +interval_time = config.refresh_interval_daytime_minutes if daytime else config.refresh_interval_nighttime_minutes + + +#scrapers = create_scrapers(config.dispositions) + +scrapers = [] +scrapers += create_scrapers_1kk() +scrapers += create_scrapers_1_1() +scrapers += create_scrapers_2kk() +scrapers += create_scrapers_2_2() + +@client.event +async def on_ready(): + global storage + + dev_channel = client.get_channel(config.discord.dev_channel) + ##channel = client.get_channel(config.discord.offers_channel) + storage = OffersStorage(config.found_offers_file) + + if not config.debug: + discord_error_logger = DiscordLogger(client, dev_channel, logging.ERROR) + logging.getLogger().addHandler(discord_error_logger) + else: + logging.info("Discord logger is inactive in debug mode") + + logging.info("Available scrapers: " + ", 
".join(dict.fromkeys(s.name for s in scrapers))) + + logging.info("Fetching latest offers every {} minutes".format(interval_time)) + + process_latest_offers.start() + + +async def send_offer(offer, channel): + embed = discord.Embed( + title=offer.title, + url=offer.link, + description=offer.location, + timestamp=datetime.utcnow(), + color=offer.scraper.color + ) + + embed.add_field(name="Cena", value=str(offer.price) + " Kč") + embed.set_author(name=offer.scraper.name, icon_url=offer.scraper.logo_url) + embed.set_image(url=offer.image_url) + + await client.get_channel(channel).send(embed=embed) + + +@tasks.loop(minutes=interval_time) +async def process_latest_offers(): + logging.info("Fetching offers") + + new_offers: list[RentalOffer] = [] + channel_offers = [] + for xchannel, offer in fetch_latest_offers(scrapers): + if not storage.contains(offer): + new_offers.append(offer) + channel_offers.append((xchannel, offer)) + + first_time = storage.first_time + storage.save_offers(new_offers) + + logging.info("Offers fetched (new: {})".format(len(new_offers))) + + if not first_time: + for xchannel, offer in channel_offers: + try: + await send_offer(offer, xchannel) + except: + logging.info(f"send offer exp: {offer.link}") + else: + logging.info("No previous offers, first fetch is running silently") + + global daytime, interval_time + if daytime != get_current_daytime(): # Pokud stary daytime neodpovida novemu + + daytime = not daytime # Zneguj daytime (podle podminky se zmenil) + + interval_time = config.refresh_interval_daytime_minutes if daytime else config.refresh_interval_nighttime_minutes + + logging.info("Fetching latest offers every {} minutes".format(interval_time)) + process_latest_offers.change_interval(minutes=interval_time) + + #await channel.edit(topic="Last update {}".format("".format(int(time())))) + + +if __name__ == "__main__": + logging.basicConfig( + level=(logging.DEBUG if config.debug else logging.INFO), + format='%(asctime)s - [%(levelname)s] 
%(message)s', + datefmt='%Y-%m-%d %H:%M:%S') + + logging.debug("Running in debug mode") + + client.run(config.discord.token, log_level=logging.INFO) diff --git a/src/offers_storage.py b/src/offers_storage.py new file mode 100644 index 0000000..c4cceb8 --- /dev/null +++ b/src/offers_storage.py @@ -0,0 +1,50 @@ +import os + +from scrapers.rental_offer import RentalOffer + + +class OffersStorage: + """Úložiště dříve nalezených nabídek""" + + def __init__(self, path: str): + self.path = path + """Cesta k uloženým odkazům""" + + self.first_time = False + """Neproběhl pokus o uložení nabídek (soubor neexistuje)""" + + self._links: set[str] = set() + """Seznam URL odkazů na všechny nalezené nabídky""" + + try: + with open(self.path) as file: + for line in file: + self._links.add(line.strip()) + except FileNotFoundError: + self.first_time = True + + + def contains(self, offer: RentalOffer) -> bool: + """Objevila se nabídka již dříve? + + Args: + offer (RentalOffer): Nabídka + + Returns: + bool: Jde o starou nabídku + """ + return offer.link in self._links + + + def save_offers(self, offers: list[RentalOffer]): + """Uložit nabídky jako nalezené + + Args: + offers (list[RentalOffer]): Nalezené nabídky + """ + with open(self.path, 'a+') as file_object: + for offer in offers: + self._links.add(offer.link) + file_object.write(offer.link + os.linesep) + + self.first_time = False diff --git a/src/scrapers/rental_offer.py b/src/scrapers/rental_offer.py new file mode 100644 index 0000000..33bd86a --- /dev/null +++ b/src/scrapers/rental_offer.py @@ -0,0 +1,26 @@ +from dataclasses import dataclass + +from disposition import Disposition + + +@dataclass +class RentalOffer: + """Nabídka pronájmu bytu""" + + link: str + """URL adresa na nabídku""" + + title: str + """Popis nabídky (nejčastěji počet pokojů, výměra)""" + + location: str + """Lokace bytu (městská část, ulice)""" + + price: int | str + """Cena pronájmu za měsíc bez poplatků a energií""" + + image_url: str + """Náhledový 
obrázek nabídky""" + + scraper: 'ScraperBase' + """Odkaz na instanci srapera, ze kterého tato nabídka pochází""" diff --git a/src/scrapers/scraper_base.py b/src/scrapers/scraper_base.py new file mode 100644 index 0000000..a918270 --- /dev/null +++ b/src/scrapers/scraper_base.py @@ -0,0 +1,68 @@ +from abc import abstractmethod +from typing import Any + +from requests import Response + +from disposition import Disposition +from scrapers.rental_offer import RentalOffer +from utils import flatten + + +class ScraperBase(): + """Hlavní třída pro získávání aktuálních nabídek pronájmu bytů z různých služeb + """ + + user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" + headers = {"User-Agent": user_agent} + + @property + @abstractmethod + def name(self) -> str: + pass + + @property + @abstractmethod + def logo_url(self) -> str: + pass + + @property + @abstractmethod + def color(self) -> int: + pass + + @property + @abstractmethod + def disposition_mapping(self) -> dict[Disposition, Any]: + pass + + def __init__(self, disposition: Disposition, channel) -> None: + super().__init__() + self.disposition = disposition + self.channel = channel + + def get_dispositions_data(self) -> list: + return list(flatten([self.disposition_mapping[d] for d in self.disposition])) + + @abstractmethod + def build_response() -> Response: + """Vytvoří a pošle dotaz na server pro získání nabídek podle nakonfigurovaných parametrů + + Raises: + NotImplementedError: Pokud potomek neimplementuje tuto metodu + + Returns: + Response: Odpověď nabídkového serveru obsahující neparsované nabídky + """ + raise NotImplementedError("Server request builder is not implemeneted") + + @abstractmethod + def get_latest_offers() -> list[RentalOffer]: + """Načte a vrátí seznam nejnovějších nabídek bytů k pronájmu z dané služby + + Raises: + NotImplementedError: Pokud potomek neimplementuje tuto metodu + + Returns: + list[RentalOffer]: Seznam 
nabízených bytů k pronájmu + """ + raise NotImplementedError("Fetching new results is not implemeneted") diff --git a/src/scrapers/scraper_bezrealitky.py b/src/scrapers/scraper_bezrealitky.py new file mode 100644 index 0000000..ef83e01 --- /dev/null +++ b/src/scrapers/scraper_bezrealitky.py @@ -0,0 +1,86 @@ +""" Scraper for BezRealitky.cz +author: Mark Barzali +""" + +import json +from abc import ABC as abstract +from typing import ClassVar + +from disposition import Disposition +from scrapers.scraper_base import ScraperBase +from scrapers.rental_offer import RentalOffer +import requests + + +class ScraperBezrealitky(ScraperBase): + + name = "BezRealitky" + logo_url = "https://www.bezrealitky.cz/manifest-icon-192.maskable.png" + color = 0x00CC00 + base_url = "https://www.bezrealitky.cz" + file: ClassVar[str] = "./graphql/bezrealitky.json" + + API: ClassVar[str] = "https://api.bezrealitky.cz/" + OFFER_TYPE: ClassVar[str] = "PRONAJEM" + ESTATE_TYPE: ClassVar[str] = "BYT" + BRNO: ClassVar[str] = "R438171" + + class Routes(abstract): + GRAPHQL: ClassVar[str] = "graphql/" + OFFERS: ClassVar[str] = "nemovitosti-byty-domy/" + + disposition_mapping = { + Disposition.FLAT_1KK: "DISP_1_KK", + Disposition.FLAT_1: "DISP_1_1", + Disposition.FLAT_2KK: "DISP_2_KK", + Disposition.FLAT_2: "DISP_2_1", + Disposition.FLAT_3KK: "DISP_3_KK", + Disposition.FLAT_3: "DISP_3_1", + Disposition.FLAT_4KK: "DISP_4_KK", + Disposition.FLAT_4: "DISP_4_1", + Disposition.FLAT_5_UP: None, + Disposition.FLAT_OTHERS: None, + } + + def __init__(self, dispositions: Disposition, channel): + super().__init__(dispositions, channel) + self._read_config() + self._patch_config() + + def _read_config(self) -> None: + with open(ScraperBezrealitky.file, "r") as file: + self._config = json.load(file) + + def _patch_config(self): + match = { + "estateType": self.ESTATE_TYPE, + "offerType": self.OFFER_TYPE, + "disposition": self.get_dispositions_data(), + "regionOsmIds": [self.BRNO], + } + 
self._config["variables"].update(match) + + @staticmethod + def _create_link_to_offer(item: dict) -> str: + return f"{ScraperBezrealitky.base_url}/{ScraperBezrealitky.Routes.OFFERS}{item}" + + def build_response(self) -> requests.Response: + return requests.post( + url=f"{ScraperBezrealitky.API}{ScraperBezrealitky.Routes.GRAPHQL}", + json=self._config + ) + + def get_latest_offers(self) -> list[RentalOffer]: + response = self.build_response().json() + + return [ # type: list[RentalOffer] + RentalOffer( + scraper=self, + link=self._create_link_to_offer(item["uri"]), + title=item["imageAltText"], + location=item["address"], + price=f"{item['price']} / {item['charges']}", + image_url=item["mainImage"]["url"] if item["mainImage"] else "", + ) + for item in response["data"]["listAdverts"]["list"] + ] diff --git a/src/scrapers/scraper_bravis.py b/src/scrapers/scraper_bravis.py new file mode 100644 index 0000000..355b11d --- /dev/null +++ b/src/scrapers/scraper_bravis.py @@ -0,0 +1,62 @@ +import logging +import re +from urllib.parse import urljoin + +import requests +from bs4 import BeautifulSoup + +from disposition import Disposition +from scrapers.rental_offer import RentalOffer +from scrapers.scraper_base import ScraperBase + + +class ScraperBravis(ScraperBase): + + name = "BRAVIS" + logo_url = "https://www.bravis.cz/content/img/logo-small.png" + color = 0xCE0020 + base_url = "https://www.bravis.cz/pronajem-bytu" + + + def build_response(self) -> requests.Response: + url = self.base_url + "?" 
+ + if Disposition.FLAT_1KK in self.disposition or Disposition.FLAT_1 in self.disposition: + url += "typ-nemovitosti-byt+1=&" + if Disposition.FLAT_2KK in self.disposition or Disposition.FLAT_2 in self.disposition: + url += "typ-nemovitosti-byt+2=&" + if Disposition.FLAT_3KK in self.disposition or Disposition.FLAT_3 in self.disposition: + url += "typ-nemovitosti-byt+3=&" + if Disposition.FLAT_4KK in self.disposition or Disposition.FLAT_4 in self.disposition: + url += "typ-nemovitosti-byt+4=&" + if Disposition.FLAT_5_UP in self.disposition: + url += "typ-nemovitosti-byt+5=&" + + url += "typ-nabidky=pronajem-bytu&lokalita=cele-brno&vybavenost=nezalezi&q=&action=search&s=1-20-order-0" + + logging.debug("BRAVIS request: %s", url) + + return requests.get(url, headers=self.headers) + + def get_latest_offers(self) -> list[RentalOffer]: + response = self.build_response() + soup = BeautifulSoup(response.text, 'html.parser') + + items: list[RentalOffer] = [] + + for item in soup.select("#search > .in > .itemslist > li"): + if item.get("class"): + continue + + params = item.select(".params > li") + + items.append(RentalOffer( + scraper = self, + link = urljoin(self.base_url, item.select_one("a.main").get("href")), + title = "Pronájem " + params[1].find("strong").get_text().strip() + ", " + params[2].find("strong").get_text().strip(), + location = item.select_one(".location").get_text().strip(), + price = int(re.sub(r"[^\d]", "", [text for text in item.select_one(".price").stripped_strings][0])), + image_url = urljoin(self.base_url, item.select_one(".img > img").get("src")) + )) + + return items diff --git a/src/scrapers/scraper_euro_bydleni.py b/src/scrapers/scraper_euro_bydleni.py new file mode 100644 index 0000000..e6cf2ff --- /dev/null +++ b/src/scrapers/scraper_euro_bydleni.py @@ -0,0 +1,95 @@ +import json +import logging +import re +from urllib.parse import urljoin + +import requests +from bs4 import BeautifulSoup + +from disposition import Disposition +from 
scrapers.rental_offer import RentalOffer +from scrapers.scraper_base import ScraperBase +from scrapers.rental_offer import RentalOffer +from urllib.parse import urljoin +from bs4 import BeautifulSoup + + +class ScraperEuroBydleni(ScraperBase): + + name = "Eurobydlení" + logo_url = "https://files.janchaloupka.cz/eurobydleni.png" + color = 0xFA0F54 + base_url = "https://www.eurobydleni.cz/search-form" + + cookies = {"listing-sort": "sort-added"} + disposition_mapping = { + Disposition.FLAT_1: 15, + Disposition.FLAT_1KK: 16, + Disposition.FLAT_2: 17, + Disposition.FLAT_2KK: 18, + Disposition.FLAT_3: 19, + Disposition.FLAT_3KK: 20, + Disposition.FLAT_4: 21, + Disposition.FLAT_4KK: 22, + Disposition.FLAT_5_UP: (202, 256), # (5+1, 5kk) + Disposition.FLAT_OTHERS: (14, 857), # (Garsonka, Apartman) + } + + + def build_response(self) -> requests.Response: + request_data = { + "sql[advert_type_eu][]": 7, + "sql[advert_subtype_eu][]": self.get_dispositions_data(), + "sql[advert_function_eu][]": 3, + "sql[advert_price_min]": "", + "sql[advert_price_max]": "", + "sql[usable_area_min]": "", + "sql[usable_area_max]": "", + "sql[estate_area_min]": "", + "sql[estate_area_max]": "", + "sql[locality][locality][input]": "Brno, Česko", + "sql[locality][locality][city]": "Brno, Česko", + "sql[locality][locality][zip_code]": "", + "sql[locality][locality][types]": "locality", + "sql[locality][location][lat]": "49.1950602", + "sql[locality][location][lng]": "16.6068371", + "sql[locality][viewport][south]": "49.10965517428777", + "sql[locality][viewport][west]": "16.42806782678905", + "sql[locality][viewport][north]": "49.294484956308", + "sql[locality][viewport][east]": "16.72785321479357", + "sql[poptavka][jmeno]": "", + "sql[poptavka][prijmeni]": "", + "sql[poptavka][email]": "", + "sql[poptavka][telefon]": "" + } + + logging.debug("EuroBydlení request: %s", json.dumps(request_data)) + + response = requests.post(self.base_url, headers=self.headers, cookies=self.cookies, 
class ScraperIdnesReality(ScraperBase):
    """Scraper for rental offers on reality.idnes.cz (Brno-město)."""

    name = "iDNES Reality"
    logo_url = "https://sta-reality2.1gr.cz/ui/image/favicons/favicon-32x32.png"
    color = 0x1D80D7

    # Pre-encoded query-string fragments selecting each flat disposition.
    disposition_mapping = {
        Disposition.FLAT_1KK: "s-qc%5BsubtypeFlat%5D%5B%5D=1k",
        Disposition.FLAT_1: "s-qc%5BsubtypeFlat%5D%5B%5D=11",
        Disposition.FLAT_2KK: "s-qc%5BsubtypeFlat%5D%5B%5D=2k",
        Disposition.FLAT_2: "s-qc%5BsubtypeFlat%5D%5B%5D=21",
        Disposition.FLAT_3KK: "s-qc%5BsubtypeFlat%5D%5B%5D=3k",
        Disposition.FLAT_3: "s-qc%5BsubtypeFlat%5D%5B%5D=31",
        Disposition.FLAT_4KK: "s-qc%5BsubtypeFlat%5D%5B%5D=4k",
        Disposition.FLAT_4: "s-qc%5BsubtypeFlat%5D%5B%5D=41",
        Disposition.FLAT_5_UP: (
            "s-qc%5BsubtypeFlat%5D%5B%5D=5k",
            "s-qc%5BsubtypeFlat%5D%5B%5D=51",
            "s-qc%5BsubtypeFlat%5D%5B%5D=6k",  # 6 and more
        ),
        Disposition.FLAT_OTHERS: "s-qc%5BsubtypeFlat%5D%5B%5D=atypical",  # atypical
    }


    def build_response(self) -> requests.Response:
        """Compose the search URL from the disposition filters and GET it."""
        url = ("https://reality.idnes.cz/s/pronajem/byty/brno-mesto/?"
               + "&".join(self.get_dispositions_data()))

        logging.debug("iDNES reality request: %s", url)

        return requests.get(url, headers=self.headers)

    def get_latest_offers(self) -> list[RentalOffer]:
        """Scrape the first page of search results into RentalOffer objects."""
        soup = BeautifulSoup(self.build_response().text, 'html.parser')

        found: list[RentalOffer] = []

        results = soup.find(id="snippet-s-result-articles")
        for product in results.find_all("div", {"class": "c-products__item"}):

            # Paid advertisement tiles are mixed into results; skip them.
            if "c-products__item-advertisment" in product.get("class"):
                continue

            raw_title = product.find("h2", {"class": "c-products__title"}).get_text().strip()

            found.append(RentalOffer(
                scraper = self,
                link = product.find("a", {"class": "c-products__link"}).get('href'),
                title = ' '.join(raw_title.splitlines()),
                location = product.find("p", {"class": "c-products__info"}).get_text().strip(),
                price = int(re.sub(r"[^\d]", "", product.find("p", {"class": "c-products__price"}).get_text()) or "0"),
                image_url = product.find("img").get("data-src")
            ))

        return found
class ScraperRealcity(ScraperBase):
    """Scraper for rental offers on realcity.cz (Brno-město district)."""

    name = "REALCITY"
    logo_url = "https://files.janchaloupka.cz/realcity.png"
    color = 0xB60D1C

    # URL-encoded JSON fragments for the site's disposition filter.
    disposition_mapping = {
        Disposition.FLAT_1KK: "%221%2Bkk%22",
        Disposition.FLAT_1: "%221%2B1%22",
        Disposition.FLAT_2KK: "%222%2Bkk%22",
        Disposition.FLAT_2: "%222%2B1%22",
        Disposition.FLAT_3KK: "%223%2Bkk%22",
        Disposition.FLAT_3: "%223%2B1%22",
        Disposition.FLAT_4KK: "%224%2Bkk%22",
        Disposition.FLAT_4: ("%224%2B1%22", "%224%2B2%22"),  # 4+1, 4+2
        Disposition.FLAT_5_UP: ("%225%2Bkk%22", "%225%2B1%22", "%225%2B2%22", "%226%2Bkk%22", "%226%2B1%22", "%22disp_more%22"),  # 5kk, 5+1, 5+2, 6kk, 6+1, ++
        Disposition.FLAT_OTHERS: ("%22atyp%22", "%22disp_nospec%22"),  # atyp, unknown
    }


    def build_response(self) -> requests.Response:
        """GET the search page with a URL-encoded JSON filter in the query string."""
        url = "https://www.realcity.cz/pronajem-bytu/brno-mesto-68/?sp=%7B%22locality%22%3A%5B68%5D%2C%22transactionTypes%22%3A%5B%22rent%22%5D%2C%22propertyTypes%22%3A%5B%7B%22propertyType%22%3A%22flat%22%2C%22options%22%3A%7B%22disposition%22%3A%5B"
        url += "%2C".join(self.get_dispositions_data())
        url += "%5D%7D%7D%5D%7D"

        logging.debug("REALCITY request: %s", url)

        return requests.get(url, headers=self.headers)

    def get_latest_offers(self) -> list[RentalOffer]:
        """Parse the result listing into RentalOffer objects."""
        response = self.build_response()
        soup = BeautifulSoup(response.text, 'html.parser')

        items: list[RentalOffer] = []

        for item in soup.select("#rc-advertise-result .media.advertise.item"):
            image = item.find("div", "pull-left image")
            body = item.find("div", "media-body")

            items.append(RentalOffer(
                scraper=self,
                link="https://www.realcity.cz" + body.find("div", "title").a.get("href"),
                title=body.find("div", "title").a.get_text() or "Chybí titulek",
                location=body.find("div", "address").get_text().strip() or "Chybí adresa",
                # BUGFIX: price was previously left as a str (re.sub returns str,
                # and could even be "" for non-numeric text). Convert to int with
                # a 0 fallback, consistent with every other scraper in this project.
                price=int(re.sub(r'\D+', '', body.find("div", "price").get_text()) or "0"),
                image_url="https:" + image.img.get("src")
            ))

        return items
class ScraperRealingo(ScraperBase):
    """Scraper for rental offers on realingo.cz via its GraphQL API."""

    name = "realingo"
    logo_url = "https://www.realingo.cz/_next/static/media/images/android-chrome-144x144-cf1233ce.png"
    color = 0x00BC78
    base_url = "https://www.realingo.cz/graphql"

    # GraphQL OfferCategory enum values for each flat disposition.
    disposition_mapping = {
        Disposition.FLAT_1KK: "FLAT1_KK",
        Disposition.FLAT_1: "FLAT11",
        Disposition.FLAT_2KK: "FLAT2_KK",
        Disposition.FLAT_2: "FLAT21",
        Disposition.FLAT_3KK: "FLAT3_KK",
        Disposition.FLAT_3: "FLAT31",
        Disposition.FLAT_4KK: "FLAT4_KK",
        Disposition.FLAT_4: "FLAT41",
        Disposition.FLAT_5_UP: ("FLAT5_KK", "FLAT51", "FLAT6_AND_MORE"),
        Disposition.FLAT_OTHERS: "OTHERS_FLAT",
    }


    def build_response(self) -> requests.Response:
        """POST the SearchOffer GraphQL query (rentals, flats, Brno, newest first)."""
        json_request = {
            "query": "query SearchOffer($purpose: OfferPurpose, $property: PropertyType, $saved: Boolean, $categories: [OfferCategory!], $area: RangeInput, $plotArea: RangeInput, $price: RangeInput, $bounds: GpsBoundsInput, $address: String, $transportType: TransportType, $toleration: Float, $buildingTypes: [BuildingType!], $buildingStatuses: [BuildingStatus!], $buildingPositions: [BuildingPosition!], $houseTypes: [HouseType!], $floor: RangeInput, $ownershipStatuses: [OwnershipStatus!], $furnitureStatuses: [FurnitureStatus!], $maxAge: Int, $contactType: ContactType, $geometry: GeoJSONGeometry, $sort: OfferSort = NEWEST, $first: Int = 20, $skip: Int = 0) {\n addressGeometry(\n address: $address\n geometry: $geometry\n toleration: $toleration\n transportType: $transportType\n ) {\n geometry\n mask\n }\n searchOffer(\n filter: {purpose: $purpose, property: $property, saved: $saved, address: $address, transportType: $transportType, toleration: $toleration, categories: $categories, area: $area, plotArea: $plotArea, price: $price, bounds: $bounds, buildingTypes: $buildingTypes, buildingStatuses: $buildingStatuses, buildingPositions: $buildingPositions, houseTypes: $houseTypes, floor: $floor, ownershipStatuses: $ownershipStatuses, furnitureStatuses: $furnitureStatuses, maxAge: $maxAge, contactType: $contactType, geometry: $geometry}\n sort: $sort\n first: $first\n skip: $skip\n save: true\n ) {\n location {\n id\n type\n url\n name\n neighbours {\n id\n type\n url\n name\n }\n breadcrumbs {\n url\n name\n }\n relatedSearch {\n ...SearchParametersAttributes\n }\n center\n }\n items {\n ...SearchOfferAttributes\n }\n total\n }\n}\n\nfragment FilterAttributes on OfferFilter {\n purpose\n property\n categories\n address\n location {\n name\n }\n toleration\n transportType\n bounds {\n northEast {\n latitude\n longitude\n }\n southWest {\n latitude\n longitude\n }\n }\n saved\n geometry\n area {\n from\n to\n }\n plotArea {\n from\n to\n }\n price {\n from\n to\n }\n buildingTypes\n buildingStatuses\n buildingPositions\n houseTypes\n floor {\n from\n to\n }\n ownershipStatuses\n furnitureStatuses\n maxAge\n contactType\n}\n\nfragment SearchParametersAttributes on SearchParameters {\n filter {\n ...FilterAttributes\n }\n page\n priceMap\n sort\n}\n\nfragment SearchOfferAttributes on Offer {\n id\n url\n purpose\n property\n visited\n liked\n reserved\n createdAt\n category\n purpose\n property\n price {\n total\n canonical\n currency\n }\n area {\n main\n plot\n }\n photos {\n main\n }\n location {\n address\n addressUrl\n locationPrecision\n latitude\n longitude\n }\n}\n",
            "operationName": "SearchOffer",
            "variables": {
                "purpose": "RENT",
                "property": "FLAT",
                "address": "Brno",
                "saved": False,
                "categories": self.get_dispositions_data(),
                "sort": "NEWEST",
                "first": 300,
                "skip": 0
            }
        }

        logging.debug("realingo request: %s", json.dumps(json_request))

        return requests.post(self.base_url, headers=self.headers, json=json_request)


    def category_to_string(self, id) -> str:
        """Translate an OfferCategory enum value to a human-readable Czech label.

        Returns an empty string for unknown categories.
        """
        return {
            "FLAT1_KK": "Byt 1+kk",
            "FLAT11": "Byt 1+1",
            "FLAT2_KK": "Byt 2+kk",
            "FLAT21": "Byt 2+1",
            "FLAT3_KK": "Byt 3+kk",
            "FLAT31": "Byt 3+1",
            "FLAT4_KK": "Byt 4+kk",
            "FLAT41": "Byt 4+1",
            "FLAT5_KK": "Byt 5+kk",
            "FLAT51": "Byt 5+1",
            "FLAT6_AND_MORE": "Byt 6+kk a větší",
            "HOUSE_FAMILY": "Rodinný dům",
            "HOUSE_APARTMENT": "Činžovní",
            "HOUSE_MANSION": "Vila",
            "LAND_COMMERCIAL": "Komerční",
            "LAND_HOUSING": "Bydlení",
            "LAND_GARDEN": "Zahrady",
            "LAND_AGRICULTURAL": "Zemědělský",
            "LAND_MEADOW": "Louka",
            "LAND_FOREST": "Les",
            "COMMERCIAL_OFFICE": "Kancelář",
            "COMMERCIAL_STORAGE": "Sklad",
            "COMMERCIAL_MANUFACTURING": "Výrobní prostor",
            "COMMERCIAL_BUSINESS": "Obchod",
            "COMMERCIAL_ACCOMMODATION": "Ubytování",
            "COMMERCIAL_RESTAURANT": "Restaurace",
            "COMMERCIAL_AGRICULTURAL": "Zemědělský objekt",
            "OTHERS_HUT": "Chata",
            "OTHERS_COTTAGE": "Chalupa",
            "OTHERS_GARAGE": "Garáž",
            "OTHERS_FARMHOUSE": "Zemědělská usedlost",
            "OTHERS_POND": "Rybník",
            "OTHERS_FLAT": "Atypický",
            # BUGFIX: these two labels were swapped — "Památka" (monument)
            # belongs to OTHERS_MONUMENTS and "Ostatní" (others) to OTHERS_OTHERS.
            "OTHERS_OTHERS": "Ostatní",
            "OTHERS_MONUMENTS": "Památka"
        }.get(id, "")


    def get_latest_offers(self) -> list[RentalOffer]:
        """Convert the GraphQL search response into RentalOffer objects."""
        response = self.build_response().json()

        items: list[RentalOffer] = []

        for offer in response["data"]["searchOffer"]["items"]:
            items.append(RentalOffer(
                scraper = self,
                link = urljoin(self.base_url, offer["url"]),
                title = self.category_to_string(offer["category"]) + ", " + str(offer["area"]["main"]) + " m²",
                location = offer["location"]["address"],
                price = offer["price"]["total"],
                image_url = urljoin(self.base_url, "/static/images/" + (offer["photos"]["main"] or ""))
            ))

        return items
class ScraperRemax(ScraperBase):
    """Scraper for rental offers on remax-czech.cz (Brno district)."""

    name = "Remax"
    logo_url = "https://www.remax-czech.cz/apple-touch-icon.png"
    color = 0x003DA5
    base_url = "https://www.remax-czech.cz/reality/vyhledavani/"

    # Pre-encoded checkbox query-string fragments per disposition.
    disposition_mapping = {
        Disposition.FLAT_1KK: "&types%5B4%5D%5B2%5D=on",
        Disposition.FLAT_2KK: "&types%5B4%5D%5B3%5D=on",
        Disposition.FLAT_3KK: "&types%5B4%5D%5B4%5D=on",
        Disposition.FLAT_4KK: "&types%5B4%5D%5B5%5D=on",
        Disposition.FLAT_1: "&types%5B4%5D%5B9%5D=on",
        Disposition.FLAT_2: "&types%5B4%5D%5B10%5D=on",
        Disposition.FLAT_3: "&types%5B4%5D%5B11%5D=on",
        Disposition.FLAT_4: "&types%5B4%5D%5B12%5D=on",
        Disposition.FLAT_5_UP: (
            "&types%5B4%5D%5B6%5D=on",  # 5kk
            "&types%5B4%5D%5B7%5D=on",  # 6kk
            "&types%5B4%5D%5B8%5D=on",  # 7kk
            "&types%5B4%5D%5B13%5D=on",  # 5+1
            "&types%5B4%5D%5B14%5D=on",  # 6+1
            "&types%5B4%5D%5B15%5D=on",  # 7+1
        ),
        Disposition.FLAT_OTHERS: (
            "&types%5B4%5D%5B16%5D=on",  # atypical
            "&types%5B4%5D%5B17%5D=on",  # other
        ),
    }


    def build_response(self) -> requests.Response:
        """Compose the Remax search URL (Brno region, rentals) and GET it."""
        url = "".join((
            self.base_url,
            "?regions%5B116%5D%5B3702%5D=on&sale=2",
            *self.get_dispositions_data(),
            "&order_by_published_date=0",
        ))

        logging.debug("Remax request: %s", url)

        return requests.get(url, headers=self.headers)

    def get_latest_offers(self) -> list[RentalOffer]:
        """Scrape offer tiles; Remax exposes all fields via data-* attributes."""
        soup = BeautifulSoup(self.build_response().text, 'html.parser')

        found: list[RentalOffer] = []

        for tile in soup.select("#list .container-fluid .pl-items .pl-items__item"):
            found.append(RentalOffer(
                scraper = self,
                link = urljoin(self.base_url, tile.get('data-url')),
                title = tile.get("data-title"),
                # Collapse runs of whitespace in the address to single spaces.
                location = re.sub(r"\s+", " ", tile.get("data-display-address")),
                price = int(re.sub(r"[^\d]", "", tile.get("data-price")) or "0"),
                image_url = tile.get("data-img")
            ))

        return found
class ScraperSreality(ScraperBase):
    """Scraper for rental offers on sreality.cz via its public JSON API."""

    name = "Sreality"
    logo_url = "https://www.sreality.cz/img/icons/android-chrome-192x192.png"
    color = 0xCC0000
    base_url = "https://www.sreality.cz"

    # API category_sub_cb codes per flat disposition.
    disposition_mapping = {
        Disposition.FLAT_1KK: "2",
        Disposition.FLAT_1: "3",
        Disposition.FLAT_2KK: "4",
        Disposition.FLAT_2: "5",
        Disposition.FLAT_3KK: "6",
        Disposition.FLAT_3: "7",
        Disposition.FLAT_4KK: "8",
        Disposition.FLAT_4: "9",
        Disposition.FLAT_5_UP: ("10", "11", "12"),
        Disposition.FLAT_OTHERS: "16",
    }

    # URL-slug lookup tables for rebuilding a public detail-page link
    # from the numeric SEO codes the API returns.
    _category_type_to_url = {
        0: "vse",
        1: "prodej",
        2: "pronajem",
        3: "drazby"
    }

    _category_main_to_url = {
        0: "vse",
        1: "byt",
        2: "dum",
        3: "pozemek",
        4: "komercni",
        5: "ostatni"
    }

    _category_sub_to_url = {
        2: "1+kk",
        3: "1+1",
        4: "2+kk",
        5: "2+1",
        6: "3+kk",
        7: "3+1",
        8: "4+kk",
        9: "4+1",
        10: "5+kk",
        11: "5+1",
        12: "6-a-vice",
        16: "atypicky",
        47: "pokoj",
        37: "rodinny",
        39: "vila",
        43: "chalupa",
        33: "chata",
        35: "pamatka",
        40: "na-klic",
        44: "zemedelska-usedlost",
        19: "bydleni",
        18: "komercni",
        20: "pole",
        22: "louka",
        21: "les",
        46: "rybnik",
        48: "sady-vinice",
        23: "zahrada",
        24: "ostatni-pozemky",
        25: "kancelare",
        26: "sklad",
        27: "vyrobni-prostor",
        28: "obchodni-prostor",
        29: "ubytovani",
        30: "restaurace",
        31: "zemedelsky",
        38: "cinzovni-dum",
        49: "virtualni-kancelar",
        32: "ostatni-komercni-prostory",
        34: "garaz",
        52: "garazove-stani",
        50: "vinny-sklep",
        51: "pudni-prostor",
        53: "mobilni-domek",
        36: "jine-nemovitosti"
    }


    def _create_link_to_offer(self, offer) -> str:
        """Rebuild the public detail-page URL from the offer's SEO codes."""
        seo = offer["seo"]
        path = "/".join((
            "/detail",
            self._category_type_to_url[seo["category_type_cb"]],
            self._category_main_to_url[seo["category_main_cb"]],
            self._category_sub_to_url[seo["category_sub_cb"]],
            seo["locality"],
            str(offer["hash_id"]),
        ))
        return urljoin(self.base_url, path)

    def build_response(self) -> requests.Response:
        """GET the estates API endpoint filtered to Brno rentals."""
        subtypes = "|".join(self.get_dispositions_data())
        url = (self.base_url
               + "/api/cs/v2/estates?category_main_cb=1&category_sub_cb="
               + subtypes
               + "&category_type_cb=2&locality_district_id=72&locality_region_id=14&per_page=20"
               + "&tms=" + str(int(time())))

        logging.debug("Sreality request: %s", url)

        return requests.get(url, headers=self.headers)

    def get_latest_offers(self) -> list[RentalOffer]:
        """Convert the API's JSON response into RentalOffer objects."""
        payload = self.build_response().json()

        found: list[RentalOffer] = []

        for estate in payload["_embedded"]["estates"]:
            # Skip "tip" offers: they don't fully match the filters and
            # change with every search.
            if estate["region_tip"] > 0:
                continue

            found.append(RentalOffer(
                scraper = self,
                link = self._create_link_to_offer(estate),
                title = estate["name"],
                location = estate["locality"],
                price = estate["price_czk"]["value_raw"],
                image_url = estate["_links"]["image_middle2"][0]["href"]
            ))

        return found
class ScraperUlovDomov(ScraperBase):
    """Scraper for rental offers on ulovdomov.cz via its frontend JSON API."""

    name = "UlovDomov"
    logo_url = "https://www.ulovdomov.cz/favicon.png"
    color = 0xFFFFFF
    base_url = "https://www.ulovdomov.cz/fe-api/find/seperated-offers-within-bounds"

    # Numeric disposition IDs used by the API filter.
    disposition_mapping = {
        Disposition.FLAT_1KK: 2,
        Disposition.FLAT_1: 3,
        Disposition.FLAT_2KK: 4,
        Disposition.FLAT_2: 5,
        Disposition.FLAT_3KK: 6,
        Disposition.FLAT_3: 7,
        Disposition.FLAT_4KK: 8,
        Disposition.FLAT_4: 9,
        Disposition.FLAT_5_UP: (10, 11, 12, 13, 14, 15),  # 5kk, 5+1, 6kk, 6+1, 7kk, 7+1
        Disposition.FLAT_OTHERS: 16,
    }


    def disposition_id_to_string(self, id) -> str:
        """Translate an API disposition ID to a human-readable Czech label.

        Returns an empty string for unknown IDs.
        """
        return {
            1: "garsonky",
            2: "1+kk",
            3: "1+1",
            4: "2+kk",
            5: "2+1",
            6: "3+kk",
            7: "3+1",
            8: "4+kk",
            9: "4+1",
            10: "5+kk",
            11: "5+1",
            12: "6+kk",
            13: "6+1",
            14: "7+kk",
            15: "7+1",
            16: "atypický",
            29: "domu",
            24: "spolubydlení (1 lůžkový)",
            25: "spolubydlení (2 lůžkový)",
            26: "spolubydlení (3 lůžkový)",
            27: "spolubydlení (4+ lůžkový)",
            28: "spolubydlení (samostatný pokoj)",
            "shared_room": "spolubydlení",
            "5_and_more": "5 a více"
        }.get(id, "")

    def build_response(self) -> requests.Response:
        """POST the search filter (Brno bounding box, newest first) to the API."""
        payload = {
            "acreage_from": "",
            "acreage_to": "",
            "added_before": "",
            "banner_panel_width_type": 480,
            # Bounding box roughly covering the city of Brno.
            "bounds": {
                "north_east": {
                    "lat": 49.294485,
                    "lng": 16.727853
                },
                "south_west": {
                    "lat": 49.109655,
                    "lng": 16.428068
                }
            },
            "conveniences": [],
            "dispositions": self.get_dispositions_data(),
            "furnishing": [],
            "is_price_commision_free": None,
            "limit": 20,
            "offer_type_id": None,
            "page": 1,
            "price_from": "",
            "price_to": "",
            "query": "",
            "sort_by": "date:desc",
            "sticker": None
        }

        logging.debug("UlovDomov request: %s", json.dumps(payload))

        return requests.post(self.base_url, headers=self.headers, json=payload)

    def get_latest_offers(self) -> list[RentalOffer]:
        """Convert the API's JSON response into RentalOffer objects."""
        payload = self.build_response().json()

        found: list[RentalOffer] = []
        for offer in payload["offers"]:
            found.append(RentalOffer(
                scraper = self,
                link = offer["absolute_url"],
                # TODO "Pronájem" based on ID?
                title = "Pronájem " + self.disposition_id_to_string(offer["disposition_id"]) + " " + str(offer["acreage"]) + " m²",
                location = offer["street"]["label"] + ", " + offer["village"]["label"] + " - " + offer["village_part"]["label"],
                price = offer["price_rental"],
                image_url = offer["photos"][0]["path"]
            ))

        return found
"page": 1, + "price_from": "", + "price_to": "", + "query": "", + "sort_by": "date:desc", + "sticker": None + } + + logging.debug("UlovDomov request: %s", json.dumps(json_request)) + + return requests.post(self.base_url, headers=self.headers, json=json_request) + + def get_latest_offers(self) -> list[RentalOffer]: + response = self.build_response().json() + + items: list[RentalOffer] = [] + for offer in response["offers"]: + items.append(RentalOffer( + scraper = self, + link = offer["absolute_url"], + # TODO "Pronájem" podle ID? + title = "Pronájem " + self.disposition_id_to_string(offer["disposition_id"]) + " " + str(offer["acreage"]) + " m²", + location = offer["street"]["label"] + ", " + offer["village"]["label"] + " - " + offer["village_part"]["label"], + price = offer["price_rental"], + image_url = offer["photos"][0]["path"] + )) + + return items diff --git a/src/scrapers_manager.py b/src/scrapers_manager.py new file mode 100644 index 0000000..cd58653 --- /dev/null +++ b/src/scrapers_manager.py @@ -0,0 +1,99 @@ +import logging +import traceback + +from config import * +from disposition import Disposition +from scrapers.rental_offer import RentalOffer +from scrapers.scraper_base import ScraperBase +from scrapers.scraper_bravis import ScraperBravis +from scrapers.scraper_euro_bydleni import ScraperEuroBydleni +from scrapers.scraper_idnes_reality import ScraperIdnesReality +from scrapers.scraper_realcity import ScraperRealcity +from scrapers.scraper_realingo import ScraperRealingo +from scrapers.scraper_remax import ScraperRemax +from scrapers.scraper_sreality import ScraperSreality +from scrapers.scraper_ulov_domov import ScraperUlovDomov +from scrapers.scraper_bezrealitky import ScraperBezrealitky + + +def create_scrapers(dispositions: Disposition) -> list[ScraperBase]: + return [ + ScraperBravis(dispositions, None), + ScraperEuroBydleni(dispositions, None), + ScraperIdnesReality(dispositions, None), + ScraperRealcity(dispositions, None), + 
ScraperRealingo(dispositions, None), + ScraperRemax(dispositions, None), + ScraperSreality(dispositions, None), + ScraperUlovDomov(dispositions, None), + ScraperBezrealitky(dispositions, None), + ] + +def create_scrapers_1kk() -> list[ScraperBase]: + return [ + ScraperBravis(Disposition.FLAT_1KK, config.discord.offers_channel_1kk), + ScraperEuroBydleni(Disposition.FLAT_1KK, config.discord.offers_channel_1kk), + ScraperIdnesReality(Disposition.FLAT_1KK, config.discord.offers_channel_1kk), + ScraperRealcity(Disposition.FLAT_1KK, config.discord.offers_channel_1kk), + ScraperRealingo(Disposition.FLAT_1KK, config.discord.offers_channel_1kk), + ScraperRemax(Disposition.FLAT_1KK, config.discord.offers_channel_1kk), + ScraperSreality(Disposition.FLAT_1KK, config.discord.offers_channel_1kk), + ScraperUlovDomov(Disposition.FLAT_1KK, config.discord.offers_channel_1kk), + ScraperBezrealitky(Disposition.FLAT_1KK, config.discord.offers_channel_1kk), + ] + +def create_scrapers_1_1() -> list[ScraperBase]: + return [ + ScraperBravis(Disposition.FLAT_1, config.discord.offers_channel_1), + ScraperEuroBydleni(Disposition.FLAT_1, config.discord.offers_channel_1), + ScraperIdnesReality(Disposition.FLAT_1, config.discord.offers_channel_1), + ScraperRealcity(Disposition.FLAT_1, config.discord.offers_channel_1), + ScraperRealingo(Disposition.FLAT_1, config.discord.offers_channel_1), + ScraperRemax(Disposition.FLAT_1, config.discord.offers_channel_1), + ScraperSreality(Disposition.FLAT_1, config.discord.offers_channel_1), + ScraperUlovDomov(Disposition.FLAT_1, config.discord.offers_channel_1), + ScraperBezrealitky(Disposition.FLAT_1, config.discord.offers_channel_1), + ] + +def create_scrapers_2kk() -> list[ScraperBase]: + return [ + ScraperBravis(Disposition.FLAT_2KK, config.discord.offers_channel_2kk), + ScraperEuroBydleni(Disposition.FLAT_2KK, config.discord.offers_channel_2kk), + ScraperIdnesReality(Disposition.FLAT_2KK, config.discord.offers_channel_2kk), + 
def flatten(xs):
    """Lazily yield the atoms of an arbitrarily nested iterable.

    Strings and bytes are treated as atoms, not as nested iterables.
    Iterative equivalent of the recursive recipe from
    https://stackoverflow.com/a/2158532.
    """
    # Depth-first traversal with an explicit stack of live iterators.
    pending = [iter(xs)]
    while pending:
        try:
            item = next(pending[-1])
        except StopIteration:
            pending.pop()
            continue
        if isinstance(item, Iterable) and not isinstance(item, (str, bytes)):
            pending.append(iter(item))
        else:
            yield item