upload
This commit is contained in:
18
.editorconfig
Normal file
18
.editorconfig
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# EditorConfig is awesome: https://EditorConfig.org
|
||||||
|
|
||||||
|
# top-most EditorConfig file
|
||||||
|
root = true
|
||||||
|
|
||||||
|
[*]
|
||||||
|
indent_style = space
|
||||||
|
indent_size = 4
|
||||||
|
end_of_line = lf
|
||||||
|
charset = utf-8
|
||||||
|
trim_trailing_whitespace = true
|
||||||
|
insert_final_newline = true
|
||||||
|
|
||||||
|
[*.yml]
|
||||||
|
indent_size = 2
|
||||||
|
|
||||||
|
[Makefile]
|
||||||
|
indent_style = tab
|
||||||
8
.env
Normal file
8
.env
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
DEBUG=0
|
||||||
|
FOUND_OFFERS_FILE=found_offers.txt
|
||||||
|
REFRESH_INTERVAL_DAYTIME_MINUTES=30
|
||||||
|
REFRESH_INTERVAL_NIGHTTIME_MINUTES=90
|
||||||
|
DISCORD_TOKEN=CREATE_ENV_LOCAL_AND_SET_TOKEN
|
||||||
|
DISCORD_OFFERS_CHANNEL=1067564052022300672
|
||||||
|
DISCORD_DEV_CHANNEL=954732715377311825
|
||||||
|
DISPOSITIONS=3+kk,3+1
|
||||||
1
.env.docker
Normal file
1
.env.docker
Normal file
@@ -0,0 +1 @@
|
|||||||
|
FOUND_OFFERS_FILE=/data/found_offers.txt
|
||||||
26
.github/workflows/dockerhub.yml
vendored
Normal file
26
.github/workflows/dockerhub.yml
vendored
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
name: Build and publish image on Docker Hub
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [master]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build_docker_image:
|
||||||
|
name: Build and push Docker image to Docker Hub
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Check out the repo
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: Log in to Docker Hub
|
||||||
|
uses: docker/login-action@v1
|
||||||
|
with:
|
||||||
|
username: ${{ secrets.DOCKER_USERNAME }}
|
||||||
|
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||||
|
|
||||||
|
- name: Build and push Docker image
|
||||||
|
uses: docker/build-push-action@v2
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
push: true
|
||||||
|
tags: janch32/web-scraper-nabidek-pronajmu:latest
|
||||||
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
**/__pycache__
|
||||||
|
remembered_offers.txt
|
||||||
|
found_offers.txt
|
||||||
|
data/
|
||||||
|
.env.local
|
||||||
18
.vscode/launch.json
vendored
Normal file
18
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Launch project",
|
||||||
|
"type": "python",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${workspaceFolder}/src/main.py",
|
||||||
|
"console": "integratedTerminal",
|
||||||
|
"env": {
|
||||||
|
"DEBUG": "1"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
7
.vscode/settings.json
vendored
Normal file
7
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"cSpell.words": [
|
||||||
|
"Idnes",
|
||||||
|
"Realingo",
|
||||||
|
"sreality"
|
||||||
|
]
|
||||||
|
}
|
||||||
17
Dockerfile
Normal file
17
Dockerfile
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
FROM python:3-alpine
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
VOLUME ["/data"]
|
||||||
|
|
||||||
|
COPY requirements.txt requirements.txt
|
||||||
|
RUN apk add --no-cache --virtual .build-deps gcc musl-dev && \
|
||||||
|
pip3 install -r requirements.txt && \
|
||||||
|
apk del .build-deps
|
||||||
|
|
||||||
|
ENV APP_ENV=docker
|
||||||
|
|
||||||
|
COPY .env* .
|
||||||
|
COPY src .
|
||||||
|
COPY graphql ./graphql
|
||||||
|
|
||||||
|
CMD ["python3", "main.py"]
|
||||||
11
Makefile
Normal file
11
Makefile
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
.PHONY: install debug
|
||||||
|
|
||||||
|
install:
|
||||||
|
python3 -m pip install -r requirements.txt
|
||||||
|
|
||||||
|
run:
|
||||||
|
python3 src/main.py
|
||||||
|
|
||||||
|
debug:
|
||||||
|
DEBUG=1
|
||||||
|
python3 src/main.py
|
||||||
58
README.md
Normal file
58
README.md
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
# Web Scraper Nabídek Pronájmu
|
||||||
|
Hlídá nové nabídky na populárních realitních serverech.
|
||||||
|
|
||||||
|
[**Docker image (aktuální z master větvě) - `janch32/web-scraper-nabidek-pronajmu`**](https://hub.docker.com/r/janch32/web-scraper-nabidek-pronajmu)
|
||||||
|
|
||||||
|
*Tato aplikace byla vytvořena pro osobní použití, takže obsahuje hardkódované údaje pro hledání pronájmu bytů v Brně (ale nemělo by být zas tak moc těžký to upravit).*
|
||||||
|
|
||||||
|
Nicméně je možné při spuštění aplikace nakonfigurovat, které **dispozice bytu** (počet místností) hledat.
|
||||||
|
|
||||||
|
## Podporované realitní servery
|
||||||
|
- BRAVIS
|
||||||
|
- EuroBydlení
|
||||||
|
- iDNES Reality
|
||||||
|
- REALCITY
|
||||||
|
- realingo
|
||||||
|
- Remax
|
||||||
|
- Sreality
|
||||||
|
- UlovDomov
|
||||||
|
- BezRealitky
|
||||||
|
|
||||||
|
## Spuštění
|
||||||
|
- Lze spustit lokálně nebo v Dockeru
|
||||||
|
- **Lokální spuštění**
|
||||||
|
- Je vyžadován **Python 3.11+**
|
||||||
|
- Před prvním spuštěním nainstalujte závislosti `make install`
|
||||||
|
- Vytvořte si lokální soubor `.env.local` a nastavte v něm všechny požadované parametry (minimálně však Discord token, cílovou roomku a požadované dispozice bytu)
|
||||||
|
- následně je možné spustit `make run` nebo v debug režimu `make debug`
|
||||||
|
- **Spuštění v Dockeru**
|
||||||
|
- Přiložená Docker Compose konfigurace slouží pro vývoj. Stačí ji spustit příkazem `docker-compose up -d` (má zapnutý debug mód)
|
||||||
|
- K dispozici je také sestavený Docker obraz v Docker Hub, vždy aktuální s master větví - [`janch32/web-scraper-nabidek-pronajmu`](https://hub.docker.com/r/janch32/web-scraper-nabidek-pronajmu)
|
||||||
|
- Kromě toho je možné vytvořit "produkční" Docker image díky `Dockerfile`. Při spuštění kontejneru je nutné nastavit všechny požadované env proměnné (ne v .env.local!)
|
||||||
|
|
||||||
|
Aplikace při prvním spuštění nevypíše žádné nabídky, pouze si stáhne seznam těch aktuálních. Poté každých 30 minut (nastavitelné přes env proměnné) kontroluje nové nabídky na realitních serverech a ty přeposílá do Discord kanálu. Aplikace nemusí běžet pořád, po opětovném spuštění pošle všechny nové nabídky od posledního spuštění.
|
||||||
|
|
||||||
|
## Konfigurace přes Env proměnné
|
||||||
|
- `DISCORD_OFFERS_CHANNEL` - Unikátní číslo Discord kanálu, kde se budou posílat nabídky. [Návod pro získání ID](https://support.discord.com/hc/en-us/articles/206346498-Where-can-I-find-my-User-Server-Message-ID-)
|
||||||
|
- `DISCORD_DEV_CHANNEL` - Unikátní číslo Discord kanálu, kde se budou posílat chyby programu.
|
||||||
|
- `DISCORD_TOKEN` - Obsahuje Discord token bota. [Návod pro získání tokenu](https://discordgsm.com/guide/how-to-get-a-discord-bot-token)
|
||||||
|
- `DISPOSITIONS` - Obsahuje seznam dispozic oddělených čárkou. Např.: `DISPOSITIONS=2+kk,2+1,others`
|
||||||
|
|
||||||
|
### Seznam dostupných hodnot parametru `DISPOSITIONS`
|
||||||
|
- `1+kk`
|
||||||
|
- `1+1`
|
||||||
|
- `2+kk`
|
||||||
|
- `2+1`
|
||||||
|
- `3+kk`
|
||||||
|
- `3+1`
|
||||||
|
- `4+kk`
|
||||||
|
- `4+1`
|
||||||
|
- `5++` (5+kk a více místností)
|
||||||
|
- `others` (jiné, atypické nebo neznámé velikosti)
|
||||||
|
|
||||||
|
### Další konfigurovatelné Env proměnné
|
||||||
|
Tyto hodnoty jsou nastavené pro běžné použití a není potřeba je měnit. Zde je každopádně popis těchto hodnot.
|
||||||
|
- `DEBUG` (boolean, výchozí vypnuto). Aktivuje režim ladění aplikace, především podrobnějšího výpisu do konzole. Vhodné pro vývoj.
|
||||||
|
- `FOUND_OFFERS_FILE` Cesta k souboru, kam se ukládají dříve nalezené nabídky. Aplikace si soubor vytvoří, ale složka musí existovat. Pokud aplikace nebyla nějakou dobu spuštěna (řádově týdny) je dobré tento soubor smazat - aplikace by toto vyhodnotila jako velké množství nových nabídek a zaspamovala by Discord kanál.
|
||||||
|
- `REFRESH_INTERVAL_DAYTIME_MINUTES` - interval, po kterém se mají stáhnout nejnovější nabídky. Výchozí 30min, doporučeno minimálně 10min
|
||||||
|
- `REFRESH_INTERVAL_NIGHTTIME_MINUTES` - noční interval stahování nabídek. Jde o čas mezi 22h-6h. Výchozí 90min, doporučeno vyšší než denní interval
|
||||||
9
docker-compose.yml
Normal file
9
docker-compose.yml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
version: '2.4'
|
||||||
|
|
||||||
|
services:
|
||||||
|
web-scraper:
|
||||||
|
build: .
|
||||||
|
environment:
|
||||||
|
- DEBUG=1
|
||||||
|
volumes:
|
||||||
|
- ./data:/data
|
||||||
14
graphql/bezrealitky.json
Normal file
14
graphql/bezrealitky.json
Normal file
File diff suppressed because one or more lines are too long
5
requirements.txt
Normal file
5
requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
requests
|
||||||
|
beautifulsoup4
|
||||||
|
python-dotenv
|
||||||
|
discord.py
|
||||||
|
environ-config
|
||||||
55
src/config.py
Normal file
55
src/config.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
import functools
|
||||||
|
import operator
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import environ
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
from disposition import Disposition
|
||||||
|
|
||||||
|
# Load environment configuration in increasing order of precedence:
#   1. .env            - base defaults committed to the repo
#   2. .env.<APP_ENV>  - environment-specific overrides (e.g. .env.docker)
#   3. .env.local      - developer-local overrides (not committed)
load_dotenv(".env")

app_env = os.getenv("APP_ENV")
if app_env:
    load_dotenv(".env." + app_env, override=True)

load_dotenv(".env.local", override=True)
|
||||||
|
|
||||||
|
# Maps the textual disposition codes accepted in the DISPOSITIONS env
# variable (see README) to the corresponding Disposition flags.
_str_to_disposition_map = {
    "1+kk": Disposition.FLAT_1KK,
    "1+1": Disposition.FLAT_1,
    "2+kk": Disposition.FLAT_2KK,
    "2+1": Disposition.FLAT_2,
    "3+kk": Disposition.FLAT_3KK,
    "3+1": Disposition.FLAT_3,
    "4+kk": Disposition.FLAT_4KK,
    "4+1": Disposition.FLAT_4,
    "5++": Disposition.FLAT_5_UP,
    "others": Disposition.FLAT_OTHERS
}
|
||||||
|
|
||||||
|
def dispositions_converter(raw_disps: str):
    """Convert a comma-separated disposition list (e.g. ``"2+kk,2+1"``)
    into a combined :class:`Disposition` flag.

    Args:
        raw_disps (str): Comma-separated disposition codes; valid codes
            are the keys of ``_str_to_disposition_map``. Surrounding
            whitespace and empty items (trailing commas) are tolerated.

    Returns:
        Disposition: Bitwise OR of all parsed flags
        (``Disposition.NONE`` for an empty input).

    Raises:
        ValueError: If an unknown disposition code is encountered
            (previously this surfaced as a bare ``KeyError``).
    """
    result = Disposition.NONE
    for raw_item in raw_disps.split(","):
        code = raw_item.strip()
        if not code:
            continue  # tolerate "" and trailing/duplicate commas
        try:
            result |= _str_to_disposition_map[code]
        except KeyError:
            raise ValueError(
                "Unknown disposition {!r}; valid values: {}".format(
                    code, ", ".join(_str_to_disposition_map))
            ) from None
    return result
|
||||||
|
|
||||||
|
|
||||||
|
@environ.config(prefix="")
class Config:
    """Application configuration materialized from environment variables.

    Variable names are derived from the attribute names (upper-cased, no
    prefix); nested Discord settings are read with a DISCORD_ prefix via
    environ.group.
    """

    debug: bool = environ.bool_var()  # DEBUG - verbose logging, disables the Discord error handler
    found_offers_file: Path = environ.var(converter=Path)  # FOUND_OFFERS_FILE - persistence file for seen offers
    refresh_interval_daytime_minutes: int = environ.var(converter=int)  # REFRESH_INTERVAL_DAYTIME_MINUTES
    refresh_interval_nighttime_minutes: int = environ.var(converter=int)  # REFRESH_INTERVAL_NIGHTTIME_MINUTES
    dispositions: Disposition = environ.var(converter=dispositions_converter)  # DISPOSITIONS - e.g. "3+kk,3+1"

    @environ.config()
    class Discord:
        """Discord-related settings (DISCORD_* variables)."""

        token = environ.var()  # DISCORD_TOKEN - bot token
        # NOTE(review): per-disposition channel IDs below imply variables
        # like DISCORD_OFFERS_CHANNEL_1KK, but the sample .env only sets
        # DISCORD_OFFERS_CHANNEL - confirm which names are expected.
        offers_channel_1kk = environ.var(converter=int)
        offers_channel_1 = environ.var(converter=int)
        offers_channel_2kk = environ.var(converter=int)
        offers_channel_2 = environ.var(converter=int)
        dev_channel = environ.var(converter=int)  # DISCORD_DEV_CHANNEL - error reports

    discord: Discord = environ.group(Discord)


# Singleton configuration instance read once at import time.
config: Config = Config.from_environ()
|
||||||
13
src/discord_logger.py
Normal file
13
src/discord_logger.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
class DiscordLogger(logging.Handler):
    """Logging handler that forwards log records to a Discord channel.

    Records at or above the configured level are rendered as a bold level
    name followed by the message in a code block, and sent asynchronously
    on the client's event loop.
    """

    def __init__(self, client, channel, level) -> None:
        """Store the Discord client and target channel.

        Args:
            client: Connected Discord client whose event loop is used.
            channel: Channel object the messages are sent to.
            level: Minimum logging level this handler reacts to.
        """
        super().__init__(level)
        self.client = client
        self.channel = channel

    def emit(self, record: logging.LogRecord):
        """Schedule the record to be posted to the Discord channel."""
        body = record.getMessage()
        message = "**{}**\n```\n{}\n```".format(record.levelname, body)
        # Fire-and-forget: sending happens on the client's event loop.
        coroutine = self.channel.send(message)
        self.client.loop.create_task(coroutine)
|
||||||
15
src/disposition.py
Normal file
15
src/disposition.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
from enum import Flag, auto
|
||||||
|
|
||||||
|
|
||||||
|
class Disposition(Flag):
    """Flat disposition (room layout) as a bit flag.

    Flag members can be combined with ``|`` and tested with ``in`` so a
    single value can describe a whole set of wanted dispositions.
    """

    NONE = 0
    FLAT_1KK = auto()  # 1+kk
    FLAT_1 = auto()  # 1+1
    FLAT_2KK = auto()  # 2+kk
    FLAT_2 = auto()  # 2+1
    FLAT_3KK = auto()  # 3+kk
    FLAT_3 = auto()  # 3+1
    FLAT_4KK = auto()  # 4+kk
    FLAT_4 = auto()  # 4+1
    FLAT_5_UP = auto()  # 5+kk and larger
    FLAT_OTHERS = auto()  # atypical or unknown sizes
|
||||||
115
src/main.py
Normal file
115
src/main.py
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import logging
|
||||||
|
from datetime import datetime
|
||||||
|
from time import time
|
||||||
|
|
||||||
|
import discord
|
||||||
|
from discord.ext import tasks
|
||||||
|
|
||||||
|
from config import *
|
||||||
|
from discord_logger import DiscordLogger
|
||||||
|
from offers_storage import OffersStorage
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
from scrapers_manager import create_scrapers, create_scrapers_1_1, create_scrapers_1kk, create_scrapers_2_2, create_scrapers_2kk, fetch_latest_offers
|
||||||
|
|
||||||
|
|
||||||
|
def get_current_daytime() -> bool:
    """Return True during daytime hours (06:00-21:59 local time)."""
    hour = datetime.now().hour
    return 6 <= hour < 22
|
||||||
|
|
||||||
|
|
||||||
|
# Discord client; a generous heartbeat timeout so long scraping runs do
# not drop the gateway connection.
client = discord.Client(intents=discord.Intents.default(), heartbeat_timeout=30)
daytime = get_current_daytime()  # tracks which refresh interval is active
interval_time = config.refresh_interval_daytime_minutes if daytime else config.refresh_interval_nighttime_minutes


#scrapers = create_scrapers(config.dispositions)

# One scraper set per disposition; each scraper carries its own target
# channel (see ScraperBase.channel).
scrapers = []
scrapers += create_scrapers_1kk()
scrapers += create_scrapers_1_1()
scrapers += create_scrapers_2kk()
scrapers += create_scrapers_2_2()
|
||||||
|
|
||||||
|
@client.event
async def on_ready():
    """Client start-up hook: initialize the offer storage, attach the
    Discord error logger (outside debug mode) and start the periodic
    fetching task."""
    global storage

    dev_channel = client.get_channel(config.discord.dev_channel)
    ##channel = client.get_channel(config.discord.offers_channel)
    storage = OffersStorage(config.found_offers_file)

    if not config.debug:
        # Forward ERROR-level records to the dev channel so failures are
        # visible without shell access to the host.
        discord_error_logger = DiscordLogger(client, dev_channel, logging.ERROR)
        logging.getLogger().addHandler(discord_error_logger)
    else:
        logging.info("Discord logger is inactive in debug mode")

    # dict.fromkeys preserves order while dropping duplicate scraper names
    logging.info("Available scrapers: " + ", ".join(dict.fromkeys(s.name for s in scrapers)))

    logging.info("Fetching latest offers every {} minutes".format(interval_time))

    process_latest_offers.start()
|
||||||
|
|
||||||
|
|
||||||
|
async def send_offer(offer, channel):
    """Post a single rental offer as a rich embed to a Discord channel.

    Args:
        offer: RentalOffer to publish.
        channel: Numeric ID of the target Discord channel.
    """
    # NOTE(review): datetime.utcnow() returns a naive timestamp and is
    # deprecated since Python 3.12; datetime.now(timezone.utc) would be
    # the timezone-aware equivalent - confirm before changing.
    embed = discord.Embed(
        title=offer.title,
        url=offer.link,
        description=offer.location,
        timestamp=datetime.utcnow(),
        color=offer.scraper.color
    )

    embed.add_field(name="Cena", value=str(offer.price) + " Kč")
    embed.set_author(name=offer.scraper.name, icon_url=offer.scraper.logo_url)
    embed.set_image(url=offer.image_url)

    await client.get_channel(channel).send(embed=embed)
|
||||||
|
|
||||||
|
|
||||||
|
@tasks.loop(minutes=interval_time)
async def process_latest_offers():
    """Periodic task: fetch offers from all scrapers, persist them and
    post previously unseen ones to their Discord channels.

    Also switches the loop interval between the daytime and nighttime
    refresh rates whenever the day/night boundary is crossed.
    """
    logging.info("Fetching offers")

    new_offers: list[RentalOffer] = []
    channel_offers = []
    for target_channel, offer in fetch_latest_offers(scrapers):
        if not storage.contains(offer):
            new_offers.append(offer)
            channel_offers.append((target_channel, offer))

    first_time = storage.first_time
    storage.save_offers(new_offers)

    logging.info("Offers fetched (new: {})".format(len(new_offers)))

    if not first_time:
        for target_channel, offer in channel_offers:
            try:
                await send_offer(offer, target_channel)
            except Exception:
                # Keep the loop alive when a single offer fails to send,
                # but log the full traceback instead of swallowing it.
                # (Was a bare `except:`, which also caught
                # asyncio.CancelledError and broke task cancellation.)
                logging.exception("Failed to send offer: %s", offer.link)
    else:
        logging.info("No previous offers, first fetch is running silently")

    global daytime, interval_time
    if daytime != get_current_daytime():  # stored flag no longer matches the clock
        daytime = not daytime  # flip it (the comparison above proved it changed)

        interval_time = config.refresh_interval_daytime_minutes if daytime else config.refresh_interval_nighttime_minutes

        logging.info("Fetching latest offers every {} minutes".format(interval_time))
        process_latest_offers.change_interval(minutes=interval_time)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Root logger configuration; DEBUG verbosity only when DEBUG env is set.
    logging.basicConfig(
        level=(logging.DEBUG if config.debug else logging.INFO),
        format='%(asctime)s - [%(levelname)s] %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')

    logging.debug("Running in debug mode")

    # Blocks until the client disconnects; on_ready starts the fetch loop.
    client.run(config.discord.token, log_level=logging.INFO)
|
||||||
50
src/offers_storage.py
Normal file
50
src/offers_storage.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
|
||||||
|
|
||||||
|
class OffersStorage:
    """Persistent store of previously discovered rental offers.

    Offers are identified by their URL. Known links are kept in memory
    and appended to a plain-text file (one link per line) so the state
    survives restarts.
    """

    def __init__(self, path: str):
        """Load previously saved offer links from *path*, if it exists.

        Args:
            path (str): Path to the file holding one offer URL per line.
                The file is created on first save; its directory must exist.
        """
        self.path = path  # where known links are persisted
        self.first_time = False  # True until the first save (file did not exist)
        self._links: set[str] = set()  # URLs of all offers seen so far

        try:
            with open(self.path, encoding="utf-8") as file:
                for line in file:
                    self._links.add(line.strip())
        except FileNotFoundError:
            # No saved state yet - the first fetch should run silently.
            self.first_time = True

    def contains(self, offer: "RentalOffer") -> bool:
        """Return True if *offer* was already seen (matched by its link).

        Args:
            offer (RentalOffer): Offer to check.

        Returns:
            bool: True for a previously known offer.
        """
        return offer.link in self._links

    def save_offers(self, offers: "list[RentalOffer]"):
        """Remember *offers* and append their links to the storage file.

        Args:
            offers (list[RentalOffer]): Newly found offers.
        """
        # Write "\n", not os.linesep: text mode already translates
        # newlines, so os.linesep would produce "\r\r\n" on Windows.
        with open(self.path, 'a+', encoding="utf-8") as file_object:
            for offer in offers:
                self._links.add(offer.link)
                file_object.write(offer.link + "\n")

        self.first_time = False
|
||||||
26
src/scrapers/rental_offer.py
Normal file
26
src/scrapers/rental_offer.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from disposition import Disposition
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class RentalOffer:
|
||||||
|
"""Nabídka pronájmu bytu"""
|
||||||
|
|
||||||
|
link: str
|
||||||
|
"""URL adresa na nabídku"""
|
||||||
|
|
||||||
|
title: str
|
||||||
|
"""Popis nabídky (nejčastěji počet pokojů, výměra)"""
|
||||||
|
|
||||||
|
location: str
|
||||||
|
"""Lokace bytu (městská část, ulice)"""
|
||||||
|
|
||||||
|
price: int | str
|
||||||
|
"""Cena pronájmu za měsíc bez poplatků a energií"""
|
||||||
|
|
||||||
|
image_url: str
|
||||||
|
"""Náhledový obrázek nabídky"""
|
||||||
|
|
||||||
|
scraper: 'ScraperBase'
|
||||||
|
"""Odkaz na instanci srapera, ze kterého tato nabídka pochází"""
|
||||||
68
src/scrapers/scraper_base.py
Normal file
68
src/scrapers/scraper_base.py
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
from abc import abstractmethod
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from requests import Response
|
||||||
|
|
||||||
|
from disposition import Disposition
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
from utils import flatten
|
||||||
|
|
||||||
|
|
||||||
|
class ScraperBase():
    """Base class for fetching current rental offers from the various
    real-estate services.

    Subclasses supply the identifying metadata (name, logo_url, color,
    disposition_mapping) and implement build_response() and
    get_latest_offers().
    """

    # Pretend to be a regular desktop browser - some services reject
    # requests carrying non-browser user agents.
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"
    headers = {"User-Agent": user_agent}

    @property
    @abstractmethod
    def name(self) -> str:
        """Human-readable name of the service."""
        pass

    @property
    @abstractmethod
    def logo_url(self) -> str:
        """URL of the service logo shown in Discord embeds."""
        pass

    @property
    @abstractmethod
    def color(self) -> int:
        """Accent color (0xRRGGBB int) used for Discord embeds."""
        pass

    @property
    @abstractmethod
    def disposition_mapping(self) -> "dict[Disposition, Any]":
        """Mapping of Disposition flags to service-specific filter values."""
        pass

    def __init__(self, disposition: "Disposition", channel) -> None:
        """
        Args:
            disposition (Disposition): Flag set of dispositions to search for.
            channel: Discord channel (ID) where offers from this scraper go.
        """
        super().__init__()
        self.disposition = disposition
        self.channel = channel

    def get_dispositions_data(self) -> list:
        """Translate the configured dispositions into a flat list of
        service-specific filter values via disposition_mapping."""
        return list(flatten([self.disposition_mapping[d] for d in self.disposition]))

    @abstractmethod
    def build_response(self) -> "Response":
        """Create and send the server request for offers matching the
        configured parameters.

        (Fixed: the abstract stub was missing the ``self`` parameter and
        the error message misspelled "implemented".)

        Raises:
            NotImplementedError: If the subclass does not implement this.

        Returns:
            Response: Raw server response containing unparsed offers.
        """
        raise NotImplementedError("Server request builder is not implemented")

    @abstractmethod
    def get_latest_offers(self) -> "list[RentalOffer]":
        """Fetch and return the newest rental offers from the service.

        Raises:
            NotImplementedError: If the subclass does not implement this.

        Returns:
            list[RentalOffer]: List of offered flats.
        """
        raise NotImplementedError("Fetching new results is not implemented")
|
||||||
86
src/scrapers/scraper_bezrealitky.py
Normal file
86
src/scrapers/scraper_bezrealitky.py
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
""" Scraper for BezRealitky.cz
|
||||||
|
author: Mark Barzali
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from abc import ABC as abstract
|
||||||
|
from typing import ClassVar
|
||||||
|
|
||||||
|
from disposition import Disposition
|
||||||
|
from scrapers.scraper_base import ScraperBase
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
class ScraperBezrealitky(ScraperBase):
    """Scraper for bezrealitky.cz, backed by their public GraphQL API.

    The GraphQL query is loaded from a JSON file on disk and patched with
    the configured search parameters before each use.
    """

    name = "BezRealitky"
    logo_url = "https://www.bezrealitky.cz/manifest-icon-192.maskable.png"
    color = 0x00CC00
    base_url = "https://www.bezrealitky.cz"
    file: ClassVar[str] = "./graphql/bezrealitky.json"  # prepared GraphQL query payload

    API: ClassVar[str] = "https://api.bezrealitky.cz/"
    OFFER_TYPE: ClassVar[str] = "PRONAJEM"  # rentals only
    ESTATE_TYPE: ClassVar[str] = "BYT"  # flats only
    BRNO: ClassVar[str] = "R438171"  # OSM region id of Brno

    class Routes(abstract):
        # URL path fragments used to build API and offer links.
        GRAPHQL: ClassVar[str] = "graphql/"
        OFFERS: ClassVar[str] = "nemovitosti-byty-domy/"

    # Disposition flags -> API enum values; 5+ and atypical sizes are not
    # supported by this service's filter (None).
    disposition_mapping = {
        Disposition.FLAT_1KK: "DISP_1_KK",
        Disposition.FLAT_1: "DISP_1_1",
        Disposition.FLAT_2KK: "DISP_2_KK",
        Disposition.FLAT_2: "DISP_2_1",
        Disposition.FLAT_3KK: "DISP_3_KK",
        Disposition.FLAT_3: "DISP_3_1",
        Disposition.FLAT_4KK: "DISP_4_KK",
        Disposition.FLAT_4: "DISP_4_1",
        Disposition.FLAT_5_UP: None,
        Disposition.FLAT_OTHERS: None,
    }

    def __init__(self, dispositions: Disposition, channel):
        """Load the GraphQL payload and patch in the search parameters."""
        super().__init__(dispositions, channel)
        self._read_config()
        self._patch_config()

    def _read_config(self) -> None:
        """Read the prepared GraphQL request body from disk."""
        with open(ScraperBezrealitky.file, "r") as file:
            self._config = json.load(file)

    def _patch_config(self):
        """Inject estate/offer type, dispositions and region into the
        GraphQL query variables."""
        match = {
            "estateType": self.ESTATE_TYPE,
            "offerType": self.OFFER_TYPE,
            "disposition": self.get_dispositions_data(),
            "regionOsmIds": [self.BRNO],
        }
        self._config["variables"].update(match)

    @staticmethod
    def _create_link_to_offer(item: dict) -> str:
        """Build the public offer URL from the advert's URI slug."""
        return f"{ScraperBezrealitky.base_url}/{ScraperBezrealitky.Routes.OFFERS}{item}"

    def build_response(self) -> requests.Response:
        """POST the patched GraphQL query to the BezRealitky API."""
        return requests.post(
            url=f"{ScraperBezrealitky.API}{ScraperBezrealitky.Routes.GRAPHQL}",
            json=self._config
        )

    def get_latest_offers(self) -> list[RentalOffer]:
        """Fetch current adverts and convert them to RentalOffer objects."""
        response = self.build_response().json()

        return [  # type: list[RentalOffer]
            RentalOffer(
                scraper=self,
                link=self._create_link_to_offer(item["uri"]),
                title=item["imageAltText"],
                location=item["address"],
                price=f"{item['price']} / {item['charges']}",
                image_url=item["mainImage"]["url"] if item["mainImage"] else "",
            )
            for item in response["data"]["listAdverts"]["list"]
        ]
|
||||||
62
src/scrapers/scraper_bravis.py
Normal file
62
src/scrapers/scraper_bravis.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from disposition import Disposition
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
from scrapers.scraper_base import ScraperBase
|
||||||
|
|
||||||
|
|
||||||
|
class ScraperBravis(ScraperBase):
    """Scraper for bravis.cz - parses the HTML search results page."""

    name = "BRAVIS"
    logo_url = "https://www.bravis.cz/content/img/logo-small.png"
    color = 0xCE0020
    base_url = "https://www.bravis.cz/pronajem-bytu"


    def build_response(self) -> requests.Response:
        """GET the search results page with query parameters derived from
        the configured dispositions.

        The site filters by room count only, so each "byt+N" parameter
        covers both the N+kk and N+1 variants.
        """
        url = self.base_url + "?"

        if Disposition.FLAT_1KK in self.disposition or Disposition.FLAT_1 in self.disposition:
            url += "typ-nemovitosti-byt+1=&"
        if Disposition.FLAT_2KK in self.disposition or Disposition.FLAT_2 in self.disposition:
            url += "typ-nemovitosti-byt+2=&"
        if Disposition.FLAT_3KK in self.disposition or Disposition.FLAT_3 in self.disposition:
            url += "typ-nemovitosti-byt+3=&"
        if Disposition.FLAT_4KK in self.disposition or Disposition.FLAT_4 in self.disposition:
            url += "typ-nemovitosti-byt+4=&"
        if Disposition.FLAT_5_UP in self.disposition:
            url += "typ-nemovitosti-byt+5=&"

        # Fixed search parameters: rentals, whole Brno, newest first.
        url += "typ-nabidky=pronajem-bytu&lokalita=cele-brno&vybavenost=nezalezi&q=&action=search&s=1-20-order-0"

        logging.debug("BRAVIS request: %s", url)

        return requests.get(url, headers=self.headers)

    def get_latest_offers(self) -> list[RentalOffer]:
        """Parse the search results HTML into RentalOffer objects."""
        response = self.build_response()
        soup = BeautifulSoup(response.text, 'html.parser')

        items: list[RentalOffer] = []

        for item in soup.select("#search > .in > .itemslist > li"):
            # NOTE(review): list items carrying any CSS class are skipped -
            # presumably non-offer rows (banners/headers); confirm against
            # the live markup.
            if item.get("class"):
                continue

            params = item.select(".params > li")

            items.append(RentalOffer(
                scraper = self,
                link = urljoin(self.base_url, item.select_one("a.main").get("href")),
                title = "Pronájem " + params[1].find("strong").get_text().strip() + ", " + params[2].find("strong").get_text().strip(),
                location = item.select_one(".location").get_text().strip(),
                # First text fragment of the price element, digits only.
                price = int(re.sub(r"[^\d]", "", [text for text in item.select_one(".price").stripped_strings][0])),
                image_url = urljoin(self.base_url, item.select_one(".img > img").get("src"))
            ))

        return items
|
||||||
95
src/scrapers/scraper_euro_bydleni.py
Normal file
95
src/scrapers/scraper_euro_bydleni.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from disposition import Disposition
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
from scrapers.scraper_base import ScraperBase
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
|
class ScraperEuroBydleni(ScraperBase):
|
||||||
|
|
||||||
|
name = "Eurobydlení"
|
||||||
|
logo_url = "https://files.janchaloupka.cz/eurobydleni.png"
|
||||||
|
color = 0xFA0F54
|
||||||
|
base_url = "https://www.eurobydleni.cz/search-form"
|
||||||
|
|
||||||
|
cookies = {"listing-sort": "sort-added"}
|
||||||
|
disposition_mapping = {
|
||||||
|
Disposition.FLAT_1: 15,
|
||||||
|
Disposition.FLAT_1KK: 16,
|
||||||
|
Disposition.FLAT_2: 17,
|
||||||
|
Disposition.FLAT_2KK: 18,
|
||||||
|
Disposition.FLAT_3: 19,
|
||||||
|
Disposition.FLAT_3KK: 20,
|
||||||
|
Disposition.FLAT_4: 21,
|
||||||
|
Disposition.FLAT_4KK: 22,
|
||||||
|
Disposition.FLAT_5_UP: (202, 256), # (5+1, 5kk)
|
||||||
|
Disposition.FLAT_OTHERS: (14, 857), # (Garsonka, Apartman)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def build_response(self) -> requests.Response:
|
||||||
|
request_data = {
|
||||||
|
"sql[advert_type_eu][]": 7,
|
||||||
|
"sql[advert_subtype_eu][]": self.get_dispositions_data(),
|
||||||
|
"sql[advert_function_eu][]": 3,
|
||||||
|
"sql[advert_price_min]": "",
|
||||||
|
"sql[advert_price_max]": "",
|
||||||
|
"sql[usable_area_min]": "",
|
||||||
|
"sql[usable_area_max]": "",
|
||||||
|
"sql[estate_area_min]": "",
|
||||||
|
"sql[estate_area_max]": "",
|
||||||
|
"sql[locality][locality][input]": "Brno, Česko",
|
||||||
|
"sql[locality][locality][city]": "Brno, Česko",
|
||||||
|
"sql[locality][locality][zip_code]": "",
|
||||||
|
"sql[locality][locality][types]": "locality",
|
||||||
|
"sql[locality][location][lat]": "49.1950602",
|
||||||
|
"sql[locality][location][lng]": "16.6068371",
|
||||||
|
"sql[locality][viewport][south]": "49.10965517428777",
|
||||||
|
"sql[locality][viewport][west]": "16.42806782678905",
|
||||||
|
"sql[locality][viewport][north]": "49.294484956308",
|
||||||
|
"sql[locality][viewport][east]": "16.72785321479357",
|
||||||
|
"sql[poptavka][jmeno]": "",
|
||||||
|
"sql[poptavka][prijmeni]": "",
|
||||||
|
"sql[poptavka][email]": "",
|
||||||
|
"sql[poptavka][telefon]": ""
|
||||||
|
}
|
||||||
|
|
||||||
|
logging.debug("EuroBydlení request: %s", json.dumps(request_data))
|
||||||
|
|
||||||
|
response = requests.post(self.base_url, headers=self.headers, cookies=self.cookies, data=request_data)
|
||||||
|
response.encoding = "utf-8"
|
||||||
|
return response
|
||||||
|
|
||||||
|
def get_latest_offers(self) -> list[RentalOffer]:
    """Scrape the EuroBydlení result page and return parsed rental offers."""
    response = self.build_response()
    soup = BeautifulSoup(response.text, 'html.parser')

    items: list[RentalOffer] = []

    # Result list lives inside the #properties-box container; each offer
    # is one <li class="list-items__item">.
    offers = soup.find(id="properties-box")
    for item in offers.find_all("li", {"class": "list-items__item"}):

        image_container = item.find("ul", {"class": "list-items__item__image__wrap"})
        content = item.find("div", {"class": "list-items__content__1"})
        title = content.find("h2", {"class": "list-items__item__title"})
        # NOTE(review): index-based access below assumes the site renders
        # price as the first <li> and location as the second — verify
        # against the live markup if parsing breaks.
        details = content.find_all("li")

        items.append(RentalOffer(
            scraper = self,
            link = urljoin(self.base_url, title.find("a").get('href')),
            title = title.get_text().strip(),
            location = details[1].get_text().strip(),
            # Strip all non-digits; fall back to 0 when no price is listed.
            price = int(re.sub(r"[^\d]", "", details[0].get_text()) or "0"),
            # The img src is protocol-relative ("//..."), so prefix a scheme.
            image_url = "https:" + image_container.find("img").get("src")
        ))

    return items
|
||||||
68
src/scrapers/scraper_idnes_reality.py
Normal file
68
src/scrapers/scraper_idnes_reality.py
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
import logging
|
||||||
|
import re
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from disposition import Disposition
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
from scrapers.scraper_base import ScraperBase
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
|
class ScraperIdnesReality(ScraperBase):
    """Scraper for rental flat listings on reality.idnes.cz (Brno-město).

    Builds a GET search URL from pre-encoded disposition filter fragments
    and parses the server-rendered HTML result list.
    """

    name = "iDNES Reality"
    logo_url = "https://sta-reality2.1gr.cz/ui/image/favicons/favicon-32x32.png"
    color = 0x1D80D7  # embed accent color (iDNES blue)

    # Disposition -> already URL-encoded query fragment(s) for the site's
    # subtypeFlat filter ("s-qc[subtypeFlat][]=...").
    disposition_mapping = {
        Disposition.FLAT_1KK: "s-qc%5BsubtypeFlat%5D%5B%5D=1k",
        Disposition.FLAT_1: "s-qc%5BsubtypeFlat%5D%5B%5D=11",
        Disposition.FLAT_2KK: "s-qc%5BsubtypeFlat%5D%5B%5D=2k",
        Disposition.FLAT_2: "s-qc%5BsubtypeFlat%5D%5B%5D=21",
        Disposition.FLAT_3KK: "s-qc%5BsubtypeFlat%5D%5B%5D=3k",
        Disposition.FLAT_3: "s-qc%5BsubtypeFlat%5D%5B%5D=31",
        Disposition.FLAT_4KK: "s-qc%5BsubtypeFlat%5D%5B%5D=4k",
        Disposition.FLAT_4: "s-qc%5BsubtypeFlat%5D%5B%5D=41",
        Disposition.FLAT_5_UP: (
            "s-qc%5BsubtypeFlat%5D%5B%5D=5k",
            "s-qc%5BsubtypeFlat%5D%5B%5D=51",
            "s-qc%5BsubtypeFlat%5D%5B%5D=6k", # 6 a víc
        ),
        Disposition.FLAT_OTHERS: "s-qc%5BsubtypeFlat%5D%5B%5D=atypical", # atyp
    }


    def build_response(self) -> requests.Response:
        """GET the search results page filtered to the configured dispositions."""
        url = "https://reality.idnes.cz/s/pronajem/byty/brno-mesto/?"
        url += "&".join(self.get_dispositions_data())

        logging.debug("iDNES reality request: %s", url)

        return requests.get(url, headers=self.headers)

    def get_latest_offers(self) -> list[RentalOffer]:
        """Parse the result page HTML into RentalOffer objects."""
        response = self.build_response()
        soup = BeautifulSoup(response.text, 'html.parser')

        items: list[RentalOffer] = []

        offers = soup.find(id="snippet-s-result-articles")
        for item in offers.find_all("div", {"class": "c-products__item"}):

            # Skip paid/advertisement tiles mixed into the result list.
            if "c-products__item-advertisment" in item.get("class"):
                continue

            items.append(RentalOffer(
                scraper = self,
                link = item.find("a", {"class": "c-products__link"}).get('href'),
                # Title spans multiple lines in the markup; collapse them.
                title = ' '.join(item.find("h2", {"class": "c-products__title"}).get_text().strip().splitlines()),
                location = item.find("p", {"class": "c-products__info"}).get_text().strip(),
                # Strip non-digits; fall back to 0 when no price is shown.
                price = int(re.sub(r"[^\d]", "", item.find("p", {"class": "c-products__price"}).get_text()) or "0"),
                # Images are lazy-loaded, so the URL lives in data-src.
                image_url = item.find("img").get("data-src")
            ))

        return items
|
||||||
63
src/scrapers/scraper_realcity.py
Normal file
63
src/scrapers/scraper_realcity.py
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
import logging
|
||||||
|
import re
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from disposition import Disposition
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
from scrapers.scraper_base import ScraperBase
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
|
class ScraperRealcity(ScraperBase):
    """Scraper for rental flat listings on realcity.cz (Brno-město).

    Dispositions are injected into the site's URL-encoded "sp" JSON search
    parameter; results are parsed from the server-rendered HTML.
    """

    name = "REALCITY"
    logo_url = "https://files.janchaloupka.cz/realcity.png"
    color = 0xB60D1C

    # Disposition -> URL-encoded, quoted JSON string value(s)
    # ("%22...%22" decodes to "\"...\"") used inside the "sp" parameter.
    disposition_mapping = {
        Disposition.FLAT_1KK: "%221%2Bkk%22",
        Disposition.FLAT_1: "%221%2B1%22",
        Disposition.FLAT_2KK: "%222%2Bkk%22",
        Disposition.FLAT_2: "%222%2B1%22",
        Disposition.FLAT_3KK: "%223%2Bkk%22",
        Disposition.FLAT_3: "%223%2B1%22",
        Disposition.FLAT_4KK: "%224%2Bkk%22",
        Disposition.FLAT_4: ("%224%2B1%22", "%224%2B2%22"), # 4+1, 4+2
        Disposition.FLAT_5_UP: ("%225%2Bkk%22", "%225%2B1%22", "%225%2B2%22", "%226%2Bkk%22", "%226%2B1%22", "%22disp_more%22"), # 5kk, 5+1, 5+2, 6kk, 6+1, ++
        Disposition.FLAT_OTHERS: ("%22atyp%22", "%22disp_nospec%22"), # atyp, unknown
    }


    def build_response(self) -> requests.Response:
        """GET the search page with the configured dispositions spliced into
        the pre-encoded "sp" JSON query parameter."""
        url = "https://www.realcity.cz/pronajem-bytu/brno-mesto-68/?sp=%7B%22locality%22%3A%5B68%5D%2C%22transactionTypes%22%3A%5B%22rent%22%5D%2C%22propertyTypes%22%3A%5B%7B%22propertyType%22%3A%22flat%22%2C%22options%22%3A%7B%22disposition%22%3A%5B"
        url += "%2C".join(self.get_dispositions_data())
        url += "%5D%7D%7D%5D%7D"

        logging.debug("REALCITY request: %s", url)

        return requests.get(url, headers=self.headers)

    def get_latest_offers(self) -> list[RentalOffer]:
        """Parse the result page HTML into RentalOffer objects."""
        response = self.build_response()
        soup = BeautifulSoup(response.text, 'html.parser')

        items: list[RentalOffer] = []

        for item in soup.select("#rc-advertise-result .media.advertise.item"):
            image = item.find("div", "pull-left image")
            body = item.find("div", "media-body")

            items.append(RentalOffer(
                scraper=self,
                link="https://www.realcity.cz" + body.find("div", "title").a.get("href"),
                title=body.find("div", "title").a.get_text() or "Chybí titulek",
                location=body.find("div", "address").get_text().strip() or "Chybí adresa",
                # BUGFIX: previously the price was left as a string (re.sub
                # result) while every other scraper produces an int, and it
                # crashed int-consumers when the text held no digits. Convert
                # to int with a 0 fallback, consistent with the other scrapers.
                price=int(re.sub(r'\D+', '', body.find("div", "price").get_text() or "0") or "0"),
                # The img src is protocol-relative; prefix a scheme.
                image_url="https:" + image.img.get("src")
            ))

        return items
|
||||||
111
src/scrapers/scraper_realingo.py
Normal file
111
src/scrapers/scraper_realingo.py
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from disposition import Disposition
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
from scrapers.scraper_base import ScraperBase
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
class ScraperRealingo(ScraperBase):
    """Scraper for rental flat listings on realingo.cz.

    Talks to the site's GraphQL endpoint directly and maps the returned
    offer categories to human-readable Czech labels.
    """

    name = "realingo"
    logo_url = "https://www.realingo.cz/_next/static/media/images/android-chrome-144x144-cf1233ce.png"
    color = 0x00BC78
    base_url = "https://www.realingo.cz/graphql"

    # Disposition -> GraphQL OfferCategory enum value(s).
    disposition_mapping = {
        Disposition.FLAT_1KK: "FLAT1_KK",
        Disposition.FLAT_1: "FLAT11",
        Disposition.FLAT_2KK: "FLAT2_KK",
        Disposition.FLAT_2: "FLAT21",
        Disposition.FLAT_3KK: "FLAT3_KK",
        Disposition.FLAT_3: "FLAT31",
        Disposition.FLAT_4KK: "FLAT4_KK",
        Disposition.FLAT_4: "FLAT41",
        Disposition.FLAT_5_UP: ("FLAT5_KK", "FLAT51", "FLAT6_AND_MORE"),
        Disposition.FLAT_OTHERS: "OTHERS_FLAT",
    }


    def build_response(self) -> requests.Response:
        """POST the SearchOffer GraphQL query (rental flats in Brno, newest first)."""
        json_request = {
            "query": "query SearchOffer($purpose: OfferPurpose, $property: PropertyType, $saved: Boolean, $categories: [OfferCategory!], $area: RangeInput, $plotArea: RangeInput, $price: RangeInput, $bounds: GpsBoundsInput, $address: String, $transportType: TransportType, $toleration: Float, $buildingTypes: [BuildingType!], $buildingStatuses: [BuildingStatus!], $buildingPositions: [BuildingPosition!], $houseTypes: [HouseType!], $floor: RangeInput, $ownershipStatuses: [OwnershipStatus!], $furnitureStatuses: [FurnitureStatus!], $maxAge: Int, $contactType: ContactType, $geometry: GeoJSONGeometry, $sort: OfferSort = NEWEST, $first: Int = 20, $skip: Int = 0) {\n addressGeometry(\n address: $address\n geometry: $geometry\n toleration: $toleration\n transportType: $transportType\n ) {\n geometry\n mask\n }\n searchOffer(\n filter: {purpose: $purpose, property: $property, saved: $saved, address: $address, transportType: $transportType, toleration: $toleration, categories: $categories, area: $area, plotArea: $plotArea, price: $price, bounds: $bounds, buildingTypes: $buildingTypes, buildingStatuses: $buildingStatuses, buildingPositions: $buildingPositions, houseTypes: $houseTypes, floor: $floor, ownershipStatuses: $ownershipStatuses, furnitureStatuses: $furnitureStatuses, maxAge: $maxAge, contactType: $contactType, geometry: $geometry}\n sort: $sort\n first: $first\n skip: $skip\n save: true\n ) {\n location {\n id\n type\n url\n name\n neighbours {\n id\n type\n url\n name\n }\n breadcrumbs {\n url\n name\n }\n relatedSearch {\n ...SearchParametersAttributes\n }\n center\n }\n items {\n ...SearchOfferAttributes\n }\n total\n }\n}\n\nfragment FilterAttributes on OfferFilter {\n purpose\n property\n categories\n address\n location {\n name\n }\n toleration\n transportType\n bounds {\n northEast {\n latitude\n longitude\n }\n southWest {\n latitude\n longitude\n }\n }\n saved\n geometry\n area {\n from\n to\n }\n plotArea {\n from\n to\n }\n price {\n from\n to\n }\n buildingTypes\n buildingStatuses\n buildingPositions\n houseTypes\n floor {\n from\n to\n }\n ownershipStatuses\n furnitureStatuses\n maxAge\n contactType\n}\n\nfragment SearchParametersAttributes on SearchParameters {\n filter {\n ...FilterAttributes\n }\n page\n priceMap\n sort\n}\n\nfragment SearchOfferAttributes on Offer {\n id\n url\n purpose\n property\n visited\n liked\n reserved\n createdAt\n category\n purpose\n property\n price {\n total\n canonical\n currency\n }\n area {\n main\n plot\n }\n photos {\n main\n }\n location {\n address\n addressUrl\n locationPrecision\n latitude\n longitude\n }\n}\n",
            "operationName": "SearchOffer",
            "variables": {
                "purpose": "RENT",
                "property": "FLAT",
                "address": "Brno",
                "saved": False,
                "categories": self.get_dispositions_data(),
                "sort": "NEWEST",
                "first": 300,  # fetch a generous page so nothing is missed between polls
                "skip": 0
            }
        }

        logging.debug("realingo request: %s", json.dumps(json_request))

        return requests.post(self.base_url, headers=self.headers, json=json_request)


    def category_to_string(self, id) -> str:
        """Translate a GraphQL category enum value to a Czech label.

        Returns an empty string for unknown categories.
        """
        return {
            "FLAT1_KK": "Byt 1+kk",
            "FLAT11": "Byt 1+1",
            "FLAT2_KK": "Byt 2+kk",
            "FLAT21": "Byt 2+1",
            "FLAT3_KK": "Byt 3+kk",
            "FLAT31": "Byt 3+1",
            "FLAT4_KK": "Byt 4+kk",
            "FLAT41": "Byt 4+1",
            "FLAT5_KK": "Byt 5+kk",
            "FLAT51": "Byt 5+1",
            "FLAT6_AND_MORE": "Byt 6+kk a v\u011bt\u0161\xed",
            "HOUSE_FAMILY": "Rodinn\xfd dům",
            "HOUSE_APARTMENT": "\u010cin\u017eovn\xed",
            "HOUSE_MANSION": "Vila",
            "LAND_COMMERCIAL": "Komer\u010dn\xed",
            "LAND_HOUSING": "Bydlen\xed",
            "LAND_GARDEN": "Zahrady",
            "LAND_AGRICULTURAL": "Zem\u011bd\u011blsk\xfd",
            "LAND_MEADOW": "Louka",
            "LAND_FOREST": "Les",
            "COMMERCIAL_OFFICE": "Kancel\xe1\u0159",
            "COMMERCIAL_STORAGE": "Sklad",
            "COMMERCIAL_MANUFACTURING": "V\xfdrobn\xed prostor",
            "COMMERCIAL_BUSINESS": "Obchod",
            "COMMERCIAL_ACCOMMODATION": "Ubytov\xe1n\xed",
            "COMMERCIAL_RESTAURANT": "Restaurace",
            "COMMERCIAL_AGRICULTURAL": "Zem\u011bd\u011blsk\xfd objekt",
            "OTHERS_HUT": "Chata",
            "OTHERS_COTTAGE": "Chalupa",
            "OTHERS_GARAGE": "Gar\xe1\u017e",
            "OTHERS_FARMHOUSE": "Zem\u011bd\u011blsk\xe1 usedlost",
            "OTHERS_POND": "Rybn\xedk",
            "OTHERS_FLAT": "Atypick\xfd",
            # BUGFIX: the labels for OTHERS_OTHERS and OTHERS_MONUMENTS were
            # swapped ("Památka" = monument belongs to MONUMENTS, "Ostatní"
            # = others belongs to OTHERS).
            "OTHERS_OTHERS": "Ostatn\xed",
            "OTHERS_MONUMENTS": "Pam\xe1tka"
        }.get(id, "")


    def get_latest_offers(self) -> list[RentalOffer]:
        """Turn the GraphQL search result into RentalOffer objects."""
        response = self.build_response().json()

        items: list[RentalOffer] = []

        for offer in response["data"]["searchOffer"]["items"]:
            items.append(RentalOffer(
                scraper = self,
                link = urljoin(self.base_url, offer["url"]),
                title = self.category_to_string(offer["category"]) + ", " + str(offer["area"]["main"]) + " m²",
                location = offer["location"]["address"],
                price = offer["price"]["total"],
                # photos.main may be null; fall back to the bare images path.
                image_url = urljoin(self.base_url, "/static/images/" + (offer["photos"]["main"] or ""))
            ))

        return items
|
||||||
72
src/scrapers/scraper_remax.py
Normal file
72
src/scrapers/scraper_remax.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from disposition import Disposition
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
from scrapers.scraper_base import ScraperBase
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
|
class ScraperRemax(ScraperBase):
    """Scraper for rental flat listings on remax-czech.cz (Brno-město).

    Builds a GET search URL from pre-encoded type-filter fragments and
    reads offer attributes from data-* attributes of the result tiles.
    """

    name = "Remax"
    logo_url = "https://www.remax-czech.cz/apple-touch-icon.png"
    color = 0x003DA5
    base_url = "https://www.remax-czech.cz/reality/vyhledavani/"

    # Disposition -> pre-encoded "types[4][N]=on" query fragment(s);
    # the numeric indices are the site's internal subtype IDs.
    disposition_mapping = {
        Disposition.FLAT_1KK: "&types%5B4%5D%5B2%5D=on",
        Disposition.FLAT_2KK: "&types%5B4%5D%5B3%5D=on",
        Disposition.FLAT_3KK: "&types%5B4%5D%5B4%5D=on",
        Disposition.FLAT_4KK: "&types%5B4%5D%5B5%5D=on",
        Disposition.FLAT_1: "&types%5B4%5D%5B9%5D=on",
        Disposition.FLAT_2: "&types%5B4%5D%5B10%5D=on",
        Disposition.FLAT_3: "&types%5B4%5D%5B11%5D=on",
        Disposition.FLAT_4: "&types%5B4%5D%5B12%5D=on",
        Disposition.FLAT_5_UP: (
            "&types%5B4%5D%5B6%5D=on", # 5kk
            "&types%5B4%5D%5B7%5D=on", # 6kk
            "&types%5B4%5D%5B8%5D=on", # 7kk
            "&types%5B4%5D%5B13%5D=on", # 5+1
            "&types%5B4%5D%5B14%5D=on", # 6+1
            "&types%5B4%5D%5B15%5D=on", # 7+1
        ),
        Disposition.FLAT_OTHERS: (
            "&types%5B4%5D%5B16%5D=on", # atyp
            "&types%5B4%5D%5B17%5D=on", # jiný
        ),
    }


    def build_response(self) -> requests.Response:
        """GET the search results filtered to Brno rentals and the
        configured dispositions, newest first."""
        # regions[116][3702] = Brno-město, sale=2 = rental.
        url = self.base_url + "?regions%5B116%5D%5B3702%5D=on&sale=2"
        url += "".join(self.get_dispositions_data())
        # Sort by publication date (newest first).
        url += "&order_by_published_date=0"

        logging.debug("Remax request: %s", url)

        return requests.get(url, headers=self.headers)

    def get_latest_offers(self) -> list[RentalOffer]:
        """Parse the result page into RentalOffer objects.

        All offer fields are carried in data-* attributes of each tile,
        so no deep HTML traversal is needed.
        """
        response = self.build_response()
        soup = BeautifulSoup(response.text, 'html.parser')

        items: list[RentalOffer] = []

        for item in soup.select("#list .container-fluid .pl-items .pl-items__item"):
            items.append(RentalOffer(
                scraper = self,
                link = urljoin(self.base_url, item.get('data-url')),
                title = item.get("data-title"),
                # Collapse runs of whitespace/newlines in the address.
                location = re.sub(r"\s+", " ", item.get("data-display-address")),
                # Strip non-digits; fall back to 0 when no price is shown.
                price = int(re.sub(r"[^\d]", "", item.get("data-price")) or "0"),
                image_url = item.get("data-img")
            ))

        return items
|
||||||
138
src/scrapers/scraper_sreality.py
Normal file
138
src/scrapers/scraper_sreality.py
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
import logging
|
||||||
|
from time import time
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from disposition import Disposition
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
from scrapers.scraper_base import ScraperBase
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
from time import time
|
||||||
|
import requests
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
|
||||||
|
class ScraperSreality(ScraperBase):
    """Scraper for rental flat listings on sreality.cz (Brno district).

    Uses the public JSON API for the listing data; detail links must be
    reconstructed client-side from the numeric category codes, which is
    what the three ``_category_*_to_url`` tables below are for.
    """

    name = "Sreality"
    logo_url = "https://www.sreality.cz/img/icons/android-chrome-192x192.png"
    color = 0xCC0000
    base_url = "https://www.sreality.cz"

    # Disposition -> API category_sub_cb code (as a string).
    disposition_mapping = {
        Disposition.FLAT_1KK: "2",
        Disposition.FLAT_1: "3",
        Disposition.FLAT_2KK: "4",
        Disposition.FLAT_2: "5",
        Disposition.FLAT_3KK: "6",
        Disposition.FLAT_3: "7",
        Disposition.FLAT_4KK: "8",
        Disposition.FLAT_4: "9",
        Disposition.FLAT_5_UP: ("10", "11", "12"),
        Disposition.FLAT_OTHERS: "16",
    }

    # category_type_cb -> URL path segment (transaction type).
    _category_type_to_url = {
        0: "vse",
        1: "prodej",
        2: "pronajem",
        3: "drazby"
    }

    # category_main_cb -> URL path segment (property kind).
    _category_main_to_url = {
        0: "vse",
        1: "byt",
        2: "dum",
        3: "pozemek",
        4: "komercni",
        5: "ostatni"
    }

    # category_sub_cb -> URL path segment (disposition / subtype).
    _category_sub_to_url = {
        2: "1+kk",
        3: "1+1",
        4: "2+kk",
        5: "2+1",
        6: "3+kk",
        7: "3+1",
        8: "4+kk",
        9: "4+1",
        10: "5+kk",
        11: "5+1",
        12: "6-a-vice",
        16: "atypicky",
        47: "pokoj",
        37: "rodinny",
        39: "vila",
        43: "chalupa",
        33: "chata",
        35: "pamatka",
        40: "na-klic",
        44: "zemedelska-usedlost",
        19: "bydleni",
        18: "komercni",
        20: "pole",
        22: "louka",
        21: "les",
        46: "rybnik",
        48: "sady-vinice",
        23: "zahrada",
        24: "ostatni-pozemky",
        25: "kancelare",
        26: "sklad",
        27: "vyrobni-prostor",
        28: "obchodni-prostor",
        29: "ubytovani",
        30: "restaurace",
        31: "zemedelsky",
        38: "cinzovni-dum",
        49: "virtualni-kancelar",
        32: "ostatni-komercni-prostory",
        34: "garaz",
        52: "garazove-stani",
        50: "vinny-sklep",
        51: "pudni-prostor",
        53: "mobilni-domek",
        36: "jine-nemovitosti"
    }


    def _create_link_to_offer(self, offer) -> str:
        """Reconstruct the public detail-page URL from an API offer record.

        The API only returns numeric "seo" category codes plus a locality
        slug and hash id; the web detail URL is assembled from them.
        """
        return urljoin(self.base_url, "/detail" +
            "/" + self._category_type_to_url[offer["seo"]["category_type_cb"]] +
            "/" + self._category_main_to_url[offer["seo"]["category_main_cb"]] +
            "/" + self._category_sub_to_url[offer["seo"]["category_sub_cb"]] +
            "/" + offer["seo"]["locality"] +
            "/" + str(offer["hash_id"]))

    def build_response(self) -> requests.Response:
        """GET the estates JSON API filtered to Brno rentals of the
        configured dispositions."""
        url = self.base_url + "/api/cs/v2/estates?category_main_cb=1&category_sub_cb="
        url += "|".join(self.get_dispositions_data())
        # category_type_cb=2 = rental; district 72 / region 14 = Brno.
        url += "&category_type_cb=2&locality_district_id=72&locality_region_id=14&per_page=20"
        # Cache-busting timestamp parameter.
        url += "&tms=" + str(int(time()))

        logging.debug("Sreality request: %s", url)

        return requests.get(url, headers=self.headers)

    def get_latest_offers(self) -> list[RentalOffer]:
        """Turn the API JSON response into RentalOffer objects."""
        response = self.build_response().json()

        items: list[RentalOffer] = []

        for item in response["_embedded"]["estates"]:
            # Ignorovat "tip" nabídky, které úplně neodpovídají filtrům a mění se s každým vyhledáváním
            # (skip promoted "tip" offers that don't match the filters and
            # change with every search).
            if item["region_tip"] > 0:
                continue

            items.append(RentalOffer(
                scraper = self,
                link = self._create_link_to_offer(item),
                title = item["name"],
                location = item["locality"],
                price = item["price_czk"]["value_raw"],
                image_url = item["_links"]["image_middle2"][0]["href"]
            ))

        return items
|
||||||
111
src/scrapers/scraper_ulov_domov.py
Normal file
111
src/scrapers/scraper_ulov_domov.py
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
import json
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from disposition import Disposition
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
from scrapers.scraper_base import ScraperBase
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
class ScraperUlovDomov(ScraperBase):
    """Scraper for rental flat listings on ulovdomov.cz.

    Talks to the site's JSON find API with a hard-coded Brno bounding box
    and maps numeric disposition ids to human-readable Czech labels.
    """

    name = "UlovDomov"
    logo_url = "https://www.ulovdomov.cz/favicon.png"
    color = 0xFFFFFF
    base_url = "https://www.ulovdomov.cz/fe-api/find/seperated-offers-within-bounds"

    # Disposition -> API disposition id(s).
    disposition_mapping = {
        Disposition.FLAT_1KK: 2,
        Disposition.FLAT_1: 3,
        Disposition.FLAT_2KK: 4,
        Disposition.FLAT_2: 5,
        Disposition.FLAT_3KK: 6,
        Disposition.FLAT_3: 7,
        Disposition.FLAT_4KK: 8,
        Disposition.FLAT_4: 9,
        Disposition.FLAT_5_UP: (10, 11, 12, 13, 14, 15), # 5kk, 5+1, 6kk, 6+1, 7kk, 7+1
        Disposition.FLAT_OTHERS: 16,
    }


    def disposition_id_to_string(self, id) -> str:
        """Translate an API disposition id to a Czech label.

        Returns an empty string for unknown ids.
        """
        return {
            1: "garsonky",
            2: "1+kk",
            3: "1+1",
            4: "2+kk",
            5: "2+1",
            6: "3+kk",
            7: "3+1",
            8: "4+kk",
            9: "4+1",
            10: "5+kk",
            11: "5+1",
            12: "6+kk",
            13: "6+1",
            14: "7+kk",
            15: "7+1",
            16: "atypický",
            29: "domu",
            24: "spolubydlení (1 lůžkový)",
            25: "spolubydlení (2 lůžkový)",
            26: "spolubydlení (3 lůžkový)",
            27: "spolubydlení (4+ lůžkový)",
            28: "spolubydlení (samostatný pokoj)",
            "shared_room": "spolubydlení",
            "5_and_more": "5 a více"
        }.get(id, "")

    def build_response(self) -> requests.Response:
        """POST the find-offers API request (Brno bounding box, newest first)."""
        json_request = {
            "acreage_from": "",
            "acreage_to": "",
            "added_before": "",
            "banner_panel_width_type": 480,
            # Hard-coded bounding box covering the city of Brno.
            "bounds": {
                "north_east": {
                    "lat": 49.294485,
                    "lng": 16.727853
                },
                "south_west": {
                    "lat": 49.109655,
                    "lng": 16.428068
                }
            },
            "conveniences": [],
            "dispositions": self.get_dispositions_data(),
            "furnishing": [],
            "is_price_commision_free": None,
            "limit": 20,
            "offer_type_id": None,
            "page": 1,
            "price_from": "",
            "price_to": "",
            "query": "",
            "sort_by": "date:desc",
            "sticker": None
        }

        logging.debug("UlovDomov request: %s", json.dumps(json_request))

        return requests.post(self.base_url, headers=self.headers, json=json_request)

    def get_latest_offers(self) -> list[RentalOffer]:
        """Turn the API JSON response into RentalOffer objects."""
        response = self.build_response().json()

        items: list[RentalOffer] = []
        for offer in response["offers"]:
            items.append(RentalOffer(
                scraper = self,
                link = offer["absolute_url"],
                # TODO "Pronájem" podle ID?
                title = "Pronájem " + self.disposition_id_to_string(offer["disposition_id"]) + " " + str(offer["acreage"]) + " m²",
                location = offer["street"]["label"] + ", " + offer["village"]["label"] + " - " + offer["village_part"]["label"],
                price = offer["price_rental"],
                # BUGFIX: an offer without photos used to raise IndexError
                # and abort the whole scrape; fall back to None instead.
                image_url = offer["photos"][0]["path"] if offer["photos"] else None
            ))

        return items
|
||||||
99
src/scrapers_manager.py
Normal file
99
src/scrapers_manager.py
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
import logging
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
from config import *
|
||||||
|
from disposition import Disposition
|
||||||
|
from scrapers.rental_offer import RentalOffer
|
||||||
|
from scrapers.scraper_base import ScraperBase
|
||||||
|
from scrapers.scraper_bravis import ScraperBravis
|
||||||
|
from scrapers.scraper_euro_bydleni import ScraperEuroBydleni
|
||||||
|
from scrapers.scraper_idnes_reality import ScraperIdnesReality
|
||||||
|
from scrapers.scraper_realcity import ScraperRealcity
|
||||||
|
from scrapers.scraper_realingo import ScraperRealingo
|
||||||
|
from scrapers.scraper_remax import ScraperRemax
|
||||||
|
from scrapers.scraper_sreality import ScraperSreality
|
||||||
|
from scrapers.scraper_ulov_domov import ScraperUlovDomov
|
||||||
|
from scrapers.scraper_bezrealitky import ScraperBezrealitky
|
||||||
|
|
||||||
|
|
||||||
|
def create_scrapers(dispositions: Disposition) -> list[ScraperBase]:
    """Instantiate every supported scraper for the given dispositions.

    No Discord channel is bound to the scrapers (channel is None).
    """
    scraper_classes = (
        ScraperBravis,
        ScraperEuroBydleni,
        ScraperIdnesReality,
        ScraperRealcity,
        ScraperRealingo,
        ScraperRemax,
        ScraperSreality,
        ScraperUlovDomov,
        ScraperBezrealitky,
    )
    return [scraper(dispositions, None) for scraper in scraper_classes]
|
||||||
|
|
||||||
|
def create_scrapers_1kk() -> list[ScraperBase]:
    """Instantiate every supported scraper for 1+kk flats, bound to the
    dedicated 1+kk offers channel."""
    channel = config.discord.offers_channel_1kk
    scraper_classes = (
        ScraperBravis,
        ScraperEuroBydleni,
        ScraperIdnesReality,
        ScraperRealcity,
        ScraperRealingo,
        ScraperRemax,
        ScraperSreality,
        ScraperUlovDomov,
        ScraperBezrealitky,
    )
    return [scraper(Disposition.FLAT_1KK, channel) for scraper in scraper_classes]
|
||||||
|
|
||||||
|
def create_scrapers_1_1() -> list[ScraperBase]:
    """Instantiate every supported scraper for 1+1 flats, bound to the
    dedicated 1+1 offers channel."""
    channel = config.discord.offers_channel_1
    scraper_classes = (
        ScraperBravis,
        ScraperEuroBydleni,
        ScraperIdnesReality,
        ScraperRealcity,
        ScraperRealingo,
        ScraperRemax,
        ScraperSreality,
        ScraperUlovDomov,
        ScraperBezrealitky,
    )
    return [scraper(Disposition.FLAT_1, channel) for scraper in scraper_classes]
|
||||||
|
|
||||||
|
def create_scrapers_2kk() -> list[ScraperBase]:
    """Instantiate every supported scraper for 2+kk flats, bound to the
    dedicated 2+kk offers channel."""
    channel = config.discord.offers_channel_2kk
    scraper_classes = (
        ScraperBravis,
        ScraperEuroBydleni,
        ScraperIdnesReality,
        ScraperRealcity,
        ScraperRealingo,
        ScraperRemax,
        ScraperSreality,
        ScraperUlovDomov,
        ScraperBezrealitky,
    )
    return [scraper(Disposition.FLAT_2KK, channel) for scraper in scraper_classes]
|
||||||
|
|
||||||
|
def create_scrapers_2_2() -> list[ScraperBase]:
    """Instantiate every supported scraper for 2+1 flats, bound to the
    dedicated 2+1 offers channel."""
    channel = config.discord.offers_channel_2
    scraper_classes = (
        ScraperBravis,
        ScraperEuroBydleni,
        ScraperIdnesReality,
        ScraperRealcity,
        ScraperRealingo,
        ScraperRemax,
        ScraperSreality,
        ScraperUlovDomov,
        ScraperBezrealitky,
    )
    return [scraper(Disposition.FLAT_2, channel) for scraper in scraper_classes]
|
||||||
|
|
||||||
|
def fetch_latest_offers(scrapers: list[ScraperBase]) -> list[tuple[object, RentalOffer]]:
    """Získá všechny nejnovější nabídky z dostupných serverů.

    A scraper that raises is logged and skipped so one broken site does
    not abort the whole fetch cycle.

    Returns:
        list[tuple[object, RentalOffer]]: (discord channel, offer) pairs
        for every offer collected from every working scraper.
    """
    # BUGFIX: annotation was the malformed `list[(object, RentalOffer)]`
    # and the docstring claimed a plain list[RentalOffer]; the function
    # actually yields (channel, offer) tuples.
    offers: list[tuple[object, RentalOffer]] = []
    for scraper in scrapers:
        try:
            for offer in scraper.get_latest_offers():
                offers.append((scraper.channel, offer))
        except Exception:
            # Log the full traceback but continue with the remaining scrapers.
            logging.error(traceback.format_exc())

    return offers
|
||||||
10
src/utils.py
Normal file
10
src/utils.py
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
def flatten(xs):
    """Recursively yield the atomic elements of an arbitrarily nested iterable.

    Strings and bytes are treated as atoms, not as iterables of characters.
    Based on https://stackoverflow.com/a/2158532
    """
    for element in xs:
        if isinstance(element, (str, bytes)) or not isinstance(element, Iterable):
            yield element
        else:
            yield from flatten(element)
|
||||||
Reference in New Issue
Block a user