Browse Source

Code refactoring: separate scraper and movie objects

tags/v1.0.0
djib 6 months ago
parent
commit
1ff676b8a4
1 changed file with 107 additions and 75 deletions
  1. +107
    -75
      FreeboxMoviePlanner.py

+ 107
- 75
FreeboxMoviePlanner.py View File

@@ -16,105 +16,134 @@ from bs4 import BeautifulSoup
from collections import deque


class FreeboxMoviePlanner:
class Movie:
    """Plain record describing one film and what is known about it.

    Every field starts out empty; other code fills the attributes in
    after construction.
    """

    def __init__(self):
        # Broadcast details.
        self.title = ''
        self.genre = ''
        self.channel = ''
        self.day = ''
        # TMDB details.
        self.tmdb_id = ''
        self.original_title = ''
        self.rating = ''
        self.overview = ''
        self.url = ''
        # Stays False until something explicitly marks the movie as good.
        self.good = False

    def __str__(self):
        """Return the multi-line, human-readable description."""
        # An empty day is displayed as 'Today'.
        day_label = self.day if self.day != '' else 'Today'
        template = '{}: {} - {} ({})\n TMDB: {} - {}\n @ {}\n {}'
        fields = (
            day_label,
            self.title,
            self.genre,
            self.channel,
            self.rating,
            self.original_title,
            self.url,
            self.overview,
        )
        return template.format(*fields)

    def __repr__(self):
        """Return a short debugging form: title plus rating."""
        template = 'Movie <{}({})>'
        return template.format(self.title, self.rating)


class TVGuideScraper:
TV_GUIDE_URL = 'https://www.programme-television.org/{}?bouquet=tnt'

def __init__(self):
    """Load ``config.json`` and pass its TMDB API key to tmdbsimple.

    The parsed configuration is kept on ``self.config``.

    Raises:
        OSError: if ``config.json`` cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
        KeyError: if the configuration has no ``tmdb-api`` entry.
    """
    logging.info('Opening config file: config.json')
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open('config.json', encoding='utf-8') as config_file:
        self.config = json.load(config_file)
    tmdbsimple.API_KEY = self.config['tmdb-api']
@staticmethod
def getMovies(day=''):
    """Scrape the TV guide page for one day and return the films on it.

    Args:
        day: value substituted into ``TVGuideScraper.TV_GUIDE_URL``. The
            empty default is what ``Movie.__str__`` displays as 'Today'.

    Returns:
        A list of ``Movie`` objects with ``title``, ``genre``, ``channel``
        and ``day`` filled in.

    Raises:
        requests.HTTPError: if the guide answers with an error status.
    """
    url = TVGuideScraper.TV_GUIDE_URL.format(day)
    # Log the URL that is actually requested, not the raw '{}' template.
    logging.info('Connecting to {}'.format(url))
    r = requests.get(url)
    r.raise_for_status()
    html = BeautifulSoup(r.text, 'html.parser')
    movies = []
    for channel in html.select('.bloc_cnt'):
        channel_names = channel.select('em')
        if not channel_names:
            # The channel name is read from the first <em>; without one
            # the block is skipped.
            continue
        for movietag in channel.find_all(TVGuideScraper._tag_is_film):
            movie = Movie()
            movie.title = movietag.select('.texte_titre a')[0]['title']
            movie.genre = movietag.select('.texte_cat a')[0].string
            movie.channel = \
                channel_names[0].string.replace('Programme ', '')
            movie.day = day.title()

            logging.info('Found movie: {0!r}'.format(movie))

            movies.append(movie)

    return movies

@staticmethod
def _tag_is_film(tag):
"""
Helper to check if a tag is a film
"""
return (
tag.has_attr('data-nature')
and
tag['data-nature'] == 'films-telefilms'
)

@staticmethod
def _printMovie(movie):
    """Print a summary of one movie to standard output.

    Args:
        movie: a mapping indexed by 'title', 'genre', 'channel',
            'rating', 'original_title' and 'overview'.
    """
    headline = '{} - {} ({})'.format(
        movie['title'], movie['genre'], movie['channel'])
    print(headline)

    tmdb_part = ' TMDB: {} - {}\n {}'.format(
        movie['rating'], movie['original_title'], movie['overview'])
    print(tmdb_part)

def printAllMovies(self, movies):
    """Print every movie, grouped under one header line per day.

    Args:
        movies: mapping from a day name to the movies for that day.
    """
    for day, day_movies in movies.items():
        header = '=== {}'.format(day.title())
        print(header)
        for entry in day_movies:
            FreeboxMoviePlanner._printMovie(entry)

def getAllMovies(self):

class FreeboxMoviePlanner:
def __init__(self):
    """Load ``config.json``, configure tmdbsimple and start with no movies.

    The parsed configuration is kept on ``self.config``; ``self.movies``
    starts as an empty list.

    Raises:
        OSError: if ``config.json`` cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
        KeyError: if the configuration has no ``tmdb-api`` entry.
    """
    logging.info('Opening config file: config.json')
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open('config.json', encoding='utf-8') as config_file:
        self.config = json.load(config_file)
    tmdbsimple.API_KEY = self.config['tmdb-api']
    self.movies = []

def __repr__(self):
result = 'FreeboxMoviePlanner <Movies:\n'
for movie in self.movies:
result += ' {!r}\n'.format(movie)
result += '>'
return result

def printAllMovies(self):
"""Print each entry of ``self.movies`` to standard output."""
# NOTE(review): indentation is missing here, so it is unclear whether the
# bare print() (a blank line) runs once per movie or once after the loop --
# confirm against the repository before re-indenting.
for movie in self.movies:
print(movie)
print()

def scapeAllMovies(self):
"""Fetch the movies for '' plus the seven French weekday names.

NOTE(review): the lines after ``days.appendleft('')`` read like two
variants of the same loop interleaved (one fills a local dict and returns
it, the other extends ``self.movies``). With indentation lost, the intended
body cannot be determined from here -- confirm against the repository.
"""
# French day names, Monday first, which lines up with weekday()
# (Monday == 0).
days = deque(['lundi', 'mardi', 'mercredi',
'jeudi', 'vendredi', 'samedi', 'dimanche'])
offset = datetime.datetime.today().weekday()
# Rotate left by offset + 1 so the deque starts with tomorrow's name.
days.rotate(-1-offset)
# '' is getMovies' default day and is displayed as 'Today' by
# Movie.__str__.
days.appendleft('')
movies = {}
for day in days:
movies[day] = self.getMovies(day)
logging.info('Found the following movies: {}'.format(movies))
return movies
self.movies += TVGuideScraper.getMovies(day)
logging.info('Found the following movies: {}'.format(self.movies))

def getMovies(self, day=''):
    """Scrape one day of the TV guide and keep the films TMDB rates well.

    Args:
        day: value substituted into ``self.TV_GUIDE_URL``.

    Returns:
        A list of dicts with the keys 'title', 'genre', 'channel',
        'rating', 'original_title' and 'overview'. Films with no TMDB
        match, or rated below ``self.config['minimum-rating']``, are
        left out.

    Raises:
        requests.HTTPError: if the guide answers with an error status.
    """
    url = self.TV_GUIDE_URL.format(day)
    # Log the URL that is actually requested, not the raw '{}' template.
    logging.info('Connecting to {}'.format(url))
    r = requests.get(url)
    r.raise_for_status()
    html = BeautifulSoup(r.text, 'html.parser')
    movies = []
    for channel in html.select('.bloc_cnt'):
        if not channel.select('em'):
            # The channel name is read from the first <em>; without one
            # the block is skipped.
            continue
        for movie in channel.find_all(FreeboxMoviePlanner._tag_is_film):
            movie_title = movie.select('.texte_titre a')[0]['title']

            thismovie = {
                'title': movie_title,
                'genre': movie.select('.texte_cat a')[0].string,
                'channel': channel.select('em')[0]
                .string.replace('Programme ', ''),
            }
            logging.info('Found movie: {}'.format(thismovie))

            tmdb_details = self._getMovieRating(movie_title)
            if not tmdb_details:
                logging.warning(
                    'No TMDB match for {}'.format(movie_title)
                )
                continue

            thismovie['rating'] = tmdb_details['vote_average']
            thismovie['original_title'] = tmdb_details['original_title']
            thismovie['overview'] = '\n '.join(
                textwrap.wrap(tmdb_details['overview'], 75)
            )
            rating = float(tmdb_details['vote_average'])
            if rating < self.config['minimum-rating']:
                logging.warning(
                    'Bad rating ({}), skipping {}'.format(
                        tmdb_details['vote_average'], movie_title))
            else:
                movies.append(thismovie)
    return movies
def findMoviesOnTMDB(self):
for movie in self.movies:
tmdb_details = self._findMovieOnTMDB(movie.title)
if tmdb_details:
movie.rating = tmdb_details['vote_average']
movie.original_title = \
tmdb_details['original_title']
movie.overview = '\n '.join(textwrap.wrap(
tmdb_details['overview'], 75)
)
movie.tmdb_id = tmdb_details['id']
movie.good = \
float(movie.rating) >= self.config['minimum-rating']
movie.url = 'https://www.themoviedb.org/movie/{}?language={}' \
.format(movie.tmdb_id, self.config['tmdb-language'])

def filterBadRatings(self):
self.movies = [movie for movie in self.movies if movie.good]

def _getMovieRating(self, movie):
def _findMovieOnTMDB(self, movie):
    """Search TMDB for *movie* and return the first hit.

    Args:
        movie: the query passed to the TMDB search.

    Returns:
        The first entry of the search results, or an empty list when the
        search returned nothing.
    """
    logging.info("Searching for '{}' on TMDB".format(movie))
    search = tmdbsimple.Search()
    search.movie(query=movie, language=self.config['tmdb-language'])
    results = search.results
    logging.info("Found {}".format(results))

    if len(results) == 0:
        logging.warning("'{}' not found on TMDB!".format(movie))
        return []

    first_hit = results[0]
    logging.info("Found '{}'".format(first_hit['title']))
    return first_hit


@@ -124,4 +153,7 @@ if __name__ == '__main__':
format=' %(asctime)s - %(levelname)s - %(message)s'
)
fmp = FreeboxMoviePlanner()
fmp.printAllMovies(fmp.getAllMovies())
fmp.scapeAllMovies()
fmp.findMoviesOnTMDB()
fmp.filterBadRatings()
fmp.printAllMovies()

Loading…
Cancel
Save