Browse Source

Code refactoring: separate scraper and movie objects

master
djib 2 years ago
parent
commit
1ff676b8a4
  1. 182
      FreeboxMoviePlanner.py

182
FreeboxMoviePlanner.py

@ -16,105 +16,134 @@ from bs4 import BeautifulSoup
from collections import deque
class FreeboxMoviePlanner:
class Movie:
    """One film found in the TV guide, plus the TMDB details stored on it.

    Every attribute starts out as an empty string, except ``good`` which
    starts out as ``False``.
    """

    def __init__(self):
        # Listing data.
        self.title = ''
        self.genre = ''
        self.channel = ''
        self.day = ''
        # TMDB data.
        self.tmdb_id = ''
        self.original_title = ''
        self.overview = ''
        self.rating = ''
        self.url = ''
        self.good = False

    def __str__(self):
        """Return a multi-line description of the movie.

        An empty ``day`` is displayed as 'Today'.
        """
        if self.day == '':
            when = 'Today'
        else:
            when = self.day
        layout = '{}: {} - {} ({})\n TMDB: {} - {}\n @ {}\n {}'
        values = (
            when,
            self.title,
            self.genre,
            self.channel,
            self.rating,
            self.original_title,
            self.url,
            self.overview,
        )
        return layout.format(*values)

    def __repr__(self):
        """Return a short ``Movie <title(rating)>`` tag."""
        tag = 'Movie <{}({})>'
        return tag.format(self.title, self.rating)
class TVGuideScraper:
TV_GUIDE_URL = 'https://www.programme-television.org/{}?bouquet=tnt'
def __init__(self):
    """Load ``config.json`` and hand its TMDB API key to ``tmdbsimple``.

    The parsed configuration is kept on ``self.config``.
    """
    config_path = 'config.json'
    logging.info('Opening config file: config.json')
    with open(config_path) as handle:
        settings = json.load(handle)
    self.config = settings
    tmdbsimple.API_KEY = settings['tmdb-api']
@staticmethod
def getMovies(day='', timeout=30):
    """Scrape the TV guide page for one day and return the films listed on it.

    Args:
        day: Value substituted into ``TV_GUIDE_URL`` and stored,
            title-cased, on every returned movie. Defaults to ''.
        timeout: Seconds passed to ``requests.get`` so an unresponsive
            server cannot block the scrape forever.

    Returns:
        A list of ``Movie`` objects with ``title``, ``genre``, ``channel``
        and ``day`` filled in.

    Raises:
        requests.HTTPError: If the guide answers with an error status.
        requests.Timeout: If the guide does not answer within ``timeout``.
    """
    url = TVGuideScraper.TV_GUIDE_URL.format(day)
    # Log the URL that is actually requested, not the unformatted template.
    logging.info('Connecting to {}'.format(url))
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    html = BeautifulSoup(r.text, 'html.parser')
    movies = []
    for channel in html.select('.bloc_cnt'):
        # The channel name is read from the first <em>; blocks without
        # one are skipped.
        channel_tags = channel.select('em')
        if not channel_tags:
            continue
        for movietag in channel.find_all(TVGuideScraper._tag_is_film):
            movie = Movie()
            movie.title = \
                movietag.select('.texte_titre a')[0]['title']
            movie.genre = movietag.select('.texte_cat a')[0].string
            movie.channel = channel_tags[0]\
                .string.replace('Programme ', '')
            movie.day = day.title()
            logging.info('Found movie: {0!r}'.format(movie))
            movies.append(movie)
    return movies
@staticmethod
def _tag_is_film(tag):
"""
Helper to check if a tag is a film
"""
return (
tag.has_attr('data-nature')
and
tag['data-nature'] == 'films-telefilms'
)
@staticmethod
def _printMovie(movie):
    """Print a two-part summary of a movie given as a mapping.

    Keys read: 'title', 'genre', 'channel', then 'rating',
    'original_title' and 'overview'.
    """
    headline = '{} - {} ({})'.format(
        movie['title'], movie['genre'], movie['channel'])
    print(headline)
    # Built only after the headline is printed, so a missing TMDB key
    # still surfaces after the first line has been written.
    details = ' TMDB: {} - {}\n {}'.format(
        movie['rating'], movie['original_title'], movie['overview'])
    print(details)
def printAllMovies(self, movies):
    """Print every movie of a ``{day: [movie, ...]}`` mapping, day by day.

    Each day gets a '=== Day' heading followed by its movies, printed
    through ``FreeboxMoviePlanner._printMovie``.
    """
    for day_name, listing in movies.items():
        heading = '=== {}'.format(day_name.title())
        print(heading)
        for entry in listing:
            FreeboxMoviePlanner._printMovie(entry)
def getAllMovies(self):
class FreeboxMoviePlanner:
def __init__(self):
    """Load ``config.json``, register the TMDB API key, start with no movies.

    The parsed configuration is kept on ``self.config``; ``self.movies``
    begins as an empty list.
    """
    config_path = 'config.json'
    logging.info('Opening config file: config.json')
    with open(config_path) as handle:
        settings = json.load(handle)
    self.config = settings
    tmdbsimple.API_KEY = settings['tmdb-api']
    self.movies = []
def __repr__(self):
result = 'FreeboxMoviePlanner <Movies:\n'
for movie in self.movies:
result += ' {!r}\n'.format(movie)
result += '>'
return result
def printAllMovies(self):
for movie in self.movies:
print(movie)
print()
def scapeAllMovies(self):
    """Scrape the guide for today and each weekday page into ``self.movies``.

    Results are appended to ``self.movies``, the list that
    ``findMoviesOnTMDB``, ``filterBadRatings`` and ``printAllMovies``
    work on. Nothing is returned.
    """
    days = deque(['lundi', 'mardi', 'mercredi',
                  'jeudi', 'vendredi', 'samedi', 'dimanche'])
    # weekday() is 0 for Monday, so this rotation puts tomorrow's name first.
    offset = datetime.datetime.today().weekday()
    days.rotate(-1-offset)
    # The empty string is the scraper's default day; Movie.__str__ shows it
    # as 'Today'.
    days.appendleft('')
    for day in days:
        self.movies += TVGuideScraper.getMovies(day)
    logging.info('Found the following movies: {}'.format(self.movies))
# NOTE(review): this instance-level getMovies looks like the pre-refactoring
# counterpart of TVGuideScraper.getMovies. It calls self._getMovieRating, for
# which only a bare `def` line is visible in this view, and the indentation
# has been flattened, so the nesting of `continue` / `else` below cannot be
# confirmed from here -- verify against the real file before relying on it.
def getMovies(self, day=''):
# Fetches the TV guide page for `day`, builds one dict per film tag
# ('title', 'genre', 'channel', then 'rating', 'original_title', 'overview')
# and returns the list of dicts that were appended to `movies`.
# NOTE(review): the log line below prints the URL template without `day`
# substituted, unlike the request on the following line.
logging.info('Connecting to {}'.format(self.TV_GUIDE_URL))
r = requests.get(self.TV_GUIDE_URL.format(day))
r.raise_for_status()
html = BeautifulSoup(r.text, 'html.parser')
movies = []
for channel in html.select('.bloc_cnt'):
# Only blocks containing an <em> element are scanned; the channel name is
# read from that element further down.
if len(channel.select('em')):
for movie in channel.find_all(
FreeboxMoviePlanner._tag_is_film):
movie_title = movie.select('.texte_titre a')[0]['title']
thismovie = {}
thismovie['title'] = movie_title
thismovie['genre'] = movie.select('.texte_cat a')[0].string
thismovie['channel'] = channel.select('em')[0]\
.string.replace('Programme ', '')
logging.info('Found movie: {}'.format(thismovie))
# Look the title up on TMDB; a falsy result means no match was found.
tmdb_details = self._getMovieRating(movie_title)
if not tmdb_details:
logging.warning(
'No TMDB match for {}'.format(movie_title)
)
continue
thismovie['rating'] = tmdb_details['vote_average']
thismovie['original_title'] = \
tmdb_details['original_title']
# The overview is wrapped at 75 columns and re-joined with '\n '.
thismovie['overview'] = '\n '.join(textwrap.wrap(
tmdb_details['overview'], 75)
)
# Films rated below config['minimum-rating'] are logged and not appended.
if(
float(tmdb_details['vote_average'])
< self.config['minimum-rating']
):
logging.warning(
'Bad rating ({}), skipping {}'.format(
tmdb_details['vote_average'], movie_title))
else:
movies.append(thismovie)
return movies
def findMoviesOnTMDB(self):
for movie in self.movies:
tmdb_details = self._findMovieOnTMDB(movie.title)
if tmdb_details:
movie.rating = tmdb_details['vote_average']
movie.original_title = \
tmdb_details['original_title']
movie.overview = '\n '.join(textwrap.wrap(
tmdb_details['overview'], 75)
)
movie.tmdb_id = tmdb_details['id']
movie.good = \
float(movie.rating) >= self.config['minimum-rating']
movie.url = 'https://www.themoviedb.org/movie/{}?language={}' \
.format(movie.tmdb_id, self.config['tmdb-language'])
def filterBadRatings(self):
self.movies = [movie for movie in self.movies if movie.good]
def _getMovieRating(self, movie):
def _findMovieOnTMDB(self, movie):
    """Search TMDB for a title and return the first result.

    The search uses the language from config 'tmdb-language'. Returns
    the first entry of the search results, or an empty list when the
    search yields nothing.
    """
    logging.info("Searching for '{}' on TMDB".format(movie))
    search = tmdbsimple.Search()
    search.movie(query=movie, language=self.config['tmdb-language'])
    hits = search.results
    logging.info("Found {}".format(hits))
    if len(hits) == 0:
        logging.warning("'{}' not found on TMDB!".format(movie))
        return []
    best = hits[0]
    logging.info("Found '{}'".format(best['title']))
    return best
@ -124,4 +153,7 @@ if __name__ == '__main__':
format=' %(asctime)s - %(levelname)s - %(message)s'
)
fmp = FreeboxMoviePlanner()
fmp.printAllMovies(fmp.getAllMovies())
fmp.scapeAllMovies()
fmp.findMoviesOnTMDB()
fmp.filterBadRatings()
fmp.printAllMovies()
Loading…
Cancel
Save