eit.py

Wed, 29 Nov 2017 23:34:51 +0100

author
mdd
date
Wed, 29 Nov 2017 23:34:51 +0100
changeset 22
c18abd9198c0
parent 17
842120f00078
permissions
-rw-r--r--

implemented Levenshtein algorithm for incredible speedup

#!/usr/bin/python
# -*- coding: utf-8 -*-
# iso-8859-2
"""
EitSupport
Copyright (C) 2011 betonme
Copyright (C) 2016 Wolfgang Fahl
Cleanup 2017 by mdd
"""
# This EITParser is based on:
# https://github.com/betonme/e2openplugin-EnhancedMovieCenter/blob/master/src/EitSupport.py
#
# In case of reuse of this source code please do not remove this copyright.
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   For more information on the GNU General Public License see:
#   <http://www.gnu.org/licenses/>.
#

# See page 36 of the specification below (the contents of the for loop):
# https://www.dvb.org/resources/public/standards/a38_dvb-si_specification.pdf

#pylint: disable=missing-docstring
#pylint: disable=line-too-long

import os
import struct
import sys
import getopt

from datetime import datetime

from ISO639 import LanguageCodes

#def crc32(data):
#   poly = 0x4c11db7
#   crc = 0xffffffffL
#   for byte in data:
#       byte = ord(byte)
#       for bit in range(7,-1,-1):  # MSB to LSB
#           z32 = crc>>31    # top bit
#           crc = crc << 1
#           if ((byte>>bit)&1) ^ z32:
#               crc = crc ^ poly
#           crc = crc & 0xffffffffL
#   return crc

# Descriptor tag values of the short and the extended event descriptor
# (the same values the parser in this file compares descriptor tags against).
EIT_SHORT_EVENT_DESCRIPTOR = 0x4d
# NOTE(review): the name is misspelled ("DESCRIPOR"); it is left unchanged
# because code outside this file may reference it.
EIT_EXTENDED_EVENT_DESCRIPOR = 0x4e

# Single-character replacement table applied by convert_charspec_hr().
# The targets are the code points of the letters C/c with acute, Z/z, S/s and
# C/c with caron, and d with stroke.
# Presumably the keys are those letters as they appear when ISO-8859-2 bytes
# are read as Latin-1/cp1252 -- TODO confirm.
# NOTE(review): the keys u'Ć' and u'Č' map to themselves; they look like the
# result of an encoding conversion of this source file (possibly u'Æ' and
# u'È' originally) -- confirm against the upstream file.
CHARSPEC_HR = {
    u'Ć': u'\u0106', u'æ': u'\u0107', u'®': u'\u017D', u'¾': u'\u017E',
    u'©': u'\u0160', u'¹': u'\u0161', u'Č': u'\u010C', u'è': u'\u010D', u'ð': u'\u0111'
}

# Two-character replacement table applied by convert_charspec_czsk().
# Each key is a marker character (Ï, Â, Ã, Ê or È) followed by a base letter;
# the value is the single accented letter that replaces the pair.
# Presumably the markers are non-spacing diacritic bytes (caron, acute,
# circumflex, ring, diaeresis) that were decoded as Latin-1/cp1252
# -- TODO confirm.
CHARSPEC_CZSK = {
    u'Ï'+u'C': u'Č', u'Ï'+u'E': u'Ě', u'Ï'+u'L': u'Ľ', u'Ï'+u'N': u'Ň', u'Ï'+u'R': u'Ř',
    u'Ï'+u'S': u'Š', u'Ï'+u'T': u'Ť', u'Ï'+u'Z': u'Ž', u'Ï'+u'c': u'č', u'Ï'+u'd': u'ď',
    u'Ï'+u'e': u'ě', u'Ï'+u'l': u'ľ', u'Ï'+u'n': u'ň', u'Ï'+u'r': u'ř', u'Ï'+u's': u'š',
    u'Ï'+u't': u'ť', u'Ï'+u'z': u'ž', u'Ï'+u'D': u'Ď', u'Â'+u'A': u'Á', u'Â'+u'E': u'É',
    u'Â'+u'I': u'Í', u'Â'+u'O': u'Ó', u'Â'+u'U': u'Ú', u'Â'+u'a': u'á', u'Â'+u'e': u'é',
    u'Â'+u'i': u'í', u'Â'+u'o': u'ó', u'Â'+u'u': u'ú', u'Â'+u'y': u'ý', u'Ã'+u'o': u'ô',
    u'Ã'+u'O': u'Ô', u'Ê'+u'u': u'ů', u'Ê'+u'U': u'Ů', u'È'+u'A': u'Ä', u'È'+u'E': u'Ë',
    u'È'+u'I': u'Ï', u'È'+u'O': u'Ö', u'È'+u'U': u'Ü', u'È'+u'Y': u'Ÿ', u'È'+u'a': u'ä',
    u'È'+u'e': u'ë', u'È'+u'i': u'ï', u'È'+u'o': u'ö', u'È'+u'u': u'ü', u'È'+u'y': u'ÿ'
}

def convert_charspec_hr(text):
    """Return *text* with every key of CHARSPEC_HR replaced by its value.

    The replacements are applied one after another in the iteration order
    of the table.
    """
    converted = text
    for wrong in CHARSPEC_HR:
        converted = converted.replace(wrong, CHARSPEC_HR[wrong])
    return converted

def convert_charspec_czsk(text):
    """Return *text* with every key of CHARSPEC_CZSK replaced by its value.

    The replacements are applied one after another in the iteration order
    of the table.
    """
    converted = text
    for sequence in CHARSPEC_CZSK:
        converted = converted.replace(sequence, CHARSPEC_CZSK[sequence])
    return converted

def parse_mjd(mjd):
    """Convert a Modified Julian Date into a calendar date.

    *mjd* is the 16 bit unsigned MJD value as used by the DVB-SI spec.
    Returns the tuple ``(year, month, day)``.
    """
    year = int((mjd - 15078.2) / 365.25)
    year_days = int(year * 365.25)
    month = int((mjd - 14956.1 - year_days) / 30.6001)
    day = mjd - 14956 - year_days - int(month * 30.6001)
    # Intermediate months 14 and 15 are January and February of the
    # following year.
    if month in (14, 15):
        return 1900 + year + 1, month - 1 - 12, day
    return 1900 + year, month - 1, day

def bcd2dec(byte):
    """Decode one packed BCD byte: the high nibble counts tens, the low nibble ones."""
    tens = byte >> 4
    ones = byte & 0x0F
    return tens * 10 + ones


def mkint(data):
    """
    Return int(data), or 0 when data is falsy (empty string, None, 0, ...)
    """
    if not data:
        return 0
    return int(data)

def todate(sdate, stime):
    """
    Build a datetime from a (year, month, day) and an (hour, minute, ...) sequence

    Only the first two entries of stime are used. Returns None when either
    argument is falsy or when the values do not form a valid date/time.
    """
    if not (sdate and stime):
        return None
    try:
        fields = [
            int(sdate[0]), int(sdate[1]), int(sdate[2]),
            int(stime[0]), int(stime[1]),
        ]
        return datetime(*fields)
    except ValueError:
        return None

def cleanstring(data):
    """Strip the control characters 0x10, 0x00, 0x02 and 0x15 from *data*.
    """
    unwanted = ('\x10', '\x00', '\x02', '\x15')
    cleaned = data
    for marker in unwanted:
        cleaned = cleaned.replace(marker, '')
    return cleaned

def language_iso639_2to3(alpha2):
    """Look up a three-letter code for the language registered under *alpha2*.

    Returns the first three-character key of LanguageCodes that carries the
    same value as LanguageCodes[alpha2]; when *alpha2* is unknown or no such
    key exists, *alpha2* itself is returned.
    """
    if alpha2 not in LanguageCodes:
        return alpha2
    wanted = LanguageCodes[alpha2]
    matches = (
        code for code, name in LanguageCodes.items()
        if name == wanted and len(code) == 3
    )
    return next(matches, alpha2)

class eitinfo(object):
    """Read an ``.eit`` file and expose the event information stored in it.

    The file is parsed as one event record: a 12 byte header (event id,
    MJD start date, BCD start time, BCD duration, flags / descriptor length)
    followed by a sequence of descriptors.  The parsed values are kept in the
    ``eit`` dict under the keys ``startdate``, ``starttime``, ``duration``,
    ``name``, ``genre``, ``description`` and ``components``.

    Background: http://de.wikipedia.org/wiki/Event_Information_Table

    NOTE: the parser relies on Python 2 byte strings (``ord()`` on single
    characters, ``str.decode``).
    """

    def __init__(self, path=None):
        """Create the reader and, when *path* is given, parse that file."""
        self.eit_file = None
        self.eit = {}
        self.load(path)

    def load(self, path):
        """Forget any previously parsed event and parse *path* (if truthy)."""
        self.eit = {}
        self.eit_file = None
        if path:
            self.eit_file = path
            self._read_file()

    def get_genre(self):
        """Return the stored 'genre' value, or "" when not available."""
        return self.eit.get('genre', "")

    def get_components(self):
        """Return the comma separated component texts, or ""."""
        return self.eit.get('components', "")

    def get_startdate(self):
        """Return the start date as (year, month, day), or ""."""
        return self.eit.get('startdate', "")

    def get_starttime(self):
        """Return the start time as (hour, minute, second), or ""."""
        return self.eit.get('starttime', "")

    def get_duration(self):
        """Return the duration as (hours, minutes, seconds), or ""."""
        return self.eit.get('duration', "")

    def get_name(self):
        """Return the event name with surrounding whitespace removed."""
        return self.eit.get('name', "").strip()

    def get_description(self):
        """Return the extended description with surrounding whitespace removed."""
        return self.eit.get('description', "").strip()

    def get_duration_seconds(self):
        """Return the duration as a single number.

        A three-element duration yields ``(h * 60 + m) * 60 + s``, a
        two-element one ``first * 60 + second``; anything shorter is passed
        to mkint() unchanged (a missing duration therefore yields 0).
        """
        length = self.eit.get('duration', "")
        if len(length) > 2:
            return mkint((length[0] * 60 + length[1]) * 60 + length[2])
        elif len(length) > 1:
            return mkint(length[0] * 60 + length[1])
        else:
            return mkint(length)

    def get_date(self):
        """Return start date and time combined by todate() (None if unavailable)."""
        return todate(self.get_startdate(), self.get_starttime())

    def dump(self):
        """Print a human readable summary of the parsed event.

        Returns the printed text, or None when nothing has been parsed.
        """
        if not self.eit:
            return None
        out = "Movie name: %s" % self.get_name()
        out += "\nGenre: %s" % self.get_genre()
        out += "\nComponents: %s" % self.get_components()
        out += "\nStartDate: %s" % self.get_date()
        out += "\nDescription: %s" % self.get_description()
        out += "\nDuration: %02i:%02i:%02i" % self.get_duration()
        # Floor division keeps the whole-minute result of the integer division.
        out += " (%s minutes)" % (self.get_duration_seconds() // 60)

        print(out)
        return out

    ##############################################################################
    ## File IO Functions
    @staticmethod
    def _normalize_encoding(text, lang):
        """Return *text* as UTF-8 where possible.

        Text that already decodes as UTF-8 (or is empty) is returned
        unchanged.  Otherwise it is re-encoded from cp1252; if that fails the
        bytes are left alone so that e.g. cyrillic text is not mangled.
        The language specific character fix-ups only run for text that was
        not valid UTF-8 to begin with.
        """
        if not text:
            return text
        try:
            text.decode('utf-8')
        except UnicodeDecodeError:
            # Very bad but there can be both encodings:
            # user files can be in cp1252.
            try:
                text = text.decode("cp1252").encode("utf-8")
            except UnicodeDecodeError:
                # do nothing, otherwise cyrillic won't be displayed properly
                pass
            if lang in ("cs", "sk"):
                text = str(convert_charspec_czsk(text))
            if lang == "hr":
                text = str(convert_charspec_hr(text))
        return text

    def _read_file(self):
        """Parse ``self.eit_file`` into ``self.eit``.

        Nothing happens when the path is empty or does not exist.  When the
        file cannot be read or holds fewer than the 12 header bytes,
        ``self.eit`` is reset to an empty dict.
        """
        path = self.eit_file
        if not (path and os.path.exists(path)):
            return

        print("Reading Event Information Table " + str(path))

        # Read data from file
        data = ""
        try:
            with open(path, 'rb') as fd:
                data = fd.read()
        except Exception as err:
            # Best effort: report the problem and continue with no data.
            print("[META] Exception in readEitFile: " + str(err))

        if len(data) < 12:
            # No data clear all
            self.eit = {}
            return

        # Header: event_id, MJD date, 3 BCD bytes start time, 3 BCD bytes
        # duration, 16 bits of running status / CA mode / descriptor length.
        header = struct.unpack(">HHBBBBBBH", data[:12])
        self.eit['startdate'] = parse_mjd(header[1])  # Y, M, D
        self.eit['starttime'] = tuple(bcd2dec(b) for b in header[2:5])  # HH, MM, SS
        self.eit['duration'] = tuple(bcd2dec(b) for b in header[5:8])  # HH, MM, SS

        lang = language_iso639_2to3("de")
        wanted_lang = lang.lower()
        stripped = ('\x10', '\x00', '\x02', '\x15')

        names = []         # event names in the wanted language
        names_all = []     # event names in any language (fallback)
        texts = []         # extended descriptions in the wanted language
        texts_all = []     # extended descriptions in any language (fallback)
        components = []

        size = len(data)
        pos = 12
        # Every descriptor starts with a tag byte and a length byte, so at
        # least two more bytes must be available.
        while pos < size - 1:
            rec = ord(data[pos])
            length = ord(data[pos + 1]) + 2  # payload plus tag and length byte
            if rec == EIT_SHORT_EVENT_DESCRIPTOR:
                if pos + 5 >= size:
                    # Truncated descriptor: stop instead of raising IndexError.
                    break
                # Layout: tag, length, 3 byte language code, name length,
                # name, text length, text.
                language = str(data[pos + 2:pos + 5])
                name_len = ord(data[pos + 5])
                name_end = pos + 6 + name_len
                name = cleanstring(data[pos + 6:name_end])

                text_len = ord(data[name_end]) if name_end < size else 0
                # The text part of the short event descriptor is what this
                # class reports as 'genre'.
                self.eit['genre'] = cleanstring(
                    data[name_end + 1:name_end + 1 + text_len])

                if language.lower() == wanted_lang:
                    names.append(name)
                names_all.append(name)
            elif rec == EIT_EXTENDED_EVENT_DESCRIPOR:
                # Layout: tag, length, descriptor numbers, 3 byte language
                # code, length of items, ..., text.  As before, the text is
                # taken from offset 8 to the end of the descriptor.
                language = str(data[pos + 3:pos + 6])
                chars = []
                # Slicing (instead of indexing) tolerates a truncated file.
                for char in data[pos + 8:pos + length]:
                    if ord(char) == 138:
                        # 0x8A marks a line break
                        chars.append('\n')
                    elif char not in stripped:
                        chars.append(char)
                text = "".join(chars)
                if language.lower() == wanted_lang:
                    texts.append(text)
                texts_all.append(text)
            elif rec == 0x50:
                # component descriptor
                components.append(cleanstring(data[pos + 8:pos + length]))
            elif rec in (0x54, 0x4A, 0x55):
                # content, linkage and parental rating descriptors are
                # recognised but their contents are not used.
                pass
            else:
                print("unsupported descriptor: %x %x" % (rec, pos + 12))
                print(data[pos:pos + length])

            pos += length

        self.eit['components'] = ", ".join(components)

        # Prefer the entries in the wanted language; fall back to everything
        # that was found when there are none.
        self.eit['name'] = self._normalize_encoding(
            "".join(names or names_all), lang)
        self.eit['description'] = self._normalize_encoding(
            "".join(texts or texts_all), lang)



def main():
    """Command line entry point: dump every .eit file given as argument.

    ``-h`` / ``--help`` prints the module docstring and exits with status 0;
    an invalid option prints the error and exits with status 2.
    """
    # parse command line options
    try:
        opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
    except getopt.error as msg:
        print(msg)
        print("for help use --help")
        sys.exit(2)
    # process options
    if any(flag in ("-h", "--help") for flag, _ in opts):
        print(__doc__)
        sys.exit(0)
    # process arguments: one reader instance is reused for all files
    info = eitinfo()
    for filename in args:
        info.load(filename)
        info.dump()

# Run the command line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()

mercurial