Source code for simple_report.xlsx.spreadsheet_ml

# coding: utf-8
'''
Created on 24.11.2011

@author: prefer
'''

import re
import os

from lxml.etree import QName, tostring, fromstring
from simple_report.core import XML_DEFINITION
from simple_report.core.xml_wrap import OpenXMLFile, ReletionOpenXMLFile, ReletionTypes, CommonProperties
from simple_report.core.shared_table import SharedStringsTable
from simple_report.core.exception import SectionNotFoundException, SectionException, SheetNotFoundException
from simple_report.utils import get_addr_cell
from simple_report.xlsx.cursor import Cursor
from simple_report.xlsx.section import Section, SheetData


[docs]class Comments(OpenXMLFile):
    u"""
    Комментарии в XLSX
    """
    NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
    NS_XDR = "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing"

    section_pattern = re.compile(u'[\+|\-]+[A-Za-zА-яА-я0-9_]+')

    def __init__(self, sheet_data, *args, **kwargs):
        """
        
        :param sheet_data: данные для листа 
        :type sheet_data: xlsx.section.SheetData
        """
        super(Comments, self).__init__(*args, **kwargs)

        assert isinstance(sheet_data, SheetData)
        self._sheet_data = sheet_data

        self.sections = {}

        self.comment_list = self._root.find(QName(self.NS, 'commentList'))
        self._create_section()

        # Проверка, правильно ли указаны секции и есть ли конец секции
        self._test_sections()


    def _test_sections(self):
        u"""
        Проверка секций на валидность
        """
        for section_object in self.sections.values():
            if not section_object.name or not section_object.begin or not section_object.end:
                raise ValueError("Bad section: %s" % section_object)

    def _parse_sections(self, comment_list):
        """
        Распознаем секции
        
        :param comment_list: Список комментариев
        :type comment_list: iterable
        """

        for comment in comment_list:
            cell = comment.get('ref')
            for text in comment:
                for r in text:
                    for t in r.findall(QName(self.NS, 't')):
                        yield t.text, cell


    def _create_section(self):
        u"""
        Создание объектов секций из комментариев
        """

        map(self._add_section, self._parse_sections(self.comment_list))

    def _add_section(self, values):
        text = values[0]
        cell = values[1]

        values = self.section_pattern.findall(text)
        addr = get_addr_cell(cell)
        for value in values:
            section_name = self._get_name_section(value)

            # Такой объект должен быть
            if value.startswith('-'):
                # Такой элемент уже должен быть
                if not self.sections.get(section_name):
                    raise SectionException('Start section "%s" not found' % section_name)

                section = self.sections[section_name]

                # Второго конца быть не может
                if section.end:
                    raise SectionException('For section "%s" more than one ending tag' % section_name)

                section.end = addr
            else:
                # Второго начала у секции быть не может
                if self.sections.get(section_name):
                    raise SectionException('For section "%s" more than one beging tag' % section_name)

                self.sections[section_name] = Section(self._sheet_data, section_name, begin=addr)

    def _get_name_section(self, text):
        """
        Возвращает из наименования ++A - название секции
        
        :param text: комментарий
        :type text: basestring
        """
        for i, s in enumerate(text):
            if s.isalpha():
                return text[i:]
        else:
            raise SectionException('Section name bad format "%s"' % text)

[docs]    def get_section(self, section_name):
        """
        Получение секции по имени
        
        :param section_name:
        :type section_name:
        """
        try:
            section = self.sections[section_name]
        except KeyError:
            raise SectionNotFoundException('Section "%s" not found' % section_name)
        else:
            return section

[docs]    def get_sections(self):
        u"""
        Получение всех секций
        """
        return self.sections

    @classmethod
[docs]    def create(cls, cursor, *args, **kwargs):
        return cls(cursor, *args, **kwargs)

[docs]    def build(self):
        u"""
        Сборка файла с комментариями, предварительно удалив их
        """
        if len(self.comment_list) > 0:
            self.comment_list.clear()

        with open(self.file_path, 'w') as f:
            f.write(XML_DEFINITION + tostring(self._root))


[docs]class SharedStrings(OpenXMLFile):
    u"""
    XML-файл с общими строками, на каждую из которых могут ссылаться из
    других xml-файлов XLSX
    """
    NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"

    def __init__(self, *args, **kwargs):
        super(SharedStrings, self).__init__(*args, **kwargs)
        self.table = SharedStringsTable(self._root)

[docs]    def build(self):
        u"""
        Сборка файла
        """
        new_root = self.table.to_xml()
        with open(self.file_path, 'w') as f:
            f.write(XML_DEFINITION + tostring(new_root))


[docs]class WorkbookSheet(ReletionOpenXMLFile):
    u"""
    Лист книги документа в формате XLSX
    """

    NS = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
    NS_R = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'


    def __init__(self, shared_table, tags, name, sheet_id,
            *args, **kwargs):
        """
        
        :param shared_table:
        :type shared_table:
        :param tags:
        :type tags:
        :param name:
        :type name:
        :param sheet_id:
        :type sheet_id:
        """
        super(WorkbookSheet, self).__init__(*args, **kwargs)
        self.name = name
        self.sheet_id = sheet_id

        # Первый элемент: начало вывода по вертикали, второй по горизонтали
        self.sheet_data = SheetData(self._root,
            cursor=Cursor(),
            ns=self.NS,
            shared_table=shared_table,
            tags=tags,
            )

        self.drawing, self.comments = self.walk_reletion()


[docs]    def walk_reletion(self):
        """
        """
        drawing = comments = None
        if not self._reletion_root is None:
            for elem in self._reletion_root:
                param = (elem.attrib['Id'], elem.attrib['Target'])
                if elem.attrib['Type'] == ReletionTypes.DRAWING:
                    drawing = self._get_drawing(*param)

                elif elem.attrib['Type'] == ReletionTypes.COMMENTS:
                    comments = self._get_comment(*param)

        return drawing, comments


    def _get_comment(self, rel_id, target):
        u"""
        Получение объекта комментария
        """
        return Comments.create(self.sheet_data, rel_id, *self._get_path(target))

    def _get_drawing(self, rel_id, target):
        u"""
        Unused
        """

    def __str__(self):
        res = [u'Sheet name "{0}":'.format(self.name)]
        if self.comments:
            for section in self.sections:
                res.append(u'\t %s' % section)
        return u'\n'.join(res).encode('utf-8')


    def __repr__(self):
        return self.__str__()

    @property
[docs]    def sections(self):
        return self.comments.get_sections()


[docs]    def get_section(self, name):
        u"""
        Получение секции по имени
        """
        return self.comments.get_section(name)

[docs]    def get_sections(self):
        u"""
        Получение всех секций
        """
        return self.sections

[docs]    def build(self):
        u"""
        Сборка xml-файла
        """
        new_root = self.sheet_data.new_sheet()

        with open(self.file_path, 'w') as f:
            f.write(XML_DEFINITION + tostring(new_root))

        if self.comments:
            self.comments.build()

[docs]    def get_rowbreaks(self):
        return self.sheet_data.get_rowbreaks()

[docs]    def get_colbreaks(self):
        return self.sheet_data.get_colbreaks()


[docs]class WorkbookStyles(OpenXMLFile):
    """
    Unused
    """


[docs]class Workbook(ReletionOpenXMLFile):
    u"""
    Книга в формате XLSX
    """
    NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
    NS_R = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"


    def __init__(self, tags, *args, **kwargs):
        """
        
        :param tags:
        :type tags:
        """
        super(Workbook, self).__init__(*args, **kwargs)

        self.tags = tags

        (
            self.workbook_style,
            tmp_sheets,
            self.shared_strings,
            self.calc_chain
                                ) = self.walk_reletions()
        self.sheets = self.walk(tmp_sheets)

        if self.sheets:
            # По-умолчанию активным считается первый лист
            self._active_sheet = self.sheets[0]
        else:
            raise SheetNotFoundException('Sheets not found')

[docs]    def walk_reletions(self):
        u"""
        
        """
        workbook_style = shared_strings = calc_chain = None
        sheets = {}
        for elem in self._reletion_root:
            param = (elem.attrib['Id'], elem.attrib['Target'])
            if elem.attrib['Type'] == ReletionTypes.WORKBOOK_STYLE:
                workbook_style = self._get_style(*param)

            elif elem.attrib['Type'] == ReletionTypes.WORKSHEET:
                sheets[elem.attrib['Id']] = elem.attrib['Target']

            elif elem.attrib['Type'] == ReletionTypes.SHARED_STRINGS:
                shared_strings = self._get_shared_strings(*param)
            elif elem.attrib['Type'] == ReletionTypes.CALC_CHAIN:
                calc_chain = self._get_calc_chain(*param)
                self._reletion_root.remove(elem)

        return workbook_style, sheets, shared_strings, calc_chain

[docs]    def walk(self, sheet_reletion):
        """
        """
        sheets = []
        sheets_elem = self._root.find(QName(self.NS, 'sheets'))
        for sheet_elem in sheets_elem:
            name = sheet_elem.attrib['name']
            sheet_id = sheet_elem.attrib['sheetId']
            # state = sheet_elem.attrib['state'] -- В win файле нет такого свойства

            rel_id = sheet_elem.attrib.get(QName(self.NS_R, 'id'))
            target = sheet_reletion[rel_id]
            sheet = self._get_worksheet(rel_id, target, name, sheet_id)
            sheets.append(sheet)

        return sheets

    def _get_style(self, _id, target):
        """
        Unused
        """

    def _get_worksheet(self, rel_id, target, name, sheet_id):
        """
        Получение листа книги
        :param rel_id:
        :type rel_id:
        :param target:
        :type target:
        :param name:
        :type name:
        :param sheet_id:
        :type sheet_id:
        """
        worksheet = WorkbookSheet.create(self.shared_table, self.tags,
            name, sheet_id, rel_id, *self._get_path(target))
        return worksheet

    def _get_shared_strings(self, _id, target):
        """
        Получение общих строк
        
        :param _id: идентификатор строки
        :type _id:
        :param target:
        :type target:
        """

        return SharedStrings.create(_id, *self._get_path(target))

    def _get_calc_chain(self, _id, target):
        return CalcChain.create(_id, *self._get_path(target))

[docs]    def get_section(self, name):
        """
        """
        return self._active_sheet.get_section(name)

[docs]    def get_sections(self):
        """
        """
        return self._active_sheet.get_sections()

    @property
    def active_sheet(self):
        return self._active_sheet

    @active_sheet.setter
[docs]    def active_sheet(self, value):
        assert isinstance(value, int)
        self._active_sheet = self.sheets[value]

[docs]    def build(self):
        """
        """
        map(lambda x: x.build(), self.sheets)

        self.shared_strings.build()
        if self.calc_chain:
            self.calc_chain.build()

    @property
[docs]    def shared_table(self):
        return self.shared_strings.table

[docs]    def get_rowbreaks(self):
        return self._active_sheet.get_rowbreaks()

[docs]    def get_colbreaks(self):
        return self._active_sheet.get_colbreaks()

[docs]class CommonPropertiesXLSX(CommonProperties):
    """

    """

    def _get_app_common(self, _id, target):
        """
        """
        return Workbook.create(self.tags, _id, *self._get_path(target))


[docs]class CalcChain(OpenXMLFile):
    """
    Цепочка вычислений. Указывает порядок вычислений в ячейках а также
    является кешем значений.
    (http://stackoverflow.com/questions/9004848/working-with-office-open-xml-just-how-hard-is-it)
    Поскольку довольно сложно в автоматическом режиме указывать порядок
    вычисления, просто удаляем файл + ссылки на него.
    Еще 1 плюс такого подхода - больше не должна повторяться ошибка при
    открытии файла в MS Office 2007/2010, шаблон которого был сохранен
    то в Libre/Openoffice, то в MS Office
    """

    NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"

[docs]    def build(self):
        """
        Удаляем файл с цепочкой вычислений
        """
        if os.path.exists(self.file_path):
            os.remove(self.file_path)

    @classmethod
[docs]    def from_file(cls, file_path):
        assert file_path
        # if os.path.exists(file_path):
        #     with open(file_path) as f:
        #         return parse(f).getroot()
        # else:
        return fromstring("<fake_root/>")