Source code for simple_report.docx.wordprocessing_ml

#!coding:utf-8

import copy
import os
from lxml.etree import tostring, SubElement
from simple_report.core import XML_DEFINITION
from simple_report.core.exception import (
    SectionNotFoundException, SectionException)
from simple_report.core.xml_wrap import (
    ReletionOpenXMLFile, CommonProperties, OpenXMLFile)
from simple_report.docx.drawing import DocxImage, insert_image

__author__ = 'prefer'


[docs]class Wordprocessing(ReletionOpenXMLFile): """ Основной файл формата DOCX """ NS_W = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main' # Узел контекста document # .// рекурсивно спускаемся к потомкам в поисках # <ns:p><ns:r><ns:t></ns:t></ns:r></ns:p> XPATH_QUERY = './/{0}:p/{0}:r/{0}:t' TABLES_QUERY = './/{0}:tbl' TABLE_TEXT_NODE_QUERY = './/{0}:tc/{0}:p/{0}:r/{0}:t' def __init__(self, tags, *args, **kwargs): super(Wordprocessing, self).__init__(*args, **kwargs) self.tags = tags self.table_sections = {}
[docs] def build(self): """ Сборка файла """ with open(self.file_path, 'w') as f: f.write(XML_DEFINITION + tostring(self._root))
[docs] def set_params(self, params): """ Подстановка параметров :param params: параметры подстановки :result: None """ # self.merge_same_nodes() text_nodes = self._root.xpath( self.XPATH_QUERY.format('w'), namespaces={'w': self.NS_W} ) self._set_params(text_nodes, params, self.doc_rels)
[docs] def get_signature(self, node): signature = [] for subnode in list(node): if subnode.tag != '{%s}lang' % self.NS_W: signature.append((subnode.tag, sorted(subnode.items()))) return signature
[docs] def merge_same_nodes(self): """ Слияние одинаковых нод - нужно, т.к. редакторы DOCX привносят свои специфичные изменения, которые нам не нужны :result: """ paragraphs = list(self._root.xpath( './/{0}:p'.format('w'), namespaces={'w': self.NS_W} )) t_tag = '{%s}t' % self.NS_W r_tag = '{%s}r' % self.NS_W rpr_tag = '{%s}rPr' % self.NS_W tab_tag = '{%s}tab' % self.NS_W for paragraph in paragraphs: par_nodes = list(paragraph) old_signature = None signature = None old_node = None for par_node in par_nodes: if par_node.tag != r_tag: old_node = None continue for node in list(par_node): if node.tag == rpr_tag: old_signature = signature signature = self.get_signature(node) elif node.tag == tab_tag: old_node = None elif node.tag == t_tag: if old_node is not None and old_signature == signature: # delete r node old_node[1].text = old_node[1].text + node.text #old_node = (par_node, node) paragraph.remove(par_node) else: old_node = (par_node, node)
@classmethod def _set_params(cls, text_nodes, params, doc_rels=None): def sorting_key((k, v)): if not isinstance(k, basestring): return 1 return -len(k) for node in text_nodes: for key_param, value in sorted(params.items(), key=sorting_key): if key_param in node.text: # if len(node.text) > 0 and node.text[0] == '#' and # node.text[-1] == '#': text_to_replace = '#%s#' % key_param if text_to_replace in node.text: if isinstance(value, DocxImage): insert_image( node, value, text_to_replace, doc_rels) else: node.text = node.text.replace( text_to_replace, unicode(value)) else: node.text = node.text.replace( key_param, unicode(value))
[docs] def get_all_parameters(self): """ Получение всех параметров """ text_nodes = self._root.xpath( self.XPATH_QUERY.format('w'), namespaces={'w': self.NS_W}) for node in text_nodes: if ( len(node.text) > 0 and node.text[0] == '#' and node.text[-1] == '#' ): yield node.text
[docs] def get_tables(self): """ Получаем таблицы в DOCX """ return self._root.findall( self.TABLES_QUERY.format('w'), namespaces={'w': self.NS_W} )
[docs] def set_docx_table_sections(self): """ установка секций таблиц в документах DOCX :result: None """ tables = self.get_tables() for table in tables: text_nodes = table.findall( '{0}:tr'.format('w'), namespaces={'w': self.NS_W} ) section = Section(table) section_name = None rows_to_delete = [] for row_node in text_nodes: col_nodes = row_node.findall( self.TABLE_TEXT_NODE_QUERY.format('w'), namespaces={'w': self.NS_W} ) if not col_nodes: continue col_nodes_text = u''.join(x.text for x in col_nodes) if col_nodes_text and col_nodes_text[:2] == '#!': section_name = col_nodes_text[2:] if section_name in self.table_sections: raise SectionException( ('Section named {0} has been found ' 'more than 1 time in docx table').format( section_name ) ) rows_to_delete.append(row_node) # del text_nodes[text_nodes.index(row_node)] elif col_nodes_text and col_nodes_text[-2:] == '!#': self.table_sections[section_name] = section section_name = None section = Section(table) rows_to_delete.append(row_node) elif section_name: section.append(copy.copy(row_node)) # del text_nodes[text_nodes.index(row_node)] rows_to_delete.append(row_node) for row_node in rows_to_delete: row_node.getparent().remove(row_node)
[docs] def get_section(self, section_name): """ Получение секции таблицы в документе DOCX по имени :param section_name: имя секции :result: секция """ if not self.table_sections: self.set_docx_table_sections() section = self.table_sections.get(section_name) section.doc_rels = self.doc_rels if section is None: raise SectionNotFoundException( 'Section named {0} has not been found'.format(section_name) ) return section
[docs]class DocumentRelsXMLFile(OpenXMLFile): NS = "" FILENAME = "document.xml.rels" def __init__(self, tags, *args, **kwargs): super(DocumentRelsXMLFile, self).__init__(*args, **kwargs) file_path = os.path.join( self.current_folder, 'word', '_rels', self.file_name ) self._root = None self.max_rid = 0 max_rid = 0 if os.path.exists(file_path): self._root = self.from_file(file_path) for child in self._root: # print child.tag, dict(child.attrib), 'child' attrib = dict(child.attrib) rid = attrib.get('Id', '') if rid.startswith('rId'): max_rid = max([max_rid, int(rid[3:])]) self.max_rid = max_rid
[docs] def next_rid(self): self.max_rid = self.max_rid + 1 return "rId%s" % self.max_rid
@classmethod
[docs] def create(cls, folder, tags): """ Получение экземпляра класса :param cls: класс :param folder: путь до директории с распакованным XML-документом :param tags: теги :result: Экземпляр класса """ reletion_path = os.path.join(folder, 'word', '_rels', cls.FILENAME) rel_id = None # Корневой файл связей file_name = cls.FILENAME return cls(tags, rel_id, folder, file_name, reletion_path, )
[docs] def build(self): with open(self.file_path, 'w') as file_: file_.write(XML_DEFINITION + tostring(self._root))
[docs]class ContentTypesXMLFile(OpenXMLFile): """ Типы объектов """ NS = 'http://schemas.openxmlformats.org/package/2006/content-types' FILENAME = '[Content_Types].xml' def __init__(self, tags, *args, **kwargs): super(ContentTypesXMLFile, self).__init__(*args, **kwargs) assert not self.file_name is None file_path = os.path.join( self.current_folder, self.file_name ) self.types_root = None if os.path.exists(file_path): self.types_root = self.from_file(file_path)
[docs] def build(self): # root = self.types_root.getroot() root = self.types_root # <Default Extension="jpg" ContentType="image/jpeg" /> found_jpg = False found_png = False for child in root: if child.tag == '{%s}Default' % self.NS: attrib = dict(child.attrib) if attrib.get('ContentType') == 'image/jpeg' and attrib.get( 'Extension' ) == 'jpg': found_jpg = True if attrib.get('ContentType') == 'image/png' and attrib.get( 'Extension' ) == 'png': found_png = True if not found_jpg: SubElement( root, 'Default', attrib={ 'ContentType': 'image/jpeg', 'Extension': 'jpg' }, # nsmap={ # 'w': self.NS # } ) if not found_png: SubElement( root, 'Default', attrib={ 'ContentType': 'image/png', 'Extension': 'png' }, # nsmap={ # 'w': self.NS # } ) with open(self.file_path, 'w') as f: f.write(XML_DEFINITION + tostring(root))
@classmethod
[docs] def create(cls, folder, tags): """ Получение экземпляра класса :param cls: класс :param folder: путь до директории с распакованным XML-документом :param tags: теги :result: Экземпляр класса """ reletion_path = os.path.join(folder, cls.FILENAME) rel_id = None # Корневой файл связей file_name = '[Content_Types].xml' return cls(tags, rel_id, folder, file_name, reletion_path, )
[docs]class CommonPropertiesDOCX(CommonProperties): """ """ def _get_app_common(self, _id, target): """ """ return Wordprocessing.create(self.tags, _id, *self._get_path(target))
[docs]class Section(object): """ Секция таблицы docx документа. Поддерживает ограниченное число операций В частности, строчки таблицы выводятся полностью, т.е. минимальной единицей секции является строка таблицы """ def __init__(self, table): self._content = [] self.table = table
[docs] def append(self, table_row): self._content.append(table_row)
[docs] def flush(self, params): for row in self._content: new_row = copy.copy(row) text_nodes = new_row.findall( './/{0}:tc/{0}:p/{0}:r/{0}:t'.format('w'), namespaces={'w': Wordprocessing.NS_W} ) Wordprocessing._set_params(text_nodes, params, self.doc_rels) self.table.append(new_row)