#!coding:utf-8
import copy
import os
from lxml.etree import tostring, SubElement
from simple_report.core import XML_DEFINITION
from simple_report.core.exception import (
SectionNotFoundException, SectionException)
from simple_report.core.xml_wrap import (
ReletionOpenXMLFile, CommonProperties, OpenXMLFile)
from simple_report.docx.drawing import DocxImage, insert_image
__author__ = 'prefer'
[docs]class Wordprocessing(ReletionOpenXMLFile):
"""
Основной файл формата DOCX
"""
NS_W = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
# Узел контекста document
# .// рекурсивно спускаемся к потомкам в поисках
# <ns:p><ns:r><ns:t></ns:t></ns:r></ns:p>
XPATH_QUERY = './/{0}:p/{0}:r/{0}:t'
TABLES_QUERY = './/{0}:tbl'
TABLE_TEXT_NODE_QUERY = './/{0}:tc/{0}:p/{0}:r/{0}:t'
def __init__(self, tags, *args, **kwargs):
super(Wordprocessing, self).__init__(*args, **kwargs)
self.tags = tags
self.table_sections = {}
[docs] def build(self):
"""
Сборка файла
"""
with open(self.file_path, 'w') as f:
f.write(XML_DEFINITION + tostring(self._root))
[docs] def set_params(self, params):
"""
Подстановка параметров
:param params: параметры подстановки
:result: None
"""
#
self.merge_same_nodes()
text_nodes = self._root.xpath(
self.XPATH_QUERY.format('w'), namespaces={'w': self.NS_W}
)
self._set_params(text_nodes, params, self.doc_rels)
[docs] def get_signature(self, node):
signature = []
for subnode in list(node):
if subnode.tag != '{%s}lang' % self.NS_W:
signature.append((subnode.tag, sorted(subnode.items())))
return signature
[docs] def merge_same_nodes(self):
"""
Слияние одинаковых нод - нужно, т.к. редакторы DOCX
привносят свои специфичные изменения, которые нам не нужны
:result:
"""
paragraphs = list(self._root.xpath(
'.//{0}:p'.format('w'), namespaces={'w': self.NS_W}
))
t_tag = '{%s}t' % self.NS_W
r_tag = '{%s}r' % self.NS_W
rpr_tag = '{%s}rPr' % self.NS_W
tab_tag = '{%s}tab' % self.NS_W
for paragraph in paragraphs:
par_nodes = list(paragraph)
old_signature = None
signature = None
old_node = None
for par_node in par_nodes:
if par_node.tag != r_tag:
old_node = None
continue
for node in list(par_node):
if node.tag == rpr_tag:
old_signature = signature
signature = self.get_signature(node)
elif node.tag == tab_tag:
old_node = None
elif node.tag == t_tag:
if old_node is not None and old_signature == signature:
# delete r node
old_node[1].text = old_node[1].text + node.text
#old_node = (par_node, node)
paragraph.remove(par_node)
else:
old_node = (par_node, node)
@classmethod
def _set_params(cls, text_nodes, params, doc_rels=None):
def sorting_key((k, v)):
if not isinstance(k, basestring):
return 1
return -len(k)
for node in text_nodes:
for key_param, value in sorted(params.items(), key=sorting_key):
if key_param in node.text:
# if len(node.text) > 0 and node.text[0] == '#' and
# node.text[-1] == '#':
text_to_replace = '#%s#' % key_param
if text_to_replace in node.text:
if isinstance(value, DocxImage):
insert_image(
node, value, text_to_replace, doc_rels)
else:
node.text = node.text.replace(
text_to_replace, unicode(value))
else:
node.text = node.text.replace(
key_param, unicode(value))
[docs] def get_all_parameters(self):
"""
Получение всех параметров
"""
text_nodes = self._root.xpath(
self.XPATH_QUERY.format('w'), namespaces={'w': self.NS_W})
for node in text_nodes:
if (
len(node.text) > 0 and
node.text[0] == '#' and
node.text[-1] == '#'
):
yield node.text
[docs] def get_tables(self):
"""
Получаем таблицы в DOCX
"""
return self._root.findall(
self.TABLES_QUERY.format('w'), namespaces={'w': self.NS_W}
)
[docs] def set_docx_table_sections(self):
"""
установка секций таблиц в документах DOCX
:result: None
"""
tables = self.get_tables()
for table in tables:
text_nodes = table.findall(
'{0}:tr'.format('w'),
namespaces={'w': self.NS_W}
)
section = Section(table)
section_name = None
rows_to_delete = []
for row_node in text_nodes:
col_nodes = row_node.findall(
self.TABLE_TEXT_NODE_QUERY.format('w'),
namespaces={'w': self.NS_W}
)
if not col_nodes:
continue
col_nodes_text = u''.join(x.text for x in col_nodes)
if col_nodes_text and col_nodes_text[:2] == '#!':
section_name = col_nodes_text[2:]
if section_name in self.table_sections:
raise SectionException(
('Section named {0} has been found '
'more than 1 time in docx table').format(
section_name
)
)
rows_to_delete.append(row_node)
# del text_nodes[text_nodes.index(row_node)]
elif col_nodes_text and col_nodes_text[-2:] == '!#':
self.table_sections[section_name] = section
section_name = None
section = Section(table)
rows_to_delete.append(row_node)
elif section_name:
section.append(copy.copy(row_node))
# del text_nodes[text_nodes.index(row_node)]
rows_to_delete.append(row_node)
for row_node in rows_to_delete:
row_node.getparent().remove(row_node)
[docs] def get_section(self, section_name):
"""
Получение секции таблицы в документе DOCX по имени
:param section_name: имя секции
:result: секция
"""
if not self.table_sections:
self.set_docx_table_sections()
section = self.table_sections.get(section_name)
section.doc_rels = self.doc_rels
if section is None:
raise SectionNotFoundException(
'Section named {0} has not been found'.format(section_name)
)
return section
[docs]class DocumentRelsXMLFile(OpenXMLFile):
NS = ""
FILENAME = "document.xml.rels"
def __init__(self, tags, *args, **kwargs):
super(DocumentRelsXMLFile, self).__init__(*args, **kwargs)
file_path = os.path.join(
self.current_folder,
'word',
'_rels',
self.file_name
)
self._root = None
self.max_rid = 0
max_rid = 0
if os.path.exists(file_path):
self._root = self.from_file(file_path)
for child in self._root:
# print child.tag, dict(child.attrib), 'child'
attrib = dict(child.attrib)
rid = attrib.get('Id', '')
if rid.startswith('rId'):
max_rid = max([max_rid, int(rid[3:])])
self.max_rid = max_rid
[docs] def next_rid(self):
self.max_rid = self.max_rid + 1
return "rId%s" % self.max_rid
@classmethod
[docs] def create(cls, folder, tags):
"""
Получение экземпляра класса
:param cls: класс
:param folder: путь до директории с распакованным XML-документом
:param tags: теги
:result: Экземпляр класса
"""
reletion_path = os.path.join(folder, 'word', '_rels', cls.FILENAME)
rel_id = None # Корневой файл связей
file_name = cls.FILENAME
return cls(tags, rel_id, folder, file_name, reletion_path, )
[docs] def build(self):
with open(self.file_path, 'w') as file_:
file_.write(XML_DEFINITION + tostring(self._root))
[docs]class ContentTypesXMLFile(OpenXMLFile):
"""
Типы объектов
"""
NS = 'http://schemas.openxmlformats.org/package/2006/content-types'
FILENAME = '[Content_Types].xml'
def __init__(self, tags, *args, **kwargs):
super(ContentTypesXMLFile, self).__init__(*args, **kwargs)
assert not self.file_name is None
file_path = os.path.join(
self.current_folder,
self.file_name
)
self.types_root = None
if os.path.exists(file_path):
self.types_root = self.from_file(file_path)
[docs] def build(self):
# root = self.types_root.getroot()
root = self.types_root
# <Default Extension="jpg" ContentType="image/jpeg" />
found_jpg = False
found_png = False
for child in root:
if child.tag == '{%s}Default' % self.NS:
attrib = dict(child.attrib)
if attrib.get('ContentType') == 'image/jpeg' and attrib.get(
'Extension'
) == 'jpg':
found_jpg = True
if attrib.get('ContentType') == 'image/png' and attrib.get(
'Extension'
) == 'png':
found_png = True
if not found_jpg:
SubElement(
root, 'Default', attrib={
'ContentType': 'image/jpeg',
'Extension': 'jpg'
},
# nsmap={
# 'w': self.NS
# }
)
if not found_png:
SubElement(
root, 'Default', attrib={
'ContentType': 'image/png',
'Extension': 'png'
},
# nsmap={
# 'w': self.NS
# }
)
with open(self.file_path, 'w') as f:
f.write(XML_DEFINITION + tostring(root))
@classmethod
[docs] def create(cls, folder, tags):
"""
Получение экземпляра класса
:param cls: класс
:param folder: путь до директории с распакованным XML-документом
:param tags: теги
:result: Экземпляр класса
"""
reletion_path = os.path.join(folder, cls.FILENAME)
rel_id = None # Корневой файл связей
file_name = '[Content_Types].xml'
return cls(tags, rel_id, folder, file_name, reletion_path, )
[docs]class CommonPropertiesDOCX(CommonProperties):
"""
"""
def _get_app_common(self, _id, target):
"""
"""
return Wordprocessing.create(self.tags, _id, *self._get_path(target))
[docs]class Section(object):
"""
Секция таблицы docx документа. Поддерживает ограниченное число операций
В частности, строчки таблицы выводятся полностью, т.е. минимальной
единицей секции является строка таблицы
"""
def __init__(self, table):
self._content = []
self.table = table
[docs] def append(self, table_row):
self._content.append(table_row)
[docs] def flush(self, params):
for row in self._content:
new_row = copy.copy(row)
text_nodes = new_row.findall(
'.//{0}:tc/{0}:p/{0}:r/{0}:t'.format('w'),
namespaces={'w': Wordprocessing.NS_W}
)
Wordprocessing._set_params(text_nodes, params, self.doc_rels)
self.table.append(new_row)