Skip to content

Commit

Permalink
1
Browse files Browse the repository at this point in the history
  • Loading branch information
renoyuan committed Jul 18, 2024
1 parent ff5980e commit 99dbe04
Show file tree
Hide file tree
Showing 13 changed files with 158 additions and 138 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ dmypy.json
test/*.ofd
test/*.pdf
test/*.json
test/data
test/test
增值税电子专票5
*.ofd
*.pdf
Expand Down
10 changes: 3 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@

### 更新

v0.3.1 解决了一些 WPS 适配问题。新增了 pdf 转 ofd 电子版的支持,以及 ofd 转 pdf 中签章的解析。

v0.3.3 解决了一些 bug,去除了对 opencv 的依赖,环境体积减少 50M 左右;后续可能会尝试把一些依赖改为可选、按需安装。


### 常见问题
Expand All @@ -27,11 +26,9 @@ https://github.com/renoyuan/easyofd/wiki/FAQ

1 环境,后续可能会尝试减少一些第三方包的依赖,以压缩环境体积 -- 主要是 opencv 和 numpy

2 gui 工具 ,之前提供过一个开箱即用的gui工具 v0.1.0.1版本后就没有更新过了,有时间会更新下。

3 功能上,对于 pdf2ofd 和 ofd 生成可能会有一些优化
2 功能上,对于 pdf2ofd 和 ofd 生成可能会有一些优化

4 需求收集,若有其他 easyofd 相关的需求和建议,可以在 git 上给我提,有意思的需求我会考虑尝试。
3 需求收集,若有其他 easyofd 相关的需求和建议,可以在 git 上给我提,有意思的需求我会考虑尝试。



Expand All @@ -52,7 +49,6 @@ https://github.com/renoyuan/easyofd/wiki/FAQ




关于 jb2格式图片解析
使用了第三方库 jbig2dec 去读取jb2格式图片 参考下面链接安装使用jbig2dec
https://github.com/rillian/jbig2dec
Expand Down
2 changes: 1 addition & 1 deletion easyofd/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .ofd import OFD
__version__ = "0.3.2"
__version__ = "0.3.4"
__all__ = ["OFD"]
27 changes: 14 additions & 13 deletions easyofd/draw/draw_ofd.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from datetime import datetime

import xmltodict
import cv2
from PIL import Image
from loguru import logger

Expand Down Expand Up @@ -190,6 +189,7 @@ def build_content_res(self, pil_img_list=None, pdf_info_list=None, id_obj: CurId
return content_res_list

def pil_2_bytes(self, image):
""""""
# 创建一个 BytesIO 对象
img_bytesio = BytesIO()

Expand All @@ -203,24 +203,24 @@ def pil_2_bytes(self, image):
img_bytesio.close()
return img_bytes

def __call__(self, pdf_bytes, cv2_img_list=None, optional_text=False):
def __call__(self, pdf_bytes=None, pil_img_list=None, optional_text=False):
"""
input pdf | imgs if pdf >optional_text or not
0 解析pdf文件
1 构建必要的ofd template
2 转化为 ofd
"""
pdf_obj = DPFParser()
if optional_text: # 生成可编辑ofd:
pdf_info_list, pfd_res_uuid_map = pdf_obj.extract_text_with_details(pdf_bytes) # 解析pdf
logger.debug(f"pdf_info_list: {pdf_info_list} \n pfd_res_uuid_map {pfd_res_uuid_map}")

page_pil_img_list = None

else: # 插入图片ofd
if cv2_img_list: # 读取 图片
page_pil_img_list = [(self.pil_2_bytes(Image.fromarray(cv2.cvtColor(_img,cv2.COLOR_BGR2RGB))),
_img.shape[1], _img.shape[0]) for _img in cv2_img_list]
else: # 读取 pdf 转图片
page_pil_img_list = None

# 插入图片ofd
if pil_img_list: # 读取 图片
page_pil_img_list = [(self.pil_2_bytes(_img),_img.size[0]/self.OP,_img.size[1]/self.OP) for _img in pil_img_list]
else: # 读取 pdf 转图片
if optional_text: # 生成可编辑ofd:
pdf_info_list, pfd_res_uuid_map = pdf_obj.extract_text_with_details(pdf_bytes) # 解析pdf
logger.debug(f"pdf_info_list: {pdf_info_list} \n pfd_res_uuid_map {pfd_res_uuid_map}")
else:
img_list = pdf_obj.to_img(pdf_bytes)
page_pil_img_list = [(self.pil_2_bytes(Image.frombytes("RGB", [_img.width, _img.height],
_img.samples)), _img.width/self.OP, _img.height/self.OP) for _img in img_list]
Expand Down Expand Up @@ -266,6 +266,7 @@ def __call__(self, pdf_bytes, cv2_img_list=None, optional_text=False):
if __name__ == "__main__":

pdf_p = r"D:\renodoc\技术栈\GBT_33190-2016_电子文件存储与交换格式版式文档.pdf"
pdf_p = r"F:\code\easyofd\test"
with open(pdf_p,"rb") as f:
content = f.read()

Expand Down
128 changes: 66 additions & 62 deletions easyofd/draw/draw_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,66 +277,70 @@ def draw_signature(self, canvas, signatures_page_list, page_size):
def draw_line(self,canvas,line_list,page_size):
"""绘制线条"""
# print("绘制",line_list)

def match_mode(Abbr: list):
    """
    Parse the tokens of an OFD ``AbbreviatedData`` string into commands.

    Recognised command letters (operands follow the letter as separate
    tokens):

    S  x y                 define the start coordinate
    M  x y                 move to the given coordinate
    L  x y                 go from the current point to the given point
    Q  x1 y1 x2 y2         quadratic Bezier curve
    B  x1 y1 x2 y2 x3 y3   cubic Bezier curve
    A  arc to x, y (then move there); rx is the major axis, ry the minor
       axis, angle the rotation; large=1 means an arc greater than 180
       degrees, 0 a smaller one; sweep=1 means clockwise, 0 means
       counter-clockwise
    C  close the current point with the sub-path

    Args:
        Abbr: token list, typically ``abbreviated_data.split(" ")``.

    Returns:
        A list of ``{"mode": <letter>, "points": [<operand tokens>]}``
        dicts in input order. Operand tokens are kept as strings.
    """
    commands = ("S", "M", "L", "Q", "B", "A", "C")
    relu_list = []
    current = None
    for token in Abbr:
        if token in commands:
            # A new command letter closes the previous command.
            if current is not None:
                relu_list.append(current)
            current = {"mode": token, "points": []}
        elif token and current is not None:
            current["points"].append(token)
        # Otherwise the token is either empty (split(" ") yields "" for
        # doubled/leading/trailing spaces) or an operand that appears
        # before any command letter; both are ignored instead of raising
        # KeyError or polluting the operand list.

    # Flush the last command once, after the loop.
    if current is not None:
        relu_list.append(current)
    return relu_list


def assemble(relu_list: list):
    """
    Pair every drawable command with the most recent ``M`` command.

    Args:
        relu_list: command dicts carrying a ``"mode"`` key.

    Returns:
        A list of ``{"start_point": <last M command>, "end_point": <command>}``
        dicts, one per ``B``/``Q``/``L`` command, in input order. The start
        is an empty dict when no ``M`` command has been seen yet. Commands
        with any other mode produce no entry.
    """
    drawable = ("B", "Q", "L")
    origin = {}
    segments = []
    for command in relu_list:
        kind = command.get("mode")
        if kind == "M":
            # Remember the latest move; it is not advanced by later segments.
            origin = command
            continue
        if kind in drawable:
            segments.append({"start_point": origin, "end_point": command})
    return segments

def convert_coord(p_list, direction, page_size, pos):
    """
    Convert OFD coordinates to PDF coordinates.

    Args:
        p_list: coordinate values (anything ``float()`` accepts).
        direction: ``"x"`` selects the horizontal formula; any other
            value selects the vertical one.
        page_size: sequence whose index 3 is used as the page height.
        pos: sequence whose first two items are added as an x / y offset
            (presumably the origin of the drawn object - confirm against
            the caller).

    Returns:
        A new list of floats scaled by ``self.OP``.
    """
    if direction == "x":
        return [(float(pos[0]) + float(p)) * self.OP for p in p_list]
    # The vertical value is subtracted from the page height, i.e. the
    # y axis is flipped.
    return [(float(page_size[3]) - float(pos[1]) - float(p)) * self.OP for p in p_list]

for line in line_list:
Abbr = line.get("AbbreviatedData").split(" ") # AbbreviatedData
color = line.get("FillColor",[0,0,0])

def match_mode(Abbr: list):
    """
    Parse the tokens of an OFD ``AbbreviatedData`` string into commands.

    Recognised command letters (operands follow the letter as separate
    tokens):

    S  x y                 define the start coordinate
    M  x y                 move to the given coordinate
    L  x y                 go from the current point to the given point
    Q  x1 y1 x2 y2         quadratic Bezier curve
    B  x1 y1 x2 y2 x3 y3   cubic Bezier curve
    A  arc to x, y (then move there); rx is the major axis, ry the minor
       axis, angle the rotation; large=1 means an arc greater than 180
       degrees, 0 a smaller one; sweep=1 means clockwise, 0 means
       counter-clockwise
    C  close the current point with the sub-path

    Args:
        Abbr: token list, typically ``abbreviated_data.split(" ")``.

    Returns:
        A list of ``{"mode": <letter>, "points": [<operand tokens>]}``
        dicts in input order. Operand tokens are kept as strings.
    """
    commands = ("S", "M", "L", "Q", "B", "A", "C")
    relu_list = []
    current = None
    for token in Abbr:
        if token in commands:
            # A new command letter closes the previous command.
            if current is not None:
                relu_list.append(current)
            current = {"mode": token, "points": []}
        elif token and current is not None:
            current["points"].append(token)
        # Otherwise the token is either empty (split(" ") yields "" for
        # doubled/leading/trailing spaces) or an operand that appears
        # before any command letter; both are ignored instead of raising
        # KeyError or polluting the operand list.

    # Flush the last command once, after the loop.
    if current is not None:
        relu_list.append(current)
    return relu_list


relu_list = match_mode(Abbr)
# TODO 组合 relu_list 1 M L 直线 2 M B*n 三次贝塞尔线 3 M Q*n 二次贝塞尔线
def assemble(relu_list: list):
    """
    Pair every drawable command with the most recent ``M`` command.

    Args:
        relu_list: command dicts carrying a ``"mode"`` key.

    Returns:
        A list of ``{"start_point": <last M command>, "end_point": <command>}``
        dicts, one per ``B``/``Q``/``L`` command, in input order. The start
        is an empty dict when no ``M`` command has been seen yet. Commands
        with any other mode produce no entry.
    """
    drawable = ("B", "Q", "L")
    origin = {}
    segments = []
    for command in relu_list:
        kind = command.get("mode")
        if kind == "M":
            # Remember the latest move; it is not advanced by later segments.
            origin = command
            continue
        if kind in drawable:
            segments.append({"start_point": origin, "end_point": command})
    return segments

def convert_coord(p_list, direction, page_size, pos):
    """
    Convert OFD coordinates to PDF coordinates.

    Args:
        p_list: coordinate values (anything ``float()`` accepts).
        direction: ``"x"`` selects the horizontal formula; any other
            value selects the vertical one.
        page_size: sequence whose index 3 is used as the page height.
        pos: sequence whose first two items are added as an x / y offset
            (presumably the origin of the drawn object - confirm against
            the caller).

    Returns:
        A new list of floats scaled by ``self.OP``.
    """
    if direction == "x":
        return [(float(pos[0]) + float(p)) * self.OP for p in p_list]
    # The vertical value is subtracted from the page height, i.e. the
    # y axis is flipped.
    return [(float(page_size[3]) - float(pos[1]) - float(p)) * self.OP for p in p_list]


# print(relu_list)

acticons = assemble(relu_list)
Expand Down Expand Up @@ -405,20 +409,20 @@ def draw_pdf(self):
c.setPageSize((page_size[2]*self.OP, page_size[3]*self.OP))

# 写入图片
self.draw_img(c, img_list, images, page_size)


if img_list:
self.draw_img(c, img_list, images, page_size)

# 写入文本
self.draw_chars(c, text_list, fonts, page_size)
if text_list:
self.draw_chars(c, text_list, fonts, page_size)

# 绘制线条
self.draw_line(c, line_list, page_size)
if line_list:
self.draw_line(c, line_list, page_size)

# 绘制签章
self.draw_signature(c, signatures_page_id.get(page_id), page_size)


if signatures_page_id:
self.draw_signature(c, signatures_page_id.get(page_id), page_size)


# print("去写入")
Expand Down
4 changes: 2 additions & 2 deletions easyofd/draw/font_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
import os
import shutil
import logging
from io import BytesIO,StringIO
from io import BytesIO, StringIO
import string
from uuid import uuid1
import random
import traceback
import logging

import numpy as np

import tempfile
import xmltodict
from fontTools.ttLib import TTFont as ttLib_TTFont
Expand Down
4 changes: 3 additions & 1 deletion easyofd/draw/pdf_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def extract_text_with_details(self, pdf_bytes):
# print("details_list",details_list)
return details_list, res_uuid_map
def to_img(self, buffer_pdf):
"""转图片"""
"""pdf2img"""
pix_list = []
pdfDoc = fitz.open(stream=buffer_pdf)
for pg in range(pdfDoc.page_count):
Expand All @@ -166,6 +166,8 @@ def to_img(self, buffer_pdf):
# zoom_x,zoom_y = (1,1)
mat = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate)
pix = page.get_pixmap(matrix=mat, alpha=False)


pix_list.append(pix)
return pix_list

Expand Down
36 changes: 13 additions & 23 deletions easyofd/ofd.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
from typing import Any

import fitz
import cv2
import numpy as np

from PIL import Image
from loguru import logger

from easyofd.parser_ofd import OFDParser
Expand Down Expand Up @@ -79,23 +79,24 @@ def pdf2img(self, pdfbytes):
zoom_x, zoom_y = 1.6, 1.6
mat = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate)
pix = page.get_pixmap(matrix=mat, alpha=False)
image = np.ndarray((pix.height, pix.width, 3), dtype=np.uint8, buffer=pix.samples)
pil_image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
# image = np.ndarray((pix.height, pix.width, 3), dtype=np.uint8, buffer=pix.samples)
# print(image.shape)
# print(image[2])
image_list.append(image)
logger.info(f"to_jpg")
image_list.append(pil_image)
logger.info(f"pdf2img")
return image_list

def jpg2ofd(self,imglist:list):
"""
imglist: cv2 image list
imglist: pil image list
"""
ofd_byte = OFDWrite()(cv2_img_list=imglist)
ofd_byte = OFDWrite()(pil_img_list=imglist)
return ofd_byte

def jpg2pfd(self,imglist:list):
"""
imglist: cv2 image list
imglist: PIL image list
1 构建data
2 DrawPDF(self.data)()
"""
Expand All @@ -105,25 +106,14 @@ def jpg2pfd(self,imglist:list):

def to_jpg(self,format="jpg"):
"""
return numpy list
return pil list
"""
assert self.data,f"data is None"
assert self.data, f"data is None"
image_list = []
pdfbytes = self.to_pdf()

doc = fitz.open(stream=pdfbytes, filetype="pdf")

for page in doc:
rotate = int(0)
zoom_x, zoom_y = 1.6, 1.6
mat = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate)
pix = page.get_pixmap(matrix=mat, alpha=False)
image = np.ndarray((pix.height, pix.width, 3), dtype=np.uint8, buffer=pix.samples)
# print(image.shape)
# print(image[2])
image_list.append(image)
logger.info(f"to_jpg")
image_list = self.pdf2img(pdfbytes)
return image_list



def del_data(self,):
Expand Down
Loading

0 comments on commit 99dbe04

Please sign in to comment.