diff --git a/changes.txt b/changes.txt index db6c1d188..6a20b40c6 100644 --- a/changes.txt +++ b/changes.txt @@ -6,12 +6,26 @@ Change Log * Fixed issues: + * **Fixed** `4462 <https://github.com/pymupdf/PyMuPDF/issues/4462>`_: delete_pages() does not accept a single int + * **Fixed** `4565 <https://github.com/pymupdf/PyMuPDF/issues/4565>`_: MacOS uses Tesseract and not Tesseract-OCR + * **Fixed** `4590 <https://github.com/pymupdf/PyMuPDF/issues/4590>`_: TypeError in utils.py scrub(): annot.update_file(buffer=...) is invalid + * **Fixed** `4614 <https://github.com/pymupdf/PyMuPDF/issues/4614>`_: Intercept bad widgets when inserting to another PDF + * **Fixed** `4639 <https://github.com/pymupdf/PyMuPDF/issues/4639>`_: pymupdf.mupdf.FzErrorGeneric: code=1: Director error: <class 'AttributeError'>: 'JM_new_bbox_device_Device' object has no attribute 'layer_name' + +* Fixed issues when built with MuPDF master: + * **Fixed** `3806 <https://github.com/pymupdf/PyMuPDF/issues/3806>`_: pdf to image rendering ignore optional content offs + * **Fixed** `4388 <https://github.com/pymupdf/PyMuPDF/issues/4388>`_: Incorrect PixMap from page due to cached data from other PDF + * **Fixed** `4457 <https://github.com/pymupdf/PyMuPDF/issues/4457>`_: Wrong characters displayed after font subsetting (w/ native method) + * **Fixed** `4533 <https://github.com/pymupdf/PyMuPDF/issues/4533>`_: Open PDF error segmentation fault + * **Fixed** `4571 <https://github.com/pymupdf/PyMuPDF/issues/4571>`_: Broken merged pdfs. + * Other: + * Check that #4392 `Segfault when running with pytest and -Werror` is fixed if PyMuPDF is built with swig>=4.4. * Add `Page.clip_to_rect()`. * Improved search for Tesseract data. * Retrospectively mark #4496 as fixed in 1.26.1. - * Document.insert_pdf(): Ignore erroneous widgets. + * Retrospectively mark #4503 as fixed in 1.26.3. * Added experimental support for Graal. @@ -22,6 +36,7 @@ Change Log * Fixed issues: * **Fixed** `4462 <https://github.com/pymupdf/PyMuPDF/issues/4462>`_: delete_pages() does not accept a single int + * **Fixed** `4503 <https://github.com/pymupdf/PyMuPDF/issues/4503>`_: Undetected character styles * **Fixed** `4527 <https://github.com/pymupdf/PyMuPDF/issues/4527>`_: Rect.intersects() is much slower than necessary * **Fixed** `4564 <https://github.com/pymupdf/PyMuPDF/issues/4564>`_: Possible encoding issue in PDF metadata * **Fixed** `4575 <https://github.com/pymupdf/PyMuPDF/issues/4575>`_: Bug with IRect contains method diff --git a/docs/page.rst b/docs/page.rst index c0617d546..a01bd0d10 100644 --- a/docs/page.rst +++ b/docs/page.rst @@ -246,7 +246,7 @@ In a nutshell, this is what you can do with PyMuPDF: |history_end| - .. 
method:: add_file_annot(pos, buffer, filename, ufilename=None, desc=None, icon="PushPin") + .. method:: add_file_annot(point, buffer_, filename, ufilename=None, desc=None, icon="PushPin") PDF only: Add a file attachment annotation with a "PushPin" icon at the specified location. diff --git a/setup.py b/setup.py index 3072a795c..015eaf741 100755 --- a/setup.py +++ b/setup.py @@ -736,12 +736,27 @@ def add(flavour, from_, to_): except Exception as e: log(f'Failed to get git information: {e}') sha, comment, diff, branch = (None, None, None, None) + swig = PYMUPDF_SETUP_SWIG or 'swig' + swig_version_text = run(f'{swig} --version', capture=1) + m = re.search('\nSWIG Version ([^\n]+)', swig_version_text) + log(f'{swig_version_text=}') + assert m, f'Unrecognised {swig_version_text=}' + swig_version = m.group(1) + def int_or_0(text): + try: + return int(text) + except Exception: + return 0 + swig_version_tuple = tuple(int_or_0(i) for i in swig_version.split('.')) + log(f'{swig_version=}') text = '' text += f'mupdf_location = {mupdf_location!r}\n' text += f'pymupdf_version = {version_p!r}\n' text += f'pymupdf_git_sha = {sha!r}\n' text += f'pymupdf_git_diff = {diff!r}\n' text += f'pymupdf_git_branch = {branch!r}\n' + text += f'swig_version = {swig_version!r}\n' + text += f'swig_version_tuple = {swig_version_tuple!r}\n' add('p', text.encode(), f'{to_dir}/_build.py') # Add single README file. 
diff --git a/src/__init__.py b/src/__init__.py index c3ef099a9..7daf3f463 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -383,6 +383,8 @@ def _int_rc(text): from ._build import pymupdf_git_diff # noqa F401 from ._build import pymupdf_git_sha # noqa F401 from ._build import pymupdf_version # noqa F401 +from ._build import swig_version # noqa F401 +from ._build import swig_version_tuple # noqa F401 mupdf_version = mupdf.FZ_VERSION diff --git a/tests/resources/test_4388_BOZ1.pdf b/tests/resources/test_4388_BOZ1.pdf new file mode 100644 index 000000000..6d2b23b68 Binary files /dev/null and b/tests/resources/test_4388_BOZ1.pdf differ diff --git a/tests/resources/test_4388_BUL1.pdf b/tests/resources/test_4388_BUL1.pdf new file mode 100644 index 000000000..b681354e1 Binary files /dev/null and b/tests/resources/test_4388_BUL1.pdf differ diff --git a/tests/resources/test_4571.pdf b/tests/resources/test_4571.pdf new file mode 100644 index 000000000..991a0512b Binary files /dev/null and b/tests/resources/test_4571.pdf differ diff --git a/tests/resources/test_4639.pdf b/tests/resources/test_4639.pdf new file mode 100644 index 000000000..11d0d1ee3 Binary files /dev/null and b/tests/resources/test_4639.pdf differ diff --git a/tests/test_font.py b/tests/test_font.py index d9f38f5d8..7cf1137b2 100644 --- a/tests/test_font.py +++ b/tests/test_font.py @@ -239,7 +239,7 @@ def test_4457(): ('https://github.com/user-attachments/files/20862923/test_4457_a.pdf', 'test_4457_a.pdf', None, 4), ('https://github.com/user-attachments/files/20862922/test_4457_b.pdf', 'test_4457_b.pdf', None, 9), ) - for url, name, size, rms_after_max in files: + for url, name, size, rms_old_after_max in files: path = util.download(url, name, size) with pymupdf.open(path) as document: @@ -318,10 +318,13 @@ def test_4457(): assert text_before == text assert rms_before == 0 - # As of 2025-05-20 there are some differences in some characters, e.g. - # the non-ascii characters in `Philipp Krahenbuhl`. 
- # See and . - assert rms_after < rms_after_max + if pymupdf.mupdf_version_tuple >= (1, 27): + assert rms_after == 0 + else: + # As of 2025-05-20 there are some differences in some characters, + # e.g. the non-ascii characters in `Philipp Krahenbuhl`. See + # and . + assert abs(rms_after - rms_old_after_max) < 2 # Avoid test failure caused by mupdf warnings. wt = pymupdf.TOOLS.mupdf_warnings() diff --git a/tests/test_general.py b/tests/test_general.py index bb17acdf0..777148d7c 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -1915,18 +1915,22 @@ def show(items): def test_4533(): - if 1: - print(f'test_4533(): doing nothing because known to segv.') - return + print() path = util.download( 'https://github.com/user-attachments/files/20497146/NineData_user_manual_V3.0.5.pdf', 'test_4533.pdf', size=16864501, ) - print(f'Opening {path=}.', flush=1) - with pymupdf.open(path) as document: - print(f'Have opened {path=}.', flush=1) - print(f'{len(document)=}', flush=1) + # This bug is a segv so we run the test in a child process. 
+ command = f'{sys.executable} -c "import pymupdf; document = pymupdf.open({path!r}); print(len(document))"' + print(f'Running: {command}') + cp = subprocess.run(command, shell=1, check=0) + e = cp.returncode + print(f'{e=}') + if pymupdf.mupdf_version_tuple >= (1, 27): + assert e == 0 + else: + assert e != 0 def test_4564(): @@ -1960,3 +1964,90 @@ def test_gitinfo(): print(f'{pymupdf.pymupdf_version=}') print(f'pymupdf.pymupdf_git_diff:\n{textwrap.indent(pymupdf.pymupdf_git_diff, " ")}') + +def test_4392(): + print() + path = os.path.normpath(f'{__file__}/../../tests/test_4392.py') + with open(path, 'w') as f: + f.write('import pymupdf\n') + + command = f'pytest {path}' + print(f'Running: {command}', flush=1) + e1 = subprocess.run(command, shell=1, check=0).returncode + print(f'{e1=}') + + command = f'pytest -Werror {path}' + print(f'Running: {command}', flush=1) + e2 = subprocess.run(command, shell=1, check=0).returncode + print(f'{e2=}') + + command = f'{sys.executable} -Werror -c "import pymupdf"' + print(f'Running: {command}', flush=1) + e3 = subprocess.run(command, shell=1, check=0).returncode + print(f'{e3=}') + + print(f'{e1=} {e2=} {e3=}') + + print(f'{pymupdf.swig_version=}') + print(f'{pymupdf.swig_version_tuple=}') + + assert e1 == 5 + if pymupdf.swig_version_tuple >= (4, 4): + assert e2 == 5 + assert e3 == 0 + else: + # We get SEGV's etc with older swig. + if platform.system() == 'Linux': + assert (e2, e3) == (139, 139) + elif platform.system() == 'Darwin': + assert (e2, e3) == (-11, -11) + elif platform.system() == 'Windows': + assert (e2, e3) == (0xc0000005, 0xc0000005) + + +def test_4639(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4639.pdf') + with pymupdf.open(path) as document: + page = document[-1] + page.get_bboxlog(layers=True) + + +def test_4590(): + + # Create test PDF. 
+ path = os.path.normpath(f'{__file__}/../../tests/test_4590.pdf') + with pymupdf.open() as document: + page = document.new_page() + + # Add some text + text = 'This PDF contains a file attachment annotation.' + page.insert_text((72, 72), text, fontsize=12) + + # Create a sample file. + path_sample = os.path.normpath(f'{__file__}/../../tests/test_4590_annotation_sample.txt') + with open(path_sample, 'w') as f: + f.write('This is a sample attachment file.') + + # Read file as bytes + with open(path_sample, 'rb') as f: + sample = f.read() + + # Define annotation position (rect or point) + annot_pos = pymupdf.Rect(72, 100, 92, 120) # PushPin icon rectangle + + # Add the file attachment annotation + page.add_file_annot( + point = annot_pos, + buffer_ = sample, + filename = 'sample.txt', + ufilename = 'sample.txt', + desc = 'A test attachment file.', + icon = 'PushPin', + ) + + # Save the PDF + document.save(path) + + # Check pymupdf.Document.scrub() works. + with pymupdf.open(path) as document: + document.scrub() diff --git a/tests/test_insertpdf.py b/tests/test_insertpdf.py index 7f6f07c02..e7961b6a0 100644 --- a/tests/test_insertpdf.py +++ b/tests/test_insertpdf.py @@ -314,3 +314,22 @@ def test_4412(): new_doc.insert_pdf(doc, from_page=1, to_page=1) new_doc.save(buf) assert len(new_doc)==1 + + +def test_4571(): + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4571.pdf') + path_out = os.path.normpath(f'{__file__}/../../tests/resources/test_4571_out.pdf') + with pymupdf.open() as newdocument: + with pymupdf.open(path) as document: + newdocument.insert_pdf(document) + newdocument.save(path_out, garbage=4, clean=False) + print(f'Have saved to: {path_out=}') + with open(path_out, 'rb') as f: + content = f.read() + if pymupdf.mupdf_version_tuple >= (1, 27): + # Correct. + assert b'<>' in content + else: + # Incorrect. 
+ assert b'<>' in content + diff --git a/tests/test_pagedelete.py b/tests/test_pagedelete.py index 73593fa29..65f42e4b6 100644 --- a/tests/test_pagedelete.py +++ b/tests/test_pagedelete.py @@ -91,3 +91,25 @@ def test_3150(): doc = pymupdf.open(filename) doc.select(pages) assert doc.page_count == len(pages) + + +def test_4462(): + path0 = os.path.normpath(f'{__file__}/../../tests/resources/test_4462_0.pdf') + path1 = os.path.normpath(f'{__file__}/../../tests/resources/test_4462_1.pdf') + path2 = os.path.normpath(f'{__file__}/../../tests/resources/test_4462_2.pdf') + with pymupdf.open() as document: + document.new_page() + document.new_page() + document.new_page() + document.new_page() + document.save(path0) + with pymupdf.open(path0) as document: + assert len(document) == 4 + document.delete_page(-1) + document.save(path1) + with pymupdf.open(path1) as document: + assert len(document) == 3 + document.delete_pages(-1) + document.save(path2) + with pymupdf.open(path2) as document: + assert len(document) == 2 diff --git a/tests/test_pixmap.py b/tests/test_pixmap.py index 122280324..0fc3a52e0 100644 --- a/tests/test_pixmap.py +++ b/tests/test_pixmap.py @@ -596,7 +596,35 @@ def test_3806(): pixmap.save(path_png) rms = gentle_compare.pixmaps_rms(path_png_expected, pixmap) print(f'{rms=}') - if pymupdf.mupdf_version_tuple >= (1, 27): + if pymupdf.mupdf_version_tuple >= (1, 26, 6): assert rms < 0.1 else: assert rms > 50 + + +def test_4388(): + print() + path_BOZ1 = os.path.normpath(f'{__file__}/../../tests/resources/test_4388_BOZ1.pdf') + path_BUL1 = os.path.normpath(f'{__file__}/../../tests/resources/test_4388_BUL1.pdf') + path_correct = os.path.normpath(f'{__file__}/../../tests/resources/test_4388_BUL1.pdf.correct.png') + path_test = os.path.normpath(f'{__file__}/../../tests/resources/test_4388_BUL1.pdf.test.png') + + with pymupdf.open(path_BUL1) as bul: + pixmap_correct = bul.load_page(0).get_pixmap() + pixmap_correct.save(path_correct) + + 
pymupdf.TOOLS.store_shrink(100) + + with pymupdf.open(path_BOZ1) as boz: + boz.load_page(0).get_pixmap() + + with pymupdf.open(path_BUL1) as bul: + pixmap_test = bul.load_page(0).get_pixmap() + pixmap_test.save(path_test) + + rms = gentle_compare.pixmaps_rms(pixmap_correct, pixmap_test) + print(f'{rms=}') + if pymupdf.mupdf_version_tuple >= (1, 27): + assert rms == 0 + else: + assert rms >= 10 diff --git a/tests/test_textextract.py b/tests/test_textextract.py index 9b9fabda3..5883a3673 100644 --- a/tests/test_textextract.py +++ b/tests/test_textextract.py @@ -895,7 +895,7 @@ def test_4546(): def test_4503(): # Check detection of strikeout text. Behaviour is improved with - # mupdf>=1.26.2, but not perfect. + # mupdf>=1.26.2, and fixed with mupdf>=1.26.3. # path = os.path.normpath(f'{__file__}/../../tests/resources/test_4503.pdf') span_0 = None