diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..2390d8c --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,10 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" + groups: + github-actions: + patterns: + - "*" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 251eb0b..872a424 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,5 +1,8 @@ name: ci on: [push, pull_request] +concurrency: # https://stackoverflow.com/questions/66335225#comment133398800_72408109 + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} jobs: ci: strategy: @@ -19,13 +22,11 @@ jobs: with: python-version: ${{ matrix.python-version }} allow-prereleases: true + - name: Copy libmagic into magic dir + run: ${{ (runner.os == 'Windows' && 'bash add_libmagic.sh') || 'sudo -E bash add_libmagic.sh' }} - run: pip install --upgrade pip - run: pip install --upgrade pytest - run: pip install --editable . - - if: runner.os == 'macOS' - run: brew install libmagic - - if: runner.os == 'Windows' - run: pip install python-magic-bin - run: LC_ALL=en_US.UTF-8 pytest shell: bash timeout-minutes: 15 # Limit Windows infinite loop. 
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml new file mode 100644 index 0000000..907e3a6 --- /dev/null +++ b/.github/workflows/wheels.yml @@ -0,0 +1,180 @@ +name: wheels + +on: + pull_request: + push: + branches: master + release: + types: [released, prereleased] + workflow_dispatch: # allows running workflow manually from the Actions tab + +concurrency: # https://stackoverflow.com/questions/66335225#comment133398800_72408109 + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + +jobs: + build-sdist: + runs-on: ubuntu-latest + + env: + PIP_DISABLE_PIP_VERSION_CHECK: 1 + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - run: sudo apt-get install -y libmagic1 + + - name: Build source distribution + run: | + pip install --upgrade setuptools wheel pip build + python -m build --sdist + + - uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/*.tar.* + + + build-wheels-matrix: + runs-on: ubuntu-latest + outputs: + include: ${{ steps.set-matrix.outputs.include }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.x' + - run: pip install cibuildwheel==2.17.0 # sync version with pypa/cibuildwheel below + - id: set-matrix + env: + # only mention one (trivial) python version, as py2.py3 wheels only need to be build once per arch + CIBW_PROJECT_REQUIRES_PYTHON: '==3.12.*' + # skip PyPy wheels for now, and skip i686 wheels because pytest is failing + CIBW_SKIP: pp* *i686 + run: | + MATRIX_INCLUDE=$( + { + cibuildwheel --print-build-identifiers --platform linux --arch all | jq -nRc '{"only": inputs, "os": "ubuntu-latest"}' \ + && cibuildwheel --print-build-identifiers --platform macos --arch x86_64 | jq -nRc '{"only": inputs, "os": "macos-13"}' \ + && cibuildwheel --print-build-identifiers --platform 
macos --arch arm64 | jq -nRc '{"only": inputs, "os": "macos-14"}' \ + && cibuildwheel --print-build-identifiers --platform windows --arch x86,AMD64 | jq -nRc '{"only": inputs, "os": "windows-latest"}' + } | jq -sc + ) + echo "include=$MATRIX_INCLUDE" >> $GITHUB_OUTPUT + + + build-wheels: + name: build ${{ matrix.only }} + needs: build-wheels-matrix + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + include: ${{ fromJson(needs.build-wheels-matrix.outputs.include) }} + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up QEMU + if: runner.os == 'Linux' + uses: docker/setup-qemu-action@v3 + + # For Windows, grabbing latest file from MSYS2 is easier than building from source + # It's generally up to date ref https://packages.msys2.org/base/mingw-w64-file + - name: Setup MSYS2 and install file + if: runner.os == 'Windows' + uses: msys2/setup-msys2@v2.27.0 + with: + msystem: ${{ endsWith(matrix.only, '32') && 'mingw32' || 'mingw64' }} + location: D:\ + install: >- + ${{ endsWith(matrix.only, '32') && 'mingw-w64-i686-file' || 'mingw-w64-x86_64-file' }} + + # The DLL dependency tree flattened out ref "Dependencies" https://packages.msys2.org/packages/mingw-w64-x86_64-file + - name: Copy Windows dll and mgc + if: runner.os == 'Windows' + run: | + cp "/msys64/${{ endsWith(matrix.only, '32') && 'mingw32' || 'mingw64' }}/share/misc/magic.mgc" "magic" + cp "/msys64/${{ endsWith(matrix.only, '32') && 'mingw32' || 'mingw64' }}/bin/libmagic-1.dll" "magic" + cp "/msys64/${{ endsWith(matrix.only, '32') && 'mingw32' || 'mingw64' }}/bin/libsystre-0.dll" "magic" + cp "/msys64/${{ endsWith(matrix.only, '32') && 'mingw32' || 'mingw64' }}/bin/libtre-5.dll" "magic" + cp "/msys64/${{ endsWith(matrix.only, '32') && 'mingw32' || 'mingw64' }}/bin/libasprintf-0.dll" "magic" + cp "/msys64/${{ endsWith(matrix.only, '32') && 'mingw32' || 'mingw64' }}/bin/libintl-8.dll" "magic" + cp "/msys64/${{ endsWith(matrix.only, '32') && 'mingw32' || 
'mingw64' }}/bin/libatomic-1.dll" "magic" + cp "/msys64/${{ endsWith(matrix.only, '32') && 'mingw32' || 'mingw64' }}/bin/libgomp-1.dll" "magic" + cp "/msys64/${{ endsWith(matrix.only, '32') && 'mingw32' || 'mingw64' }}/bin/libquadmath-0.dll" "magic" + cp "/msys64/${{ endsWith(matrix.only, '32') && 'mingw32' || 'mingw64' }}/bin/libstdc++-6.dll" "magic" + cp "/msys64/${{ endsWith(matrix.only, '32') && 'mingw32' || 'mingw64' }}/bin/libcharset-1.dll" "magic" + cp "/msys64/${{ endsWith(matrix.only, '32') && 'mingw32' || 'mingw64' }}/bin/libiconv-2.dll" "magic" + + # These are needed additionally in the win32 wheel ref https://packages.msys2.org/packages/mingw-w64-i686-file + - name: Copy additional 32-bit runtime DLLs + if: runner.os == 'Windows' && endsWith(matrix.only, '32') + run: | + cp "/msys64/mingw32/bin/libgcc_s_dw2-1.dll" "magic" + cp "/msys64/mingw32/bin/libwinpthread-1.dll" "magic" + + - uses: pypa/cibuildwheel@v2.17.0 # sync version with pip install cibuildwheel above + timeout-minutes: 10 + with: + only: ${{ matrix.only }} + env: + CIBW_BUILD_VERBOSITY: 1 + # add compiled libmagic to the build directory (to include in the wheel) + CIBW_BEFORE_BUILD_MACOS: sudo -E bash add_libmagic.sh + CIBW_BEFORE_BUILD_LINUX: bash add_libmagic.sh + # build macos wheels with maximum backwards compatibility (gcc -mmacosx-version-min flag) + MACOSX_DEPLOYMENT_TARGET: ${{ ( endsWith( matrix.only, 'arm64' ) && '11.0' ) || '10.9' }} + # simple smoke test run on each wheel: this is an HLS MP4 video, only recognised in recent versions of libmagic + CIBW_TEST_COMMAND: python -c "import magic; assert magic.Magic(mime=True).from_buffer(b'\x00\x00\x00\x1cftypiso5\x00\x00\x00\x01isomiso5hlsf\x00\x00') == 'video/mp4'" + + - uses: actions/upload-artifact@v4 + with: + name: dist-${{ matrix.only }} + path: wheelhouse/*.whl + + + publish: + if: github.event_name == 'release' + needs: [build-sdist, build-wheels] + runs-on: ubuntu-latest + + permissions: + contents: write # 
softprops/action-gh-release + id-token: write # pypa/gh-action-pypi-publish + + steps: + - uses: actions/setup-python@v5 + with: + python-version: 3.x + + - uses: actions/download-artifact@v4 + with: + path: dist/ + pattern: dist* # matches both the sdist artifact ("dist") and the wheel artifacts ("dist-*") + merge-multiple: true + + - run: ls -ltra dist/ + + - run: pip install --upgrade python-magic --find-links ./dist + + - name: Smoketest + run: python -c "import magic; magic.Magic()" + + - name: Upload release assets + uses: softprops/action-gh-release@v0.1.15 + with: + files: dist/* + + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@v1.8.14 diff --git a/CHANGELOG b/CHANGELOG index a8370c6..eb7cac9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,10 +1,14 @@ -Changes to 0.4.29: +Changes to 0.4.28: +- libmagic and magic.mgc now come bundled in the wheels on PyPI, and will be copied + into site-packages/magic along with the Python files of this library +- magic.loader.load_lib now first searches for libmagic in the same directory as the + Python files, then in the current working directory, and only then in standard paths +- magic.Magic(magic_file=...) and magic.compat.Magic.load(magic_file=...) 
will now + prefer "magic.mgc" in the same directory as the Python files, only if left + unspecified by the user (and the MAGIC env var is empty or not set) - support MAGIC_SYMLINK (via follow_symlink flag on Magic constructor) - correctly throw FileNotFoundException depending on flag - -Changes to 0.4.28: - - support "magic-1.dll" on Windows, which is produced by vcpkg - add python 3.10 to tox config - update test for upstream gzip extensions diff --git a/README.md b/README.md index 010cc8f..89c1807 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,11 @@ [![ci](https://github.com/ahupp/python-magic/actions/workflows/ci.yml/badge.svg)](https://github.com/ahupp/python-magic/actions/workflows/ci.yml) [![Join the chat at https://gitter.im/ahupp/python-magic](https://badges.gitter.im/ahupp/python-magic.svg)](https://gitter.im/ahupp/python-magic?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -python-magic is a Python interface to the libmagic file type -identification library. libmagic identifies file types by checking +[python-magic](https://github.com/ahupp/python-magic) is a Python interface to the libmagic file type +identification library. libmagic identifies file types by checking their headers according to a predefined list of file types. This functionality is exposed to the command line by the Unix command -`file`. +[`file`](https://www.darwinsys.com/file/). ## Usage @@ -31,8 +31,7 @@ will fail throw if this is attempted. 
```python >>> f = magic.Magic(uncompress=True) >>> f.from_file('testdata/test.gz') -'ASCII text (gzip compressed data, was "test", last modified: Sat Jun 28 -21:32:52 2008, from Unix)' +'ASCII text (gzip compressed data, was "test", last modified: Sat Jun 28 21:32:52 2008, from Unix)' ``` You can also combine the flag options: @@ -45,27 +44,53 @@ You can also combine the flag options: ## Installation -The current stable version of python-magic is available on PyPI and -can be installed by running `pip install python-magic`. +This module is a simple [CDLL](https://docs.python.org/3/library/ctypes.html) wrapper around the libmagic C library. +The current stable version of python-magic is available on [PyPI](http://pypi.python.org/pypi/python-magic/) +and can be installed by running `pip install python-magic`. -Other sources: +Compiled libmagic and the magic database come bundled in the wheels on PyPI. +You can use your own `magic.mgc` database by setting the `MAGIC` +environment variable, or by using `magic.Magic(magic_file='path/to/magic.mgc')`. +If you want to compile your own libmagic, circumvent the wheels +by installing from source: `pip install python-magic --no-binary python-magic`. -- PyPI: http://pypi.python.org/pypi/python-magic/ -- GitHub: https://github.com/ahupp/python-magic +For systems not supported by the wheels, pip installs from source, +requiring libmagic to be available before installing python-magic: -This module is a simple wrapper around the libmagic C library, and -that must be installed as well: +### Linux -### Debian/Ubuntu +The Linux wheels should run on most systems out of the box. +Depending on your system and CPU architecture, there might be no compatible wheel uploaded. 
+However, precompiled libmagic might still be available for your system: + +```sh +# Debian/Ubuntu +apt-get update && apt-get install -y libmagic1 +# Alpine +apk add --update libmagic +# RHEL +dnf install file-libs ``` -sudo apt-get install libmagic1 -``` + +### Windows + +The DLLs that are bundled in the Windows wheels come from the MSYS2 `mingw-w64-file` package (https://packages.msys2.org/base/mingw-w64-file); +builds compiled by @julian-r are hosted at https://github.com/julian-r/file-windows/releases. + +For ARM64 Windows, you'll need to compile libmagic from source. ### OSX -- When using Homebrew: `brew install libmagic` -- When using macports: `port install file` +The Mac wheels are compiled with maximum backward compatibility. +For older Macs, you'll need to install libmagic yourself: + +```sh +# homebrew +brew install libmagic +# macports +port install file +``` If python-magic fails to load the library it may be in a non-standard location, in which case you can set the environment variable `DYLD_LIBRARY_PATH` to point to it. @@ -78,7 +103,7 @@ If python-magic fails to load the library it may be in a non-standard location, - 'MagicException: could not find any magic files!': some installations of libmagic do not correctly point to their magic database file. Try specifying the path to the file explicitly in the - constructor: `magic.Magic(magic_file="path_to_magic_file")`. + constructor: `magic.Magic(magic_file='path/to/magic.mgc')`. - 'WindowsError: [Error 193] %1 is not a valid Win32 application': Attempting to run the 32-bit libmagic DLL in a 64-bit build of @@ -88,7 +113,6 @@ If python-magic fails to load the library it may be in a non-standard location, - 'WindowsError: exception: access violation writing 0x00000000 ' This may indicate you are mixing Windows Python and Cygwin Python. Make sure your libmagic and python builds are consistent. - ## Bug Reports python-magic is a thin layer over the libmagic C library. 
diff --git a/add_libmagic.sh b/add_libmagic.sh new file mode 100755 index 0000000..b1133ff --- /dev/null +++ b/add_libmagic.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +set -euxo pipefail + +install_source() { + # install from source + # https://www.darwinsys.com/file/ + # https://github.com/file/file/blob/FILE5_46/INSTALL#L51 + ( + version="file-5.46" && + tmpfile="$(mktemp)" && + curl -sSLo "${tmpfile}" "https://astron.com/pub/file/${version}.tar.gz" && + tar xvf "${tmpfile}" && + cd "${version}" && + ./configure && + make && + make install && + make installcheck && + cd .. && + rm -r "${version}" + ) || ( cd .. && false ) +} + +install_precompiled() { + # Mac https://formulae.brew.sh/formula/libmagic + # Debian https://packages.ubuntu.com/libmagic1 + # Alpine https://pkgs.alpinelinux.org/package/libmagic + # RHEL https://git.almalinux.org/rpms/file + if [ -n "$(which brew)" ]; then + brew install libmagic + elif [ -n "$(which apt-get)" ]; then + apt-get update + apt-get install -y libmagic1 + elif [ -n "$(which apk)" ]; then + apk add --update libmagic + elif [ -n "$(which dnf)" ]; then + dnf --setopt install_weak_deps=false -y install file-libs + fi +} + +copy_libmagic() { + # on cibuildwheel, the lib needs to exist in the project before running setup.py + # copy lib into the magic dir, regardless of platform + # this python command relies on current working directory containing `./magic/loader.py` + libmagic_path="$(python -c 'from magic.loader import load_lib; print(load_lib()._name)')" && + cp "${libmagic_path}" "magic" && + # additionally copy compiled db into magic dir (prefer the one installed by install_source) + ( ( cp "/usr/local/share/misc/magic.mgc" "magic" || cp "/usr/share/misc/magic.mgc" "magic" ) || true ) && + # check what was copied + ls -ltra magic +} + +# skip windows (taken care of separately in wheels.yml); use a brace group, not a subshell, so that exit ends this script +python -c 'import platform; assert platform.system() != "Windows"' || { echo "skipping on windows"; exit 0; } +# prefer a recent build 
from source +install_source || install_precompiled +# files to be copied into the wheel +copy_libmagic diff --git a/magic/__init__.py b/magic/__init__.py index 851b717..210fa2f 100644 --- a/magic/__init__.py +++ b/magic/__init__.py @@ -111,6 +111,12 @@ def __init__( self.cookie = magic_open(self.flags) self.lock = threading.Lock() + if magic_file is None and not os.environ.get("MAGIC"): + # wheels package the mime database in this directory + # prefer it when no magic file is specified by the user + mime_db = os.path.join(os.path.dirname(__file__), "magic.mgc") + if os.path.exists(mime_db): + magic_file = mime_db magic_load(self.cookie, magic_file) # MAGIC_EXTENSION was added in 523 or 524, so bail if diff --git a/magic/compat.py b/magic/compat.py index 6ab9400..a0ffb09 100644 --- a/magic/compat.py +++ b/magic/compat.py @@ -4,6 +4,8 @@ Python bindings for libmagic ''' +import ctypes +import os import threading from collections import namedtuple @@ -14,6 +16,13 @@ _libraries = {} _libraries['magic'] = loader.load_lib() +magic_file = None +if not os.environ.get("MAGIC"): + # wheels package the mime database in this directory + # prefer it when no magic file is specified by the user + mime_db = os.path.join(os.path.dirname(__file__), 'magic.mgc') + if os.path.exists(mime_db): + magic_file = mime_db # Flag constants for open and setflags MAGIC_NONE = NONE = 0 @@ -190,7 +199,7 @@ def setflags(self, flags): """ return _setflags(self._magic_t, flags) - def load(self, filename=None): + def load(self, filename=magic_file): """ Must be called to load entries in the colon separated list of database files passed as argument or the default database file if no argument diff --git a/magic/loader.py b/magic/loader.py index f8d59fa..c084298 100644 --- a/magic/loader.py +++ b/magic/loader.py @@ -2,34 +2,57 @@ import ctypes import sys import glob -import os.path +import os import logging +import subprocess logger = logging.getLogger(__name__) +here = os.path.dirname(__file__) def 
_lib_candidates_linux(): - """Yield possible libmagic library names on Linux. - - This is necessary because alpine is bad - """ - yield "libmagic.so.1" + """Yield possible libmagic library names on Linux.""" + fnames = ("libmagic.so.1", "libmagic.so") + + for fname in fnames: + # libmagic bundled in the wheel + yield os.path.join(here, fname) + # libmagic in the current working directory + yield os.path.join(os.path.abspath("."), fname) + # libmagic install from source default destination path + yield os.path.join("/usr/local/lib", fname) + # on some linux systems (musl/alpine), find_library('magic') returns None + # first try finding libmagic using ldconfig + # otherwise fall back to /usr/lib/ + yield subprocess.check_output( + "( ldconfig -p | grep '{0}' | grep -o '/.*' ) || echo '/usr/lib/{0}'".format( + fname + ), + shell=True, + universal_newlines=True, + ).strip() def _lib_candidates_macos(): """Yield possible libmagic library names on macOS.""" paths = [ - "/opt/homebrew/lib", + # libmagic bundled in the wheel + here, + # libmagic in the current working directory + os.path.abspath("."), + # libmagic in other common sources like homebrew "/opt/local/lib", "/usr/local/lib", + "/opt/homebrew/lib", ] + glob.glob("/usr/local/Cellar/libmagic/*/lib") + for path in paths: yield os.path.join(path, "libmagic.dylib") def _lib_candidates_windows(): """Yield possible libmagic library names on Windows.""" - prefixes = ( + fnames = ( "libmagic", "magic1", "magic-1", @@ -37,16 +60,17 @@ def _lib_candidates_windows(): "libmagic-1", "msys-magic-1", ) - for prefix in prefixes: - # find_library searches in %PATH% but not the current directory, - # so look for both - yield "./%s.dll" % (prefix,) - yield find_library(prefix) + for fname in fnames: + # libmagic bundled in the wheel + yield os.path.join(here, "%s.dll" % fname) + # libmagic in the current working directory + yield os.path.join(os.path.abspath("."), "%s.dll" % fname) + # find_library searches in %PATH% but not the 
current directory + yield find_library(fname) -def _lib_candidates(): - yield find_library("magic") +def _lib_candidates(): func = { "cygwin": _lib_candidates_windows, "darwin": _lib_candidates_macos, @@ -60,6 +84,9 @@ def _lib_candidates(): for path in func(): yield path + # fallback + yield find_library("magic") + def load_lib(): exc = [] @@ -77,5 +104,5 @@ def load_lib(): # It is better to raise an ImportError since we are importing magic module raise ImportError( - "python-magic: failed to find libmagic. Check your installation: \n" + msg + "python-magic: failed to find libmagic. Check your installation: \n" + msg ) diff --git a/setup.py b/setup.py index 54aff08..8220e25 100644 --- a/setup.py +++ b/setup.py @@ -4,6 +4,7 @@ import setuptools import io import os +import sys def read(file_name): @@ -14,6 +15,29 @@ def read(file_name): return f.read() +def get_cmdclass(): + """Build a platform-specific wheel when `setup.py bdist_wheel` is called.""" + if sys.version_info[0] == 2: + return {} + + try: + from wheel.bdist_wheel import bdist_wheel + except ImportError: + return {} + + class bdist_wheel_platform_specific(bdist_wheel): + def get_tag(self): + python, abi, _ = super().get_tag() + # get the platform tag based on libmagic included in this wheel + self.root_is_pure = False + _, _, plat = super().get_tag() + return python, abi, plat + + return {"bdist_wheel": bdist_wheel_platform_specific} + + +cmdclass = get_cmdclass() + setuptools.setup( name="python-magic", description="File type identification using libmagic", @@ -25,8 +49,9 @@ def read(file_name): long_description_content_type="text/markdown", packages=["magic"], package_data={ - "magic": ["py.typed", "*.pyi", "**/*.pyi"], + "magic": ["py.typed", "*.pyi", "*.dylib*", "*.dll", "*.so*", "magic.mgc"] }, + cmdclass=cmdclass, keywords="mime magic file", license="MIT", python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*", diff --git a/test/python_magic_test.py b/test/python_magic_test.py index 
5076044..31d9c35 100755 --- a/test/python_magic_test.py +++ b/test/python_magic_test.py @@ -5,10 +5,8 @@ import shutil import sys import tempfile -from typing import List, Union import unittest - -import pytest +from typing import List, Optional # for output which reports a local time os.environ["TZ"] = "GMT" @@ -26,7 +24,7 @@ class TestFile: file_name: str mime_results: List[str] text_results: List[str] - no_check_elf_results: Union[List[str], None] + no_check_elf_results: Optional[List[str]] buf_equals_file: bool = True @@ -90,7 +88,7 @@ class TestFile: ], b"test.snappy.parquet": [ (COMMON_MIME, ["application/octet-stream"]), - (COMMON_PLAIN, ["Apache Parquet", "Par archive data"]), + (COMMON_PLAIN, ["data", "Apache Parquet", "Par archive data"]), (NO_SOFT, ["data"]), ], b"test.json": [