258 lines
		
	
	
		
			8.6 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			258 lines
		
	
	
		
			8.6 KiB
		
	
	
	
		
			Python
		
	
	
	
"""Utilities related archives.
 | 
						|
"""
 | 
						|
 | 
						|
import logging
 | 
						|
import os
 | 
						|
import shutil
 | 
						|
import stat
 | 
						|
import tarfile
 | 
						|
import zipfile
 | 
						|
from typing import Iterable, List, Optional
 | 
						|
from zipfile import ZipInfo
 | 
						|
 | 
						|
from pip._internal.exceptions import InstallationError
 | 
						|
from pip._internal.utils.filetypes import (
 | 
						|
    BZ2_EXTENSIONS,
 | 
						|
    TAR_EXTENSIONS,
 | 
						|
    XZ_EXTENSIONS,
 | 
						|
    ZIP_EXTENSIONS,
 | 
						|
)
 | 
						|
from pip._internal.utils.misc import ensure_dir
 | 
						|
 | 
						|
logger = logging.getLogger(__name__)


# Extensions of the archive formats this module will attempt to unpack.
# Starts with the zip and tar extensions; the bz2 and xz extensions are
# appended below only when the matching compression module imports cleanly.
SUPPORTED_EXTENSIONS = ZIP_EXTENSIONS + TAR_EXTENSIONS

try:
    import bz2  # noqa
except ImportError:
    logger.debug("bz2 module is not available")
else:
    SUPPORTED_EXTENSIONS += BZ2_EXTENSIONS

try:
    # Only for Python 3.3+
    import lzma  # noqa
except ImportError:
    logger.debug("lzma module is not available")
else:
    SUPPORTED_EXTENSIONS += XZ_EXTENSIONS
 | 
						|
 | 
						|
 | 
						|
def current_umask() -> int:
    """Return the process umask.

    ``os.umask`` can only report the mask by replacing it, so the mask is
    briefly set to 0 and then immediately restored to the value that was
    in effect.
    """
    previous = os.umask(0)
    # Put the original mask back before handing it to the caller.
    os.umask(previous)
    return previous
 | 
						|
 | 
						|
 | 
						|
def split_leading_dir(path: str) -> List[str]:
    """Split ``path`` into its first component and the remainder.

    Leading ``/`` and ``\\`` characters are removed first (in any mix and
    any order), then the path is split once on whichever of the two
    separators occurs earliest.

    Returns a two-element list ``[leading, rest]``; ``rest`` is the empty
    string when the path contains no separator.
    """
    # Strip both separator kinds in one pass so that mixed prefixes such as
    # "\\/foo" do not leave a stray separator (and thus an empty leading
    # component) behind.
    path = path.lstrip("/\\")
    slash = path.find("/")
    backslash = path.find("\\")
    if slash != -1 and (backslash == -1 or slash < backslash):
        return path.split("/", 1)
    if backslash != -1:
        return path.split("\\", 1)
    return [path, ""]
 | 
						|
 | 
						|
 | 
						|
def has_leading_dir(paths: Iterable[str]) -> bool:
    """Tell whether every path shares the same first path component.

    In other words, whether the whole archive lives inside a single
    top-level directory. A path whose leading component is empty makes the
    answer False; an empty iterable yields True.
    """
    shared = None
    for entry in paths:
        top = split_leading_dir(entry)[0]
        if not top:
            return False
        if shared is None:
            # First path seen: remember its top-level component.
            shared = top
            continue
        if top != shared:
            return False
    return True
 | 
						|
 | 
						|
 | 
						|
def is_within_directory(directory: str, target: str) -> bool:
    """
    Return true if the absolute path of target is within the directory

    Both paths are made absolute (and normalised) before being compared.
    The comparison is done on whole path components, so a sibling such as
    ``/a/bc`` is *not* considered to be inside ``/a/b``.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)

    try:
        # commonpath() works on path components; commonprefix() compares
        # character by character and would accept "/a/bc" as being inside
        # "/a/b".
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # Raised when the paths cannot share a root (e.g. different drives
        # on Windows), in which case target cannot be inside directory.
        return False
    return common == abs_directory
 | 
						|
 | 
						|
 | 
						|
def set_extracted_file_to_default_mode_plus_executable(path: str) -> None:
    """
    Give the file at path the umask-derived default mode plus execute
    permission for user, group and world.

    (chmod +x) is no-op on windows per python docs
    """
    # 0o777 filtered through the current umask, then the execute bits are
    # forced on for everyone.
    default_mode = 0o777 & ~current_umask()
    os.chmod(path, default_mode | 0o111)
 | 
						|
 | 
						|
 | 
						|
def zip_item_is_executable(info: ZipInfo) -> bool:
    """Return True if the zip member is a regular file with any execute bit.

    The mode is read from the upper 16 bits of ``external_attr``; a mode of
    zero is treated as "not executable".
    """
    unix_mode = info.external_attr >> 16
    if not unix_mode:
        return False
    if not stat.S_ISREG(unix_mode):
        return False
    # Any execute permission for user/group/world?
    return bool(unix_mode & 0o111)
 | 
						|
 | 
						|
 | 
						|
def unzip_file(filename: str, location: str, flatten: bool = True) -> None:
    """
    Unzip the file (with path `filename`) to the destination `location`.  All
    files are written based on system defaults and umask (i.e. permissions are
    not preserved), except that regular file members with any execute
    permissions (user, group, or world) have "chmod +x" applied after being
    written. Note that for windows, any execute changes using os.chmod are
    no-ops per the python docs.

    If `flatten` is true and every member shares a single leading directory,
    that leading directory is stripped from the extracted paths.

    Raises InstallationError if a member would be written outside `location`.
    """
    ensure_dir(location)
    with open(filename, "rb") as zipfp:
        with zipfile.ZipFile(zipfp, allowZip64=True) as archive:
            leading = has_leading_dir(archive.namelist()) and flatten
            for info in archive.infolist():
                name = info.filename
                fn = split_leading_dir(name)[1] if leading else name
                fn = os.path.join(location, fn)
                if not is_within_directory(location, fn):
                    message = (
                        "The zip file ({}) has a file ({}) trying to install "
                        "outside target directory ({})"
                    )
                    raise InstallationError(
                        message.format(filename, fn, location)
                    )
                if fn.endswith(("/", "\\")):
                    # A directory
                    ensure_dir(fn)
                    continue
                ensure_dir(os.path.dirname(fn))
                # Don't use read() to avoid allocating an arbitrarily large
                # chunk of memory for the file's content
                with archive.open(name) as fp, open(fn, "wb") as destfp:
                    shutil.copyfileobj(fp, destfp)
                # Only reached when the member was written successfully, so
                # a failed write is never masked by a follow-up chmod error.
                if zip_item_is_executable(info):
                    set_extracted_file_to_default_mode_plus_executable(fn)
 | 
						|
 | 
						|
 | 
						|
def untar_file(filename: str, location: str) -> None:
    """
    Untar the file (with path `filename`) to the destination `location`.
    All files are written based on system defaults and umask (i.e. permissions
    are not preserved), except that regular file members with any execute
    permissions (user, group, or world) have "chmod +x" applied after being
    written.  Note that for windows, any execute changes using os.chmod are
    no-ops per the python docs.

    The compression mode is picked from the file extension; when it cannot
    be determined a warning is logged and tarfile is left to detect it.
    If every member shares a single leading directory, that directory is
    stripped from the extracted paths.

    Raises InstallationError if a member would be written outside `location`.
    """
    ensure_dir(location)
    lowered = filename.lower()
    if lowered.endswith((".gz", ".tgz")):
        mode = "r:gz"
    elif lowered.endswith(BZ2_EXTENSIONS):
        mode = "r:bz2"
    elif lowered.endswith(XZ_EXTENSIONS):
        mode = "r:xz"
    elif lowered.endswith(".tar"):
        mode = "r"
    else:
        logger.warning(
            "Cannot determine compression type for file %s",
            filename,
        )
        mode = "r:*"
    with tarfile.open(filename, mode, encoding="utf-8") as tar:
        members = tar.getmembers()
        leading = has_leading_dir([member.name for member in members])
        for member in members:
            fn = member.name
            if leading:
                fn = split_leading_dir(fn)[1]
            path = os.path.join(location, fn)
            if not is_within_directory(location, path):
                message = (
                    "The tar file ({}) has a file ({}) trying to install "
                    "outside target directory ({})"
                )
                raise InstallationError(message.format(filename, path, location))
            if member.isdir():
                ensure_dir(path)
            elif member.issym():
                try:
                    # NOTE(review): _extract_member is a private tarfile
                    # method; kept as-is to preserve behaviour.
                    tar._extract_member(member, path)
                except Exception as exc:
                    # Some corrupt tar files seem to produce this
                    # (specifically bad symlinks)
                    logger.warning(
                        "In the tar file %s the member %s is invalid: %s",
                        filename,
                        member.name,
                        exc,
                    )
                    continue
            else:
                try:
                    fp = tar.extractfile(member)
                except (KeyError, AttributeError) as exc:
                    # Some corrupt tar files seem to produce this
                    # (specifically bad symlinks)
                    logger.warning(
                        "In the tar file %s the member %s is invalid: %s",
                        filename,
                        member.name,
                        exc,
                    )
                    continue
                ensure_dir(os.path.dirname(path))
                assert fp is not None
                # Close the member stream even if opening or writing the
                # destination file fails.
                with fp, open(path, "wb") as destfp:
                    shutil.copyfileobj(fp, destfp)
                # Update the timestamp (useful for cython compiled files)
                tar.utime(member, path)
                # member have any execute permissions for user/group/world?
                if member.mode & 0o111:
                    set_extracted_file_to_default_mode_plus_executable(path)
 | 
						|
 | 
						|
 | 
						|
def unpack_file(
    filename: str,
    location: str,
    content_type: Optional[str] = None,
) -> None:
    """Unpack the archive at `filename` into `location`.

    The archive kind is decided from `content_type`, the file extension and
    content sniffing via the zipfile/tarfile modules. Zip archives go
    through unzip_file (without flattening for ``.whl`` files) and tar
    archives through untar_file.

    Raises InstallationError when the archive format cannot be detected.
    """
    filename = os.path.realpath(filename)
    lowered = filename.lower()

    looks_like_zip = (
        content_type == "application/zip"
        or lowered.endswith(ZIP_EXTENSIONS)
        or zipfile.is_zipfile(filename)
    )
    if looks_like_zip:
        # Wheels keep their top-level layout; other zips may be flattened.
        keep_layout = filename.endswith(".whl")
        unzip_file(filename, location, flatten=not keep_layout)
        return

    tar_suffixes = TAR_EXTENSIONS + BZ2_EXTENSIONS + XZ_EXTENSIONS
    looks_like_tar = (
        content_type == "application/x-gzip"
        or tarfile.is_tarfile(filename)
        or lowered.endswith(tar_suffixes)
    )
    if looks_like_tar:
        untar_file(filename, location)
        return

    # FIXME: handle?
    # FIXME: magic signatures?
    logger.critical(
        "Cannot unpack file %s (downloaded from %s, content-type: %s); "
        "cannot detect archive format",
        filename,
        location,
        content_type,
    )
    raise InstallationError(f"Cannot determine archive format of {location}")
 |