pypi_browser/pypi.py

import base64
import contextlib
import dataclasses
import itertools
import os.path
import typing
import urllib.parse

import aiofiles.os
import httpx


@dataclasses.dataclass(frozen=True)
class PyPIConfig:
    """Immutable settings describing the package index and the local cache.

    Attributes:
        cache_path: Root directory under which downloaded files are stored.
        pypi_url: Base URL of the index; JSON metadata URLs are built from
            it and file download URLs are resolved against it.
    """

    cache_path: str
    pypi_url: str


class PackageDoesNotExist(Exception):
    """Raised when the index answers HTTP 404 for a package's metadata."""


async def package_metadata(config: PyPIConfig, client: httpx.AsyncClient, package: str) -> typing.Dict:
    """Fetch the JSON metadata document for ``package`` from the index.

    Args:
        config: Supplies ``pypi_url``, the base URL of the index.
        client: HTTP client used to issue the GET request.
        package: Project name to look up. It is percent-encoded before
            being placed in the URL.

    Returns:
        The decoded JSON body of the response.

    Raises:
        PackageDoesNotExist: If the index answers with HTTP 404.
        httpx.HTTPStatusError: If ``raise_for_status()`` rejects any other
            response status.
    """
    # Percent-encode the name (including "/") so that characters such as
    # "/", "?" and "#" in a caller-supplied name cannot change the request
    # path or add a query string. Valid project names contain only
    # unreserved characters and are left untouched.
    quoted_package = urllib.parse.quote(package, safe='')
    resp = await client.get(f'{config.pypi_url}/pypi/{quoted_package}/json')
    if resp.status_code == 404:
        raise PackageDoesNotExist(package)
    resp.raise_for_status()
    return resp.json()


async def files_for_package(config: PyPIConfig, package: str) -> typing.Dict[str, typing.Set[str]]:
    """Map each release version of ``package`` to the filenames it contains.

    The package metadata is fetched with a short-lived HTTP client.
    Versions whose file list is empty are omitted from the result.
    """
    async with httpx.AsyncClient() as http:
        package_info = await package_metadata(config, http, package)

    filenames_by_version: typing.Dict[str, typing.Set[str]] = {}
    for release_version, release_files in package_info['releases'].items():
        if len(release_files) == 0:
            # Skip releases that have no uploaded files.
            continue
        filenames_by_version[release_version] = {
            entry['filename'] for entry in release_files
        }
    return filenames_by_version


class CannotFindFileError(Exception):
    """Raised when no release of a package lists the requested filename."""


def _storage_path(config: PyPIConfig, package: str, filename: str) -> str:
    """Return the cache location for ``filename`` belonging to ``package``.

    The result is ``<cache_path>/<encoded package>/<encoded filename>``.
    """
    def _encode(name: str) -> str:
        # Base64-encoding the names to calculate the storage path just to be
        # extra sure to avoid any path traversal vulnerabilities.
        raw = name.encode('utf8')
        return base64.urlsafe_b64encode(raw).decode('ascii')

    package_dir = _encode(package)
    file_name = _encode(filename)
    return os.path.join(config.cache_path, package_dir, file_name)


@contextlib.asynccontextmanager
async def _atomic_file(path: str, mode: str = 'w') -> typing.AsyncIterator[typing.Any]:
    """Yield a temporary file that is moved to ``path`` only on success.

    The temporary file is created in the same directory as ``path``. If the
    ``async with`` body finishes without raising, the file is closed and
    then renamed onto ``path``. If the body (or the final rename) raises,
    the temporary file is deleted and the exception is re-raised, so
    ``path`` is never left holding partially written data.

    Args:
        path: Final destination of the file.
        mode: Mode used to open the temporary file (e.g. ``'w'``, ``'wb'``).

    Yields:
        The open aiofiles temporary-file object to write to.
    """
    tmp_path = None
    try:
        async with aiofiles.tempfile.NamedTemporaryFile(
            mode, dir=os.path.dirname(path), delete=False,
        ) as f:
            tmp_path = f.name
            yield f
        # The temporary file is flushed and closed at this point, so the
        # destination only ever becomes visible with its complete contents.
        # This is atomic since the temporary file was created in the same directory.
        await aiofiles.os.rename(tmp_path, path)
    except BaseException:
        # Covers failures in the caller's body as well as in the rename;
        # the file has already been closed by the inner ``async with``.
        if tmp_path is not None:
            with contextlib.suppress(FileNotFoundError):
                await aiofiles.os.remove(tmp_path)
        raise


async def downloaded_file_path(config: PyPIConfig, package: str, filename: str) -> str:
    """Return the local filesystem path of a PyPI file, fetching it if needed.

    When the file is already present in the cache the path is returned
    right away. Otherwise the package metadata is queried, the file is
    downloaded into the cache, and the path is returned afterwards, which
    may take a while.

    Raises:
        CannotFindFileError: If no release of ``package`` lists ``filename``.
    """
    cache_file = _storage_path(config, package, filename)
    already_cached = await aiofiles.os.path.exists(cache_file)
    if already_cached:
        return cache_file

    async with httpx.AsyncClient() as http:
        package_info = await package_metadata(config, http, package)

        # Parsing versions from non-wheel Python packages isn't perfectly
        # reliable, so rather than guessing the version, scan the files of
        # every release and take the first one with a matching name.
        all_files = (
            entry
            for release_files in package_info['releases'].values()
            for entry in release_files
        )
        match = next(
            (entry for entry in all_files if entry['filename'] == filename),
            None,
        )
        if match is None:
            raise CannotFindFileError(package, filename)
        download_url = urllib.parse.urljoin(config.pypi_url, match['url'])

        cache_dir = os.path.dirname(cache_file)
        await aiofiles.os.makedirs(cache_dir, exist_ok=True)

        # Stream the body to disk chunk by chunk through the atomic-file
        # helper instead of buffering the whole download in memory.
        async with _atomic_file(cache_file, 'wb') as out, http.stream('GET', download_url) as response:
            response.raise_for_status()
            async for data in response.aiter_bytes():
                await out.write(data)

    return cache_file
Metadata
View Raw File