# Source code for hfutils.cache.delete

import logging
from functools import partial
from typing import Optional, Callable

from huggingface_hub import scan_cache_dir, CachedRepoInfo, CachedRevisionInfo, DeleteCacheStrategy, CacheNotFound


def _collect_revisions(fn: Callable[[CachedRepoInfo, CachedRevisionInfo], bool], cache_dir: Optional[str] = None) \
        -> DeleteCacheStrategy:
    """
    Scan the Hugging Face cache and prepare a deletion strategy for every revision accepted by ``fn``.

    The predicate is called once for each ``(repo, revision)`` pair found by the scan. The commit hashes
    of the accepted revisions are logged and handed to ``delete_revisions`` of the scan result.
    This function only builds the strategy; it does not call ``execute()`` on it.

    :param fn: Predicate receiving the cached repository and one of its revisions. Returning a truthy
               value selects that revision.
    :type fn: Callable[[CachedRepoInfo, CachedRevisionInfo], bool]
    :param cache_dir: Cache directory passed through to ``scan_cache_dir``. ``None`` is passed through as-is.
    :type cache_dir: Optional[str]
    :return: The strategy object covering all selected revisions.
    :rtype: DeleteCacheStrategy
    """
    cache_info = scan_cache_dir(cache_dir=cache_dir)

    # Hashes are gathered in a set, so a commit hash selected more than once is only passed on once.
    revision_hashes = set()
    for cached_repo in cache_info.repos:
        revision_hashes.update(
            cached_revision.commit_hash
            for cached_revision in cached_repo.revisions
            if fn(cached_repo, cached_revision)
        )

    logging.info(f'Revisions to delete from huggingface cache: {sorted(revision_hashes)}')
    return cache_info.delete_revisions(*revision_hashes)


def _is_repo_match(repo: CachedRepoInfo, repo_id: Optional[str] = None, repo_type: Optional[str] = None) -> bool:
    """
    Tell whether a cached repository satisfies the optional ID and type filters.

    A filter that is falsy (``None`` or an empty string) is ignored, so calling this
    function with no filters accepts every repository.

    :param repo: The cached repository to test.
    :type repo: CachedRepoInfo
    :param repo_id: Required repository ID, or a falsy value to accept any ID.
    :type repo_id: Optional[str]
    :param repo_type: Required repository type, or a falsy value to accept any type.
    :type repo_type: Optional[str]
    :return: True when every active filter equals the corresponding attribute of ``repo``.
    :rtype: bool
    """
    # Each active filter can only reject; the ID is checked before the type.
    if repo_id and repo.repo_id != repo_id:
        return False
    if repo_type and repo.repo_type != repo_type:
        return False
    return True


def _is_detached_revision(
        repo: CachedRepoInfo, revision: CachedRevisionInfo,
        repo_id: Optional[str] = None, repo_type: Optional[str] = None,
) -> bool:
    """
    Tell whether a revision has no refs and belongs to a repository passing the ID/type filters.

    :param repo: The cached repository owning the revision.
    :type repo: CachedRepoInfo
    :param revision: The cached revision to test.
    :type revision: CachedRevisionInfo
    :param repo_id: Repository ID filter forwarded to ``_is_repo_match``.
    :type repo_id: Optional[str]
    :param repo_type: Repository type filter forwarded to ``_is_repo_match``.
    :type repo_type: Optional[str]
    :return: False when the revision has at least one ref; otherwise the result of the repository check.
    :rtype: bool
    """
    # Short-circuit: the repository filters are only consulted for revisions without any ref.
    return len(revision.refs) == 0 and _is_repo_match(repo, repo_id, repo_type)


def delete_detached_cache(
        repo_id: Optional[str] = None,
        repo_type: Optional[str] = None,
        cache_dir: Optional[str] = None
):
    """
    Remove every detached revision (one without refs) of the matching repositories from the cache.

    If the cache scan raises ``CacheNotFound``, this is logged and nothing else happens.

    :param repo_id: Only touch the repository with this ID. If None, any ID is accepted.
    :type repo_id: Optional[str]
    :param repo_type: Only touch repositories of this type. If None, any type is accepted.
    :type repo_type: Optional[str]
    :param cache_dir: The directory where the cache is stored. If None, uses the default directory.
    :type cache_dir: Optional[str]
    """
    # Bind the filters now so the collector can call the predicate with just (repo, revision).
    detached_filter = partial(
        _is_detached_revision,
        repo_id=repo_id,
        repo_type=repo_type,
    )

    try:
        # noinspection PyTypeChecker
        strategy = _collect_revisions(fn=detached_filter, cache_dir=cache_dir)
        logging.info(f'{strategy.expected_freed_size_str} space will be freed.')
        strategy.execute()
    except CacheNotFound:
        logging.info('No cache file found.')
def _is_selected_revision(
        repo: CachedRepoInfo, revision: CachedRevisionInfo,
        repo_id: Optional[str] = None, repo_type: Optional[str] = None,
) -> bool:
    """
    Tell whether a revision belongs to a repository passing the ID/type filters.

    The revision itself is not inspected; the decision depends on the repository alone.

    :param repo: The cached repository owning the revision.
    :type repo: CachedRepoInfo
    :param revision: The cached revision; accepted so the signature fits the ``(repo, revision)`` predicate shape.
    :type revision: CachedRevisionInfo
    :param repo_id: Repository ID filter forwarded to ``_is_repo_match``.
    :type repo_id: Optional[str]
    :param repo_type: Repository type filter forwarded to ``_is_repo_match``.
    :type repo_type: Optional[str]
    :return: The result of the repository check.
    :rtype: bool
    """
    del revision  # deliberately unused
    return _is_repo_match(repo, repo_id, repo_type)
def delete_cache(
        repo_id: Optional[str] = None,
        repo_type: Optional[str] = None,
        cache_dir: Optional[str] = None
):
    """
    Remove every cached revision of the matching repositories from the cache.

    If the cache scan raises ``CacheNotFound``, this is logged and nothing else happens.

    :param repo_id: Only touch the repository with this ID. If None, any ID is accepted.
    :type repo_id: Optional[str]
    :param repo_type: Only touch repositories of this type. If None, any type is accepted.
    :type repo_type: Optional[str]
    :param cache_dir: The directory where the cache is stored. If None, uses the default directory.
    :type cache_dir: Optional[str]
    """
    # Bind the filters now so the collector can call the predicate with just (repo, revision).
    selection_filter = partial(
        _is_selected_revision,
        repo_id=repo_id,
        repo_type=repo_type,
    )

    try:
        # noinspection PyTypeChecker
        strategy = _collect_revisions(fn=selection_filter, cache_dir=cache_dir)
        logging.info(f'{strategy.expected_freed_size_str} space will be freed.')
        strategy.execute()
    except CacheNotFound:
        logging.info('No cache file found.')