38 lines
1.2 KiB
Python
38 lines
1.2 KiB
Python
""" s3 support for remote file interactivity """
|
|
from urllib.parse import urlparse as parse_url
|
|
|
|
from pandas.compat._optional import import_optional_dependency
|
|
|
|
s3fs = import_optional_dependency(
|
|
"s3fs", extra="The s3fs package is required to handle s3 files."
|
|
)
|
|
|
|
|
|
def _strip_schema(url):
|
|
"""Returns the url without the s3:// part"""
|
|
result = parse_url(url, allow_fragments=False)
|
|
return result.netloc + result.path
|
|
|
|
|
|
def get_filepath_or_buffer(
    filepath_or_buffer, encoding=None, compression=None, mode=None
):
    """
    Open an S3 URL with s3fs and return the resulting file object.

    A credentialed connection is tried first; if opening fails with
    ``FileNotFoundError`` or ``NoCredentialsError``, the open is retried
    once on an anonymous connection.

    Parameters
    ----------
    filepath_or_buffer : str
        URL of the object to open; its scheme is stripped before the
        path is handed to s3fs.
    encoding : optional
        Accepted but not used by this function.
    compression : optional
        Passed through unchanged as the third element of the result.
    mode : str, optional
        Mode given to ``fs.open``. Defaults to ``"rb"`` when None.

    Returns
    -------
    tuple
        ``(file_object, None, compression, True)``.
        NOTE(review): the second and fourth slots are presumably the
        encoding and a "caller should close" flag -- confirm against the
        caller's expected tuple layout.
    """
    from botocore.exceptions import NoCredentialsError

    open_mode = "rb" if mode is None else mode

    # Built outside the ``try`` so that errors raised while constructing
    # the credentialed filesystem are not mistaken for an open failure.
    credentialed_fs = s3fs.S3FileSystem(anon=False)
    s3_path = _strip_schema(filepath_or_buffer)

    try:
        handle = credentialed_fs.open(s3_path, open_mode)
    except (FileNotFoundError, NoCredentialsError):
        # boto3 has trouble accessing a public file while credentialed.
        # Having credentials that are not valid for the bucket presumably
        # surfaces as FileNotFoundError (an OSError subclass); having no
        # credentials at all raises NoCredentialsError. Either way, fall
        # back to an anonymous connection and try once more.
        anonymous_fs = s3fs.S3FileSystem(anon=True)
        handle = anonymous_fs.open(s3_path, open_mode)

    return handle, None, compression, True
|