Source code for pandas.io.gbq

""" Google BigQuery support """
from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    Any,
)

from pandas.compat._optional import import_optional_dependency

if TYPE_CHECKING:
    import google.auth

    from pandas import DataFrame


def _try_import():
    # since pandas is a dependency of pandas-gbq
    # we need to import on first use
    msg = (
        "pandas-gbq is required to load data from Google BigQuery. "
        "See the docs: https://pandas-gbq.readthedocs.io."
    )
    pandas_gbq = import_optional_dependency("pandas_gbq", extra=msg)
    return pandas_gbq


[docs]def read_gbq( query: str, project_id: str | None = None, index_col: str | None = None, col_order: list[str] | None = None, reauth: bool = False, auth_local_webserver: bool = True, dialect: str | None = None, location: str | None = None, configuration: dict[str, Any] | None = None, credentials: google.auth.credentials.Credentials | None = None, use_bqstorage_api: bool | None = None, max_results: int | None = None, progress_bar_type: str | None = None, ) -> DataFrame: """ Load data from Google BigQuery. This function requires the `pandas-gbq package <https://pandas-gbq.readthedocs.io>`__. See the `How to authenticate with Google BigQuery <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__ guide for authentication instructions. Parameters ---------- query : str SQL-Like Query to return data values. project_id : str, optional Google BigQuery Account project ID. Optional when available from the environment. index_col : str, optional Name of result column to use for index in results DataFrame. col_order : list(str), optional List of BigQuery column names in the desired order for results DataFrame. reauth : bool, default False Force Google BigQuery to re-authenticate the user. This is useful if multiple accounts are used. auth_local_webserver : bool, default True Use the `local webserver flow`_ instead of the `console flow`_ when getting user credentials. .. _local webserver flow: https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server .. _console flow: https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console *New in version 0.2.0 of pandas-gbq*. .. versionchanged:: 1.5.0 Default value is changed to ``True``. Google has deprecated the ``auth_local_webserver = False`` `"out of band" (copy-paste) flow <https://developers.googleblog.com/2022/02/making-oauth-flows-safer.html?m=1#disallowed-oob>`_. dialect : str, default 'legacy' Note: The default value is changing to 'standard' in a future version. SQL syntax dialect to use. Value can be one of: ``'legacy'`` Use BigQuery's legacy SQL dialect. For more information see `BigQuery Legacy SQL Reference <https://cloud.google.com/bigquery/docs/reference/legacy-sql>`__. ``'standard'`` Use BigQuery's standard SQL, which is compliant with the SQL 2011 standard. For more information see `BigQuery Standard SQL Reference <https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__. location : str, optional Location where the query job should run. See the `BigQuery locations documentation <https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a list of available locations. The location must match that of any datasets used in the query. *New in version 0.5.0 of pandas-gbq*. configuration : dict, optional Query config parameters for job processing. For example: configuration = {'query': {'useQueryCache': False}} For more information see `BigQuery REST API Reference <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__. credentials : google.auth.credentials.Credentials, optional Credentials for accessing Google APIs. Use this parameter to override default credentials, such as to use Compute Engine :class:`google.auth.compute_engine.Credentials` or Service Account :class:`google.oauth2.service_account.Credentials` directly. *New in version 0.8.0 of pandas-gbq*. use_bqstorage_api : bool, default False Use the `BigQuery Storage API <https://cloud.google.com/bigquery/docs/reference/storage/>`__ to download query results quickly, but at an increased cost. To use this API, first `enable it in the Cloud Console <https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com>`__. You must also have the `bigquery.readsessions.create <https://cloud.google.com/bigquery/docs/access-control#roles>`__ permission on the project you are billing queries to. This feature requires version 0.10.0 or later of the ``pandas-gbq`` package. It also requires the ``google-cloud-bigquery-storage`` and ``fastavro`` packages. max_results : int, optional If set, limit the maximum number of rows to fetch from the query results. progress_bar_type : Optional, str If set, use the `tqdm <https://tqdm.github.io/>`__ library to display a progress bar while the data downloads. Install the ``tqdm`` package to use this feature. Possible values of ``progress_bar_type`` include: ``None`` No progress bar. ``'tqdm'`` Use the :func:`tqdm.tqdm` function to print a progress bar to :data:`sys.stderr`. ``'tqdm_notebook'`` Use the :func:`tqdm.tqdm_notebook` function to display a progress bar as a Jupyter notebook widget. ``'tqdm_gui'`` Use the :func:`tqdm.tqdm_gui` function to display a progress bar as a graphical dialog box. Returns ------- df: DataFrame DataFrame representing results of query. See Also -------- pandas_gbq.read_gbq : This function in the pandas-gbq library. DataFrame.to_gbq : Write a DataFrame to Google BigQuery. Examples -------- Example taken from `Google BigQuery documentation <https://cloud.google.com/bigquery/docs/pandas-gbq-migration>`_ >>> sql = "SELECT name FROM table_name WHERE state = 'TX' LIMIT 100;" >>> df = pd.read_gbq(sql, dialect="standard") # doctest: +SKIP >>> project_id = "your-project-id" # doctest: +SKIP >>> df = pd.read_gbq(sql, ... project_id=project_id, ... dialect="standard" ... ) # doctest: +SKIP """ pandas_gbq = _try_import() kwargs: dict[str, str | bool | int | None] = {} # START: new kwargs. Don't populate unless explicitly set. if use_bqstorage_api is not None: kwargs["use_bqstorage_api"] = use_bqstorage_api if max_results is not None: kwargs["max_results"] = max_results kwargs["progress_bar_type"] = progress_bar_type # END: new kwargs return pandas_gbq.read_gbq( query, project_id=project_id, index_col=index_col, col_order=col_order, reauth=reauth, auth_local_webserver=auth_local_webserver, dialect=dialect, location=location, configuration=configuration, credentials=credentials, **kwargs, )
def to_gbq( dataframe: DataFrame, destination_table: str, project_id: str | None = None, chunksize: int | None = None, reauth: bool = False, if_exists: str = "fail", auth_local_webserver: bool = True, table_schema: list[dict[str, str]] | None = None, location: str | None = None, progress_bar: bool = True, credentials: google.auth.credentials.Credentials | None = None, ) -> None: pandas_gbq = _try_import() pandas_gbq.to_gbq( dataframe, destination_table, project_id=project_id, chunksize=chunksize, reauth=reauth, if_exists=if_exists, auth_local_webserver=auth_local_webserver, table_schema=table_schema, location=location, progress_bar=progress_bar, credentials=credentials, )