작업 순서 (Cloud Shell 기준)

pip install --user google-cloud-storage

nano load_yellow_taxi_data.py

# BUCKET_NAME 수정
# client = storage.Client() 확인

Ctrl + O
Enter
Ctrl + X

python load_yellow_taxi_data.py

그 다음 바로 확인 👇

gsutil ls gs://BUCKET_NAME/   # BUCKET_NAME = 스크립트에 설정한 내 버킷 이름 (공백 없이 입력)

BigQuery 테이블 셋업

-- Create the `taxi_hw` schema (a dataset, in BigQuery terms) that the external
-- table defined in this file lives in. IF NOT EXISTS makes the statement safe to re-run.
CREATE SCHEMA IF NOT EXISTS `taxi_hw`;

-- Define an external table over the yellow taxi Parquet files for
-- January through June 2024 stored in Cloud Storage.
-- CREATE OR REPLACE overwrites any existing table definition with the same name.
-- NOTE(review): the bucket `yellow-taxi-hw-2024` is hard-coded here; presumably it
-- must match the BUCKET_NAME set in load_yellow_taxi_data.py -- confirm before running.
CREATE OR REPLACE EXTERNAL TABLE `taxi_hw.yellow_taxi_external`
OPTIONS (
  -- Source files are Parquet.
  format = 'PARQUET',
  -- One URI per monthly file (2024-01 .. 2024-06), listed explicitly.
  uris = [
    'gs://yellow-taxi-hw-2024/yellow_tripdata_2024-01.parquet',
    'gs://yellow-taxi-hw-2024/yellow_tripdata_2024-02.parquet',
    'gs://yellow-taxi-hw-2024/yellow_tripdata_2024-03.parquet',
    'gs://yellow-taxi-hw-2024/yellow_tripdata_2024-04.parquet',
    'gs://yellow-taxi-hw-2024/yellow_tripdata_2024-05.parquet',
    'gs://yellow-taxi-hw-2024/yellow_tripdata_2024-06.parquet'
  ]
);

Question 1. Counting records

Question 2. Data read estimation