finn_frotscher
10/07/2021, 8:43 AMmeltano invoke airflow run
Traceback (most recent call last):
File "/Users/finn/Library/Python/3.8/lib/python/site-packages/meltano/cli/__init__.py", line 52, in main
raise CliError(str(err)) from err
meltano.cli.utils.CliError: Airflow metadata database could not be initialized: `airflow initdb` failed
Airflow metadata database could not be initialized: `airflow initdb` failed
finn_frotscher
10/07/2021, 3:04 PMamanda.folson
10/07/2021, 3:28 PMmeltano --log-level=debug
casey
10/07/2021, 5:11 PMdocker-compose up
?finn_frotscher
10/07/2021, 5:31 PMfinn_frotscher
10/07/2021, 5:32 PMcasey
10/07/2021, 5:33 PMdocker-compose.yml
file?casey
10/07/2021, 5:34 PMorchestrators
section of your meltano.yml
filefinn_frotscher
10/08/2021, 11:15 AM
# Reusable fragments (Compose extension fields). Each `x-*` key only defines
# an anchor; the services merge them in with `<<: *anchor-name`.

# Image/build settings merged into the Meltano-based services.
x-meltano-image: &meltano-image
  image: meltano-bi:prod  # Change me to a name and tag that makes sense for your project
  build: ./meltano

# Meltano system database connection (points at the meltano-system-db service).
x-meltano-env: &meltano-env
  MELTANO_DATABASE_URI: postgresql://${POSTGRES_USER}:${MELTANO_POSTGRES_PASSWORD}@meltano-system-db/${MELTANO_POSTGRES_DB}

# Airflow settings (metadata DB points at the airflow-metadata-db service).
x-airflow-env: &airflow-env
  # Use the `postgresql://` scheme, same as MELTANO_DATABASE_URI above: the
  # legacy `postgres://` alias is deprecated and rejected by SQLAlchemy 1.4+.
  AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql://${POSTGRES_USER}:${AIRFLOW_POSTGRES_PASSWORD}@airflow-metadata-db/${AIRFLOW_POSTGRES_DB}
  AIRFLOW__CORE__EXECUTOR: LocalExecutor

# Environment for the Postgres container backing Meltano's system database.
x-meltano-postgres-env: &meltano-postgres-env
  POSTGRES_USER: ${POSTGRES_USER}
  POSTGRES_PASSWORD: ${MELTANO_POSTGRES_PASSWORD}
  POSTGRES_DB: ${MELTANO_POSTGRES_DB}
  PGDATA: /var/lib/postgresql/data/pgdata

# Environment for the Postgres container backing Airflow's metadata database.
x-airflow-postgres-env: &airflow-postgres-env
  POSTGRES_USER: ${POSTGRES_USER}
  POSTGRES_PASSWORD: ${AIRFLOW_POSTGRES_PASSWORD}
  POSTGRES_DB: ${AIRFLOW_POSTGRES_DB}
  PGDATA: /var/lib/postgresql/data/pgdata
services:
  # Meltano web UI, published on host port 5000.
  meltano-ui:
    <<: *meltano-image
    command: ui
    environment:
      <<: *meltano-env
    volumes:
      - meltano_elt_logs_data:/project/.meltano/logs/elt
    expose:
      - 5000
    ports:
      - "5000:5000"
    depends_on:
      - meltano-system-db
    networks:
      - meltano
    restart: unless-stopped
    # NOTE(review): this service and airflow-webserver both configure the
    # Traefik router named `whoami`, and the Host(``) rule has an empty
    # hostname. Presumably each service needs its own router name and a real
    # hostname; confirm before deploying behind Traefik.
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.whoami.rule=Host(``)"
      - "traefik.http.routers.whoami.entrypoints=websecure"
      - "traefik.http.routers.whoami.tls.certresolver=myresolver"

  # Airflow web UI, run through Meltano and published on host port 8080.
  airflow-webserver:
    <<: *meltano-image
    command: invoke airflow webserver
    environment:
      # Merge both fragments through one `<<` key. Repeating `<<` in the same
      # mapping is a duplicate key, which strict YAML parsers reject.
      <<: [*meltano-env, *airflow-env]
    expose:
      - 8080
    ports:
      - "8080:8080"
    depends_on:
      - meltano-system-db
      - airflow-metadata-db
    networks:
      - meltano
      - airflow
    restart: unless-stopped
    # NOTE(review): same router name (`whoami`) and empty Host(``) rule as on
    # meltano-ui; confirm the intended router name and hostname.
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.whoami.rule=Host(``)"
      - "traefik.http.routers.whoami.entrypoints=websecure"
      - "traefik.http.routers.whoami.tls.certresolver=myresolver"

  # Airflow scheduler, run through Meltano; mounts the same ELT log volume as
  # meltano-ui.
  airflow-scheduler:
    <<: *meltano-image
    command: invoke airflow scheduler
    environment:
      # Single merge key with a list instead of two duplicate `<<` keys.
      <<: [*meltano-env, *airflow-env]
    volumes:
      - meltano_elt_logs_data:/project/.meltano/logs/elt
    expose:
      - 8793
    depends_on:
      - meltano-system-db
      - airflow-metadata-db
    networks:
      - meltano
      - airflow
    restart: unless-stopped

  # Postgres instance referenced by MELTANO_DATABASE_URI.
  meltano-system-db:
    image: postgres
    environment:
      <<: *meltano-postgres-env
    volumes:
      - meltano_postgresql_data:/var/lib/postgresql/data
      - ./sql/init.sql:/docker-entrypoint-initdb.d/init.sql
    expose:
      - 5432
    networks:
      - meltano
    restart: unless-stopped

  # Postgres instance referenced by AIRFLOW__CORE__SQL_ALCHEMY_CONN.
  airflow-metadata-db:
    image: postgres
    environment:
      <<: *airflow-postgres-env
    volumes:
      - airflow_postgresql_data:/var/lib/postgresql/data
    expose:
      - 5432
    networks:
      - airflow
    restart: unless-stopped

# Compose requires every named network and volume used by a service to be
# declared at top level.
# NOTE(review): these declarations were not part of the pasted snippet; drop
# them if the full file already contains `networks:` / `volumes:` sections.
networks:
  meltano: {}
  airflow: {}

volumes:
  meltano_elt_logs_data: {}
  meltano_postgresql_data: {}
  airflow_postgresql_data: {}
finn_frotscher
10/08/2021, 11:17 AM
# Plugin definitions: extractors (taps), loaders (targets), the Airflow
# orchestrator and its file bundle.
plugins:
  extractors:
    - name: tap-hubspot
      variant: singer-io
      pip_url: git+https://github.com/singer-io/tap-hubspot.git
      config:
        client_id: e173xxx632
        redirect_uri: /-/oauth
        start_date: '2019-01-01'
      select:
        - contacts.*
        - deals.*
        - companies.*
        - campaigns.*
        - engagements.*
        - forms.*
        - owners.*
    - name: tap-postgres
      variant: transferwise
      pip_url: pipelinewise-tap-postgres
      config:
        break_at_end_lsn: false
        dbname: company-next
        default_replication_method: LOG_BASED
        # Bare hostname only; the chat client's link markup is not part of it.
        host: api.company.com
        user: company
  loaders:
    - name: target-bigquery
      variant: adswerve
      pip_url: git+https://github.com/adswerve/target-bigquery.git@v0.10.2
      config:
        # NOTE(review): absolute path on the developer's machine; presumably
        # it will not exist inside a container image. Confirm how credentials
        # are provided in production.
        credentials_path: /Users/finn/code/company/bi/meltano/client_secrets.json
        project_id: warehouse
        location: EU
        add_metadata_columns: true
    # Variants of target-bigquery that differ only in the destination dataset.
    - name: target-bigquery--postgres
      inherit_from: target-bigquery
      config:
        dataset_id: company
    - name: target-bigquery--hubspot
      inherit_from: target-bigquery
      config:
        dataset_id: external_hubspot
  orchestrators:
    - name: airflow
      # psycopg2 is installed next to Airflow so it can connect to a
      # PostgreSQL metadata database (AIRFLOW__CORE__SQL_ALCHEMY_CONN).
      pip_url: psycopg2 apache-airflow==2.1.2 --constraint https://raw.githubusercontent.com/apache/airflow/constraints-2.1.2/constraints-${MELTANO__PYTHON_VERSION}.txt
  files:
    - name: airflow
      pip_url: git+https://gitlab.com/meltano/files-airflow.git

# Pipelines: one extractor/loader pair each, transforms skipped.
schedules:
  - name: hubspot-to-bigquery
    extractor: tap-hubspot
    loader: target-bigquery--hubspot
    transform: skip
    interval: '@once'
    start_date: 2019-01-01 00:00:00
  - name: postgres-to-bigquery
    extractor: tap-postgres
    loader: target-bigquery--postgres
    transform: skip
    interval: '@once'
    start_date: 2019-01-01 00:00:00
casey
10/08/2021, 3:50 PM
`pip_url` in `orchestrators` includes a reference to psycopg2, like so: `pip_url: psycopg2 apache-airflow==2.1.2 --constraint https://raw.githubusercontent.com/apache/airflow/constraints-2.1.2/constraints-3.7.txt`. See this section of the docs (2nd bullet point): https://meltano.com/docs/production.html#airflow-orchestrator