Cannot init Airflow using `meltano invoke airflow...
# troubleshooting
f
Cannot init Airflow using `meltano invoke airflow run`:
Traceback (most recent call last):
  File "/Users/finn/Library/Python/3.8/lib/python/site-packages/meltano/cli/__init__.py", line 52, in main
    raise CliError(str(err)) from err
meltano.cli.utils.CliError: Airflow metadata database could not be initialized: `airflow initdb` failed
Airflow metadata database could not be initialized: `airflow initdb` failed
I have been struggling with issue after issue for the entire week now. Is Meltano production-ready?
a
Is there anything that comes up when you run `meltano --log-level=debug`?
c
@finn_frotscher is this during `docker-compose up`?
f
@casey yes
@amanda.folson I have the log level set as an env flag. It's always on.
c
Could you post your `docker-compose.yml` file, and the `orchestrators` section of your `meltano.yml` file?
f
docker compose:
x-meltano-image: &meltano-image
  image: meltano-bi:prod # Change me to a name and tag that makes sense for your project
  build: ./meltano

x-meltano-env: &meltano-env
  MELTANO_DATABASE_URI: postgresql://${POSTGRES_USER}:${MELTANO_POSTGRES_PASSWORD}@meltano-system-db/${MELTANO_POSTGRES_DB}

x-airflow-env: &airflow-env
  AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgres://${POSTGRES_USER}:${AIRFLOW_POSTGRES_PASSWORD}@airflow-metadata-db/${AIRFLOW_POSTGRES_DB}
  AIRFLOW__CORE__EXECUTOR: LocalExecutor

x-meltano-postgres-env: &meltano-postgres-env
  POSTGRES_USER: ${POSTGRES_USER}
  POSTGRES_PASSWORD: ${MELTANO_POSTGRES_PASSWORD}  
  POSTGRES_DB: ${MELTANO_POSTGRES_DB}
  PGDATA: /var/lib/postgresql/data/pgdata

x-airflow-postgres-env: &airflow-postgres-env
  POSTGRES_USER: ${POSTGRES_USER}
  POSTGRES_PASSWORD: ${AIRFLOW_POSTGRES_PASSWORD}  
  POSTGRES_DB: ${AIRFLOW_POSTGRES_DB}
  PGDATA: /var/lib/postgresql/data/pgdata

services:
  meltano-ui:
    <<: *meltano-image
    command: ui
    environment:
      <<: *meltano-env
    volumes:
      - meltano_elt_logs_data:/project/.meltano/logs/elt
    expose:
      - 5000
    ports:
      - 5000:5000
    depends_on:
      - meltano-system-db
    networks:
      - meltano
    restart: unless-stopped
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.whoami.rule=Host(``)"
      - "traefik.http.routers.whoami.entrypoints=websecure"
      - "traefik.http.routers.whoami.tls.certresolver=myresolver"

  airflow-webserver:
    <<: *meltano-image
    command: invoke airflow webserver
    environment:
      <<: *meltano-env
      <<: *airflow-env
    expose:
      - 8080
    ports:
      - 8080:8080
    depends_on:
      - meltano-system-db
      - airflow-metadata-db
    networks:
      - meltano
      - airflow
    restart: unless-stopped
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.whoami.rule=Host(``)"
      - "traefik.http.routers.whoami.entrypoints=websecure"
      - "traefik.http.routers.whoami.tls.certresolver=myresolver"
  
  airflow-scheduler:
    <<: *meltano-image
    command: invoke airflow scheduler
    environment:
      <<: *meltano-env
      <<: *airflow-env
    volumes:
      - meltano_elt_logs_data:/project/.meltano/logs/elt
    expose:
      - 8793
    depends_on:
      - meltano-system-db
      - airflow-metadata-db
    networks:
      - meltano
      - airflow
    restart: unless-stopped
  
  meltano-system-db:
    image: postgres
    environment:
      <<: *meltano-postgres-env
    volumes:
      - meltano_postgresql_data:/var/lib/postgresql/data
      - ./sql/init.sql:/docker-entrypoint-initdb.d/init.sql
    expose:
      - 5432
    networks:
      - meltano
    restart: unless-stopped
  airflow-metadata-db:
    image: postgres
    environment:
      <<: *airflow-postgres-env
    volumes:
      - airflow_postgresql_data:/var/lib/postgresql/data
    expose:
      - 5432
    networks:
      - airflow
    restart: unless-stopped
meltano.yml:
plugins:
  extractors:
  - name: tap-hubspot
    variant: singer-io
    pip_url: git+https://github.com/singer-io/tap-hubspot.git
    config:
      client_id: e173xxx632
      redirect_uri: /-/oauth
      start_date: '2019-01-01'
    select:
    - contacts.*
    - deals.*
    - companies.*
    - campaigns.*
    - engagements.*
    - forms.*
    - owners.*
  - name: tap-postgres
    variant: transferwise
    pip_url: pipelinewise-tap-postgres
    config:
      break_at_end_lsn: false
      dbname: company-next
      default_replication_method: LOG_BASED
      host: api.company.com
      user: company
  loaders:
  - name: target-bigquery
    variant: adswerve
    pip_url: git+https://github.com/adswerve/target-bigquery.git@v0.10.2
    config:
      credentials_path: /Users/finn/code/company/bi/meltano/client_secrets.json
      project_id: warehouse
      location: EU
      add_metadata_columns: true
  - name: target-bigquery--postgres
    inherit_from: target-bigquery
    config:
      dataset_id: company
  - name: target-bigquery--hubspot
    inherit_from: target-bigquery
    config:
      dataset_id: external_hubspot
  orchestrators:
  - name: airflow
    pip_url: apache-airflow==2.1.2 --constraint https://raw.githubusercontent.com/apache/airflow/constraints-2.1.2/constraints-${MELTANO__PYTHON_VERSION}.txt
  files:
  - name: airflow
    pip_url: git+https://gitlab.com/meltano/files-airflow.git
schedules:
- name: hubspot-to-bigquery
  extractor: tap-hubspot
  loader: target-bigquery--hubspot
  transform: skip
  interval: '@once'
  start_date: 2019-01-01 00:00:00
- name: postgres-to-bigquery
  extractor: tap-postgres
  loader: target-bigquery--postgres
  transform: skip
  interval: '@once'
  start_date: 2019-01-01 00:00:00
c
My `pip_url` in `orchestrators` includes a reference to `psycopg2`, like so: `pip_url: psycopg2 apache-airflow==2.1.2 --constraint https://raw.githubusercontent.com/apache/airflow/constraints-2.1.2/constraints-3.7.txt`. See this section of the docs (2nd bullet point): https://meltano.com/docs/production.html#airflow-orchestrator
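For reference, a minimal sketch of how the `orchestrators` entry in the `meltano.yml` above might look with `psycopg2` added, keeping the same Airflow version and constraint URL already in use (whether plain `psycopg2` or `psycopg2-binary` is the right package depends on the build tooling available in the Docker image, so treat that choice as an assumption):

  orchestrators:
  - name: airflow
    # psycopg2 lets Airflow connect to the Postgres metadata DB configured
    # through AIRFLOW__CORE__SQL_ALCHEMY_CONN in docker-compose.yml
    pip_url: psycopg2 apache-airflow==2.1.2 --constraint https://raw.githubusercontent.com/apache/airflow/constraints-2.1.2/constraints-${MELTANO__PYTHON_VERSION}.txt

After changing the `pip_url`, the plugin needs to be reinstalled (for example with `meltano install orchestrator airflow`) so that `psycopg2` actually ends up in the Airflow virtualenv.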