본문 바로가기
프로그래밍

apache airflow 설치

by 메이슨김 2021. 9. 28.

가상환경생성

# 아래와 같은 명령어로 진행시 유저홈 .conda/envs/ 밑에 생성됨.
# conda create -n airflow2 python=3.8
  
conda create --offline --prefix /opt/anaconda/envs/apache-airflow python=3.8

airflow 설치

conda install -y -c conda-forge apache-airflow

환경 변수 설정.

  • 환경 변수에 AIRFLOW_HOME 을 설정. 기존에 설정되어 있다면 꼭 변경할 것. db init 명령어 실행 시 설정 파일을 덮어씀.
# /etc/profile.d/airflow.sh
export AIRFLOW_HOME=/opt/anaconda/envs/apache-airflow
export PATH=$PATH:$AIRFLOW_HOME/bin

airflow db init

  • 초기에는 내장(embedded)된 SQLite 로 설치됨
  • AIRFLOW_HOME path 에 airflow.cfg 파일이 생성됨.
bin/airflow db init

config 파일 변경

# dag 디렉토리 변경
dags_folder = /app/data/apache-airflow/dags


# timezone
#default_timezone = utc
default_timezone = Asia/Seoul


# 병렬처리를 위한 executor 타입 변경
#executor = SequentialExecutor
executor = LocalExecutor


# 기본 SequentialExecutor 를 변경하기 위해서는 레파지토리로 사용할 rdbms가 별도로 필요(postgresql 설치 및 셋팅 완료)
#sql_alchemy_conn = sqlite:////data1/app/anaconda3/envs/airflow2/airflow.db
sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@{{DB_HOST}}/airflow


# example dag를 노출하지 않음.
load_examples = False


# dag 실행시 과거날짜부터 실행되지 않도록 기본 catchup 설정을 false로 변경, 명시적으로 dag 파일에 써주는 것이 좋음.
catchup_by_default = False


# log 설정
base_log_folder = /app/log/apache-airflow
logging_level = ERROR
dag_processor_manager_log_location = /app/log/apache-airflow/dag_processor_manager/dag_processor_manager.log


# web server 설정
web_server_port = 30000


# scheduler 설정
child_process_log_directory = /app/log/apache-airflow/scheduler

PostgreSQL 설치

postgresql airflow 유저 및 데이터베이스 설정

postgres=# create user airflow with encrypted password 'airflow';
CREATE ROLE
postgres=# create database airflow;
CREATE DATABASE
postgres=# grant all privileges on database airflow to airflow;
GRANT
postgres=# \c airflow
You are now connected to database "airflow" as user "postgres".
airflow=# grant all privileges  on tables in schema public to airflow;
ERROR:  syntax error at or near "in"
LINE 1: grant all privileges  on tables in schema public to airflow;
                                        ^
airflow=# grant all privileges on all tables in schema public to airflow;
GRANT
airflow=#

airflow db init 재실행

  • 기존 SQLite 로 설정되어 있던 repository db 를 설치된 PostgreSQL 로 변경
bin/airflow db init


./bin/airflow users create --username admin --firstname Tony --lastname Stark --role Admin --email ironman@example.com
  • psycopg2 라이브러리 에러
# apache-airflow envs 활성화 후에 진행
Traceback (most recent call last):
  File "/opt/anaconda3/envs/apache-airflow/bin/airflow", line 7, in <module>
    from airflow.__main__ import main
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/__init__.py", line 46, in <module>
    settings.initialize()
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/settings.py", line 447, in initialize
    configure_orm()
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/settings.py", line 222, in configure_orm
    engine = create_engine(SQL_ALCHEMY_CONN, connect_args=connect_args, **engine_args)
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/engine/__init__.py", line 520, in create_engine
    return strategy.create(*args, **kwargs)
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/engine/strategies.py", line 87, in create
    dbapi = dialect_cls.dbapi(**dbapi_args)
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py", line 778, in dbapi
    import psycopg2
ModuleNotFoundError: No module named 'psycopg2'


# 외부망이 막혀 있다면 conda-forge 에서 tar.bz2 받아서 설치
conda install --offline psycopg2-2.9.1-py38h497a2fe_0.tar.bz2
  • psycopg2 설치 후 libpq.so.5 를 찾지 못함.
Traceback (most recent call last):
  File "/opt/anaconda3/envs/apache-airflow/bin/airflow", line 7, in <module>
    from airflow.__main__ import main
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/__init__.py", line 46, in <module>
    settings.initialize()
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/settings.py", line 447, in initialize
    configure_orm()
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/settings.py", line 222, in configure_orm
    engine = create_engine(SQL_ALCHEMY_CONN, connect_args=connect_args, **engine_args)
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/engine/__init__.py", line 520, in create_engine
    return strategy.create(*args, **kwargs)
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/engine/strategies.py", line 87, in create
    dbapi = dialect_cls.dbapi(**dbapi_args)
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py", line 778, in dbapi
    import psycopg2
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/psycopg2/__init__.py", line 51, in <module>
    from psycopg2._psycopg import (                     # noqa
ImportError: libpq.so.5: cannot open shared object file: No such file or directory


# sudo yum install -y libpq.so.5 설치 후 다시 에러
# 32비트 용으로 설치가 되어 에러가 발생.
Traceback (most recent call last):
  File "/opt/anaconda3/envs/apache-airflow/bin/airflow", line 7, in <module>
    from airflow.__main__ import main
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/__init__.py", line 46, in <module>
    settings.initialize()
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/settings.py", line 447, in initialize
    configure_orm()
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/settings.py", line 222, in configure_orm
    engine = create_engine(SQL_ALCHEMY_CONN, connect_args=connect_args, **engine_args)
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/engine/__init__.py", line 520, in create_engine
    return strategy.create(*args, **kwargs)
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/engine/strategies.py", line 87, in create
    dbapi = dialect_cls.dbapi(**dbapi_args)
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py", line 778, in dbapi
    import psycopg2
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/psycopg2/__init__.py", line 51, in <module>
    from psycopg2._psycopg import (                     # noqa
ImportError: libpq.so.5: wrong ELF class: ELFCLASS32

# PostgreSQL이 설치된 서버에서 라이브러리 파일 복사
scp /usr/pgsql-13/lib/libpq.so.5.13 {{DB_HOST}}:/usr/lib
# 기존 링크를 지우고 libpq.so.5 링크 생성
sudo rm -f /usr/lib/libpq.so.5
sudo ln -s /usr/lib/libpq.so.5.13 /usr/lib/libpq.so.5

# /etc/profile 에 LD_LIBRARY_PATH 설정
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib
반응형

'프로그래밍' 카테고리의 다른 글

쿠버네티스 명령어  (0) 2023.03.24