가상환경생성
# 아래와 같은 명령어로 진행시 유저홈 .conda/envs/ 밑에 생성됨.
# conda create -n airflow2 python=3.8
conda create --offline --prefix /opt/anaconda/envs/apache-airflow python=3.8
airflow 설치
conda install -y -c conda-forge apache-airflow |
환경 변수 설정.
- 환경 변수에서 AIRFLOW_HOME 설정, 기존에 설정되어 있다면 꼭 변경. db init 명령어 실행 시 설정 파일을 덮어씀.
# /etc/profile.d/airflow.sh
export AIRFLOW_HOME=/opt/anaconda/envs/apache-airflow
export PATH=$PATH:$AIRFLOW_HOME/bin
airflow db init
- 초기 embedded 된 sqlite 로 설치됨
- AIRFLOW_HOME path 에 airflow.cfg 파일이 생성됨.
bin/airflow db init |
config 파일 변경
# dag 디렉토리 변경
dags_folder = /app/data/apache-airflow/dags
# timezone
#default_timezone = utc
default_timezone = Asia/Seoul
# 병렬처리를 위한 executor 타입 변경
#executor = SequentialExecutor
executor = LocalExecutor
# 기본 SequentialExecutor 를 변경하기 위해서는 레파지토리로 사용할 rdbms가 별도로 필요(postgresql 설치 및 셋팅 완료)
#sql_alchemy_conn = sqlite:////data1/app/anaconda3/envs/airflow2/airflow.db
sql_alchemy_conn = postgresql+psycopg2://airflow:airflow@{{DB_HOST}}/airflow
# example dag를 노출하지 않음.
load_examples = False
# dag 실행시 과거날짜부터 실행되지 않도록 기본 catchup 설정을 false로 변경, 명시적으로 dag 파일에 써주는 것이 좋음.
catchup_by_default = False
# log 설정
base_log_folder = /app/log/apache-airflow
logging_level = ERROR
dag_processor_manager_log_location = /app/log/apache-airflow/dag_processor_manager/dag_processor_manager.log
# web server 설정
web_server_port = 30000
# scheduler 설정
child_process_log_directory = /app/log/apache-airflow/scheduler
PostgreSQL 설치
postgresql airflow 유저 및 데이터베이스 설정
postgres=# create user airflow with encrypted password 'airflow'; CREATE ROLE postgres=# create database airflow; CREATE DATABASE postgres=# grant all privileges on database airflow to airflow; GRANT postgres=# \c airflow You are now connected to database "airflow" as user "postgres". airflow=# grant all privileges on tables in schema public to airflow; ERROR: syntax error at or near "in" LINE 1: grant all privileges on tables in schema public to airflow; ^ airflow=# grant all privileges on all tables in schema public to airflow; GRANT airflow=# |
airflow db init 재실행
- 기존 sqlite 로 설정되어 있던 repository db 를 설치된 postgresql 로 변경
bin/airflow db init
./bin/airflow users create --username admin --firstname Tony --lastname Stark --role Admin --email ironman@example.com
- psycopg2 라이브러리 에러
# apache-airflow envs 활성화 후에 진행
Traceback (most recent call last):
  File "/opt/anaconda3/envs/apache-airflow/bin/airflow", line 7, in <module>
    from airflow.__main__ import main
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/__init__.py", line 46, in <module>
    settings.initialize()
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/settings.py", line 447, in initialize
    configure_orm()
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/settings.py", line 222, in configure_orm
    engine = create_engine(SQL_ALCHEMY_CONN, connect_args=connect_args, **engine_args)
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/engine/__init__.py", line 520, in create_engine
    return strategy.create(*args, **kwargs)
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/engine/strategies.py", line 87, in create
    dbapi = dialect_cls.dbapi(**dbapi_args)
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py", line 778, in dbapi
    import psycopg2
ModuleNotFoundError: No module named 'psycopg2'
# 외부망이 막혀 있다면 conda-forge 에서 tar.bz2 받아서 설치
conda install --offline psycopg2-2.9.1-py38h497a2fe_0.tar.bz2
- psycopg2 설치 후 libpq.so.5 를 찾지 못함.
Traceback (most recent call last): File "/opt/anaconda3/envs/apache-airflow/bin/airflow", line 7, in <module> from airflow.__main__ import main File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/__init__.py", line 46, in <module> settings.initialize() File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/settings.py", line 447, in initialize configure_orm() File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/settings.py", line 222, in configure_orm engine = create_engine(SQL_ALCHEMY_CONN, connect_args=connect_args, **engine_args) File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/engine/__init__.py", line 520, in create_engine return strategy.create(*args, **kwargs) File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/engine/strategies.py", line 87, in create dbapi = dialect_cls.dbapi(**dbapi_args) File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py", line 778, in dbapi import psycopg2 File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/psycopg2/__init__.py", line 51, in <module> from psycopg2._psycopg import ( # noqa ImportError: libpq.so.5: cannot open shared object file: No such file or directory # sudo yum install -y libpq.so.5 설치 후 다시 에러 # 32비트 용으로 설치가 되어 에러가 발생. 
Traceback (most recent call last):
  File "/opt/anaconda3/envs/apache-airflow/bin/airflow", line 7, in <module>
    from airflow.__main__ import main
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/__init__.py", line 46, in <module>
    settings.initialize()
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/settings.py", line 447, in initialize
    configure_orm()
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/airflow/settings.py", line 222, in configure_orm
    engine = create_engine(SQL_ALCHEMY_CONN, connect_args=connect_args, **engine_args)
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/engine/__init__.py", line 520, in create_engine
    return strategy.create(*args, **kwargs)
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/engine/strategies.py", line 87, in create
    dbapi = dialect_cls.dbapi(**dbapi_args)
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/sqlalchemy/dialects/postgresql/psycopg2.py", line 778, in dbapi
    import psycopg2
  File "/opt/anaconda3/envs/apache-airflow/lib/python3.8/site-packages/psycopg2/__init__.py", line 51, in <module>
    from psycopg2._psycopg import ( # noqa
ImportError: libpq.so.5: wrong ELF class: ELFCLASS32
# PostgreSQL이 설치된 서버에서 라이브러리 파일 복사
scp /usr/pgsql-13/lib/libpq.so.5.13 {{DB_HOST}}:/usr/lib
# 기존 링크를 지우고 libpq.so.5 링크 생성 (링크는 /usr/lib 아래에 절대 경로로 생성)
sudo rm -f /usr/lib/libpq.so.5
sudo ln -s /usr/lib/libpq.so.5.13 /usr/lib/libpq.so.5
# /etc/profile 에 LD_LIBRARY_PATH 설정
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib
반응형