Dear EOS community and experts,
I’m stuck on a problem and can’t make any further progress; I don’t see what is wrong in my configuration.
I’m in the process of testing the upgrade of an EOS test instance from 4.8.105 to 5.1.30 (before 5.2.x).
Here is my configuration. The QDB nodes are OK:
$ pdsh -l root -w "lyoeostestqdb" "for i in 7777 7778 7779; do echo \$i; redis-cli -p \$i raft-info|grep -E 'STATUS|LAST-APPLIED|REPLICA'; done"
lyoeostestqdb: 7777
lyoeostestqdb: LAST-APPLIED 4365015
lyoeostestqdb: STATUS LEADER
lyoeostestqdb: REPLICA lyoeostestqdb.in2p3.fr:7778 | ONLINE | UP-TO-DATE | LOG-SIZE 4365016 | VERSION 5.1.3.5.1.30
lyoeostestqdb: REPLICA lyoeostestqdb.in2p3.fr:7779 | ONLINE | UP-TO-DATE | LOG-SIZE 4365016 | VERSION 5.1.3.5.1.30
lyoeostestqdb: 7778
lyoeostestqdb: LAST-APPLIED 4365015
lyoeostestqdb: STATUS FOLLOWER
lyoeostestqdb: 7779
lyoeostestqdb: LAST-APPLIED 4365015
lyoeostestqdb: STATUS FOLLOWER
On the MGM :
[root@lyoeostestmgm ~]# rpm -qa |grep -E "eos|xrootd"
eos-client-5.1.30-1.el7.cern.x86_64
eos-grpc-1.41.0-1.el7.x86_64
eos-folly-2019.11.11.00-1.el7.cern.x86_64
eos-server-5.1.30-1.el7.cern.x86_64
eos-nginx-1.25.0-0.el7.cern.x86_64
eos-folly-deps-2019.11.11.00-1.el7.cern.x86_64
eos-libmicrohttpd-0.9.38-eos.el7.cern.x86_64
eos-protobuf3-3.17.3-1.el7.cern.eos.x86_64
eos-xrootd-5.5.10-1.el7.cern.x86_64
eos-ns-inspect-5.1.30-1.el7.cern.x86_64
Config :
[root@lyoeostestmgm ~]# eos daemon config mgm mgm info
[putenv] DAEMON_COREFILE_LIMIT=unlimited
[putenv] EOS_ALLOW_SAME_HOST_IN_GROUP=1
[putenv] EOS_AUTOLOAD_CONFIG=default
[putenv] EOS_BROKER_URL=root://localhost:1097//eos/
[putenv] EOS_FED_MANAGER="cms-xrd-transit.cern.ch+:1213"
[putenv] EOS_FED_SITENAME="T3_FR_IPNL"
[putenv] EOS_GEOTAG="cc-c18"
[putenv] EOS_HTTP_CONNECTION_MEMORY_LIMIT=4194304
[putenv] EOS_HTTP_THREADPOOL=epoll
[putenv] EOS_HTTP_THREADPOOL_SIZE=16
[putenv] EOS_INSTANCE_NAME=lyoeostestmgm.in2p3.fr
[putenv] EOS_MGM_ALIAS=lyoeostestmgm.in2p3.fr
[putenv] EOS_MGM_FUSEX_MAX_CHILDREN=262144
[putenv] EOS_MGM_HOST=lyoeostestmgm.in2p3.fr
[putenv] EOS_MGM_HOST_TARGET=lyoeostestmgm.in2p3.fr
[putenv] EOS_MGM_HTTP_PORT=8000
[putenv] EOS_MGM_LISTING_CACHE=0
[putenv] EOS_NO_STACKTRACE=1
[putenv] EOS_NS_ACCOUNTING=1
[putenv] EOS_PSS_MGM=$EOS_MGM_ALIAS:1094
[putenv] EOS_PSS_PATH=/
[putenv] EOS_PSS_PORT=1098
[putenv] EOS_START_SYNC_SEPARATELY=1
[putenv] EOS_SYNCTIME_ACCOUNTING=1
[putenv] EOS_UTF8=""
[putenv] EOS_XROOTD=/opt/eos/xrootd/
[putenv] GEO_TAG=local
[putenv] GSI=
[putenv] INSTANCE_NAME=lyoeostestmgm.in2p3.fr
[putenv] KRB5=
[putenv] KRB5RCACHETYPE=none
[putenv] LD_LIBRARY_PATH=/opt/eos/xrootd//lib64:/opt/eos/grpc/lib64
[putenv] LD_PRELOAD=/usr/lib64/libjemalloc.so
[putenv] SERVER_HOST=lyoeostestmgm.in2p3.fr
# ---------------------------------------
# ------------- i n i t -----------------
# ---------------------------------------
mkdir -p /var/run/eos/
chown daemon:root /var/run/eos/
if [ -e /etc/eos.keytab ]; then chown daemon /etc/eos.keytab ; chmod 400 /etc/eos.keytab ; fi
mkdir -p /var/eos/md /var/eos/report
chmod 755 /var/eos /var/eos/report
mkdir -p /var/spool/eos/core/mgm /var/spool/eos/core/mq /var/spool/eos/core/fst /var/spool/eos/core/qdb /var/spool/eos/admin
mkdir -p /var/log/eos
chown -R daemon /var/spool/eos
find /var/log/eos -maxdepth 1 -type d -exec chown daemon {} \;
find /var/eos/ -maxdepth 1 -mindepth 1 -not -path "/var/eos/fs" -not -path "/var/eos/fusex" -type d -exec chown -R daemon {} \;
chmod -R 775 /var/spool/eos
mkdir -p /var/eos/auth /var/eos/stage
chown daemon /var/eos/auth /var/eos/stage
setfacl -m default:u:daemon:r /var/eos/auth/
# ---------------------------------------
# ------------- s y s c o n f i g -------
# ---------------------------------------
SERVER_HOST=lyoeostestmgm.in2p3.fr
INSTANCE_NAME=lyoeostestmgm.in2p3.fr
GEO_TAG=local
EOS_XROOTD=/opt/eos/xrootd/
LD_LIBRARY_PATH=/opt/eos/xrootd//lib64:/opt/eos/grpc/lib64
LD_PRELOAD=/usr/lib64/libjemalloc.so
DAEMON_COREFILE_LIMIT=unlimited
LD_PRELOAD=/usr/lib64/libjemalloc.so
KRB5RCACHETYPE=none
KRB5=
GSI=
EOS_MGM_HOST=lyoeostestmgm.in2p3.fr
EOS_MGM_HOST_TARGET=lyoeostestmgm.in2p3.fr
EOS_START_SYNC_SEPARATELY=1
EOS_INSTANCE_NAME=lyoeostestmgm.in2p3.fr
EOS_AUTOLOAD_CONFIG=default
EOS_BROKER_URL=root://localhost:1097//eos/
EOS_GEOTAG="cc-c18"
EOS_MGM_ALIAS=lyoeostestmgm.in2p3.fr
EOS_NO_STACKTRACE=1
EOS_UTF8=""
EOS_NS_ACCOUNTING=1
EOS_SYNCTIME_ACCOUNTING=1
EOS_MGM_LISTING_CACHE=0
EOS_ALLOW_SAME_HOST_IN_GROUP=1
EOS_MGM_HTTP_PORT=8000
EOS_HTTP_THREADPOOL=epoll
EOS_HTTP_THREADPOOL_SIZE=16
EOS_HTTP_CONNECTION_MEMORY_LIMIT=4194304
EOS_FED_MANAGER="cms-xrd-transit.cern.ch+:1213"
EOS_FED_SITENAME="T3_FR_IPNL"
EOS_PSS_PORT=1098
EOS_PSS_MGM=$EOS_MGM_ALIAS:1094
EOS_PSS_PATH=/
EOS_MGM_FUSEX_MAX_CHILDREN=262144
# ---------------------------------------
# ------------- m o d u l e s -----------
# ---------------------------------------
# ---------------------------------------
# ------------- x r o o t d ------------
# ---------------------------------------
# running config file: /var/run/eos/xrd.cf.mgm
xrootd.fslib libXrdEosMgm.so
xrootd.seclib libXrdSec.so
xrootd.async off nosf
xrootd.chksum adler32
xrd.sched mint 8 maxt 256 idle 64
all.export / nolock
all.role manager
oss.fdlimit 16384 32768
sec.protocol unix
sec.protocol sss -c /etc/eos.keytab -s /etc/eos.keytab
sec.protocol gsi -crl:3 -cert:/etc/grid-security/hostcert.pem -key:/etc/grid-security/hostkey.pem -gridmap:/etc/grid-security/grid-mapfile -d:0 -vomsfun:libXrdSecgsiVOMS.so -vomsfunparms:certfmt=pem|grpopt=0 -gmapopt:11 -vomsat:1 -moninfo:1 -gmapto:30 -exppxy:/var/eos/auth/gsi#<uid>
sec.protbind localhost.localdomain unix sss
sec.protbind localhost unix sss
sec.protbind * only gsi sss unix
mgmofs.fs /
mgmofs.targetport 1095
mgmofs.broker root://localhost:1097//eos/
mgmofs.instance lyoeostestmgm.in2p3.fr
mgmofs.metalog /var/eos/md
mgmofs.txdir /var/eos/tx
mgmofs.authdir /var/eos/auth
mgmofs.archivedir /var/eos/archive
mgmofs.qosdir /var/eos/qos
mgmofs.reportstorepath /var/eos/report
mgmofs.autoloadconfig default
mgmofs.qoscfg /var/eos/qos/qos.conf
mgmofs.nslib /usr/lib64/libEosNsQuarkdb.so
mgmofs.qdbcluster lyoeostestqdb.in2p3.fr:7777 lyoeostestqdb.in2p3.fr:7778 lyoeostestqdb.in2p3.fr:7779
mgmofs.qdbpassword_file /etc/eos.keytab
xrd.protocol XrdHttp:8443 libXrdHttp.so
xrd.tls /etc/grid-security/hostcert.pem /etc/grid-security/hostkey.pem
xrd.tlsca certdir /etc/grid-security/certificates/
http.gridmap /etc/grid-security/grid-mapfile
http.secxtractor libXrdVoms.so
http.trace all
http.exthandler xrdtpc libXrdHttpTPC.so
http.exthandler EosMgmHttp libEosMgmHttp.so eos::mgm::http::redirect-to-https=1
mgmofs.macaroonslib libXrdMacaroons.so libXrdAccSciTokens.so
macaroons.secretkey /etc/eos.macaroon.secret
macaroons.trace all
all.sitename lyoeostestmgm.in2p3.fr
ofs.tpc redirect delegated lyoeos-gw.in2p3.fr:1094
#########################################
The MQ on the MGM:
[root@lyoeostestmgm ~]# eos daemon config mq mq info
[putenv] EOS_XROOTD=/opt/eos/xrootd/
[putenv] GEO_TAG=local
[putenv] INSTANCE_NAME=lyoeostestmgm.in2p3.fr
[putenv] LD_LIBRARY_PATH=/opt/eos/xrootd//lib64:/opt/eos/grpc/lib64
[putenv] LD_PRELOAD=/usr/lib64/libjemalloc.so
[putenv] SERVER_HOST=lyoeostestmgm.in2p3.fr
# ---------------------------------------
# ------------- i n i t -----------------
# ---------------------------------------
mkdir -p /var/run/eos/
chown daemon:root /var/run/eos/
if [ -e /etc/eos.keytab ]; then chown daemon /etc/eos.keytab ; chmod 400 /etc/eos.keytab ; fi
mkdir -p /var/eos/md /var/eos/report
chmod 755 /var/eos /var/eos/report
mkdir -p /var/spool/eos/core/mgm /var/spool/eos/core/mq /var/spool/eos/core/fst /var/spool/eos/core/qdb /var/spool/eos/admin
mkdir -p /var/log/eos
chown -R daemon /var/spool/eos
find /var/log/eos -maxdepth 1 -type d -exec chown daemon {} \;
find /var/eos/ -maxdepth 1 -mindepth 1 -not -path "/var/eos/fs" -not -path "/var/eos/fusex" -type d -exec chown -R daemon {} \;
chmod -R 775 /var/spool/eos
mkdir -p /var/eos/auth /var/eos/stage
chown daemon /var/eos/auth /var/eos/stage
setfacl -m default:u:daemon:r /var/eos/auth/
# ---------------------------------------
# ------------- s y s c o n f i g -------
# ---------------------------------------
SERVER_HOST=lyoeostestmgm.in2p3.fr
INSTANCE_NAME=lyoeostestmgm.in2p3.fr
GEO_TAG=local
EOS_XROOTD=/opt/eos/xrootd/
LD_LIBRARY_PATH=/opt/eos/xrootd//lib64:/opt/eos/grpc/lib64
LD_PRELOAD=/usr/lib64/libjemalloc.so
# ---------------------------------------
# ------------- m o d u l e s -----------
# ---------------------------------------
# ---------------------------------------
# ------------- x r o o t d ------------
# ---------------------------------------
# running config file: /var/run/eos/xrd.cf.mq
xrootd.fslib libXrdMqOfs.so
all.export /eos/ nolock
all.role server
xrootd.async off nosf
xrootd.seclib libXrdSec.so
sec.protocol sss -c /etc/eos.keytab -s /etc/eos.keytab
sec.protbind * only sss
xrd.sched mint 16 maxt 1024 idle 128
xrd.port 1097
xrd.network keepalive
xrd.timeout idle 120
mq.maxmessagebacklog 100000
mq.maxqueuebacklog 50000
mq.rejectqueuebacklog 100000
mq.trace low
mq.queue /eos/
mgmofs.qdbcluster lyoeostestqdb.in2p3.fr:7777 lyoeostestqdb.in2p3.fr:7778 lyoeostestqdb.in2p3.fr:7779
mq.qdbpassword_file /etc/eos.keytab
#########################################
The MGM dies, gets restarted, and dies again in a loop.
Here are some CRIT and ERROR messages from the logs:
The CRIT ones:
240416 17:03:41 time=1713279821.921484 func=SetupGlobalConfig level=CRIT logid=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx unit=mgm@lyoeostestmgm.in2p3.fr:1094 tid=00007fb33abf9700 source=XrdMgmOfsConfigure:2477 tident=<single-exec> sec= uid=0 gid=0 name= geo="" msg="cannot add global config queue" qpath="/config/lyoeostestmgm.in2p3.fr/mgm/"
240416 17:03:41 time=1713279821.921502 func=SetupGlobalConfig level=CRIT logid=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx unit=mgm@lyoeostestmgm.in2p3.fr:1094 tid=00007fb33abf9700 source=XrdMgmOfsConfigure:2485 tident=<single-exec> sec= uid=0 gid=0 name= geo="" msg="cannot add global config queue" qpath="/config/lyoeostestmgm.in2p3.fr/all/"
240416 17:03:41 time=1713279821.921511 func=SetupGlobalConfig level=CRIT logid=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx unit=mgm@lyoeostestmgm.in2p3.fr:1094 tid=00007fb33abf9700 source=XrdMgmOfsConfigure:2493 tident=<single-exec> sec= uid=0 gid=0 name= geo="" msg="cannot add global config queue" qpath="/config/lyoeostestmgm.in2p3.fr/fst/"
… later
240416 17:03:42 time=1713279822.273780 func=Configure level=INFO logid=static.............................. unit=mgm@lyoeostestmgm.in2p3.fr:1094 tid=00007fb383cdf8c0 source=XrdMgmOfsConfigure:2096 tident= sec=(null) uid=99 gid=99 name=- geo="" Printing a test stacktrace to check for debugging symbols: [bt]: (1) /lib64/libXrdEosMgm-5.so : XrdMgmOfs::Configure(XrdSysError&)+0x7cf5 [0x7fb37a451785]
[bt]: (2) /lib64/libXrdEosMgm-5.so : XrdSfsGetFileSystem+0x12d [0x7fb37a4b90ed]
[bt]: (3) /lib64/libXrdEosMgm-5.so : XrdSfsGetFileSystem2+0x16 [0x7fb37a4b91b6]
[bt]: (4) /opt/eos/xrootd/lib64/libXrdServer.so.3 : XrdXrootdloadFileSystem(XrdSysError*, XrdSfsFileSystem*, char const*, char const*, XrdOucEnv*)+0x69 [0x7fb3835f25b9]
[bt]: (5) /opt/eos/xrootd/lib64/libXrdServer.so.3 : XrdXrootdProtocol::ConfigFS(XrdOucEnv&, char const*)+0x4a [0x7fb3835e819a]
[bt]: (6) /opt/eos/xrootd/lib64/libXrdServer.so.3 : XrdXrootdProtocol::Configure(char*, XrdProtocol_Config*)+0x624 [0x7fb3835ec434]
[bt]: (7) /opt/eos/xrootd/lib64/libXrdServer.so.3 : XrdgetProtocol+0x5f [0x7fb3835fb59f]
[bt]: (8) eos-mgm : XrdProtLoad::Load(char const*, char const*, char*, XrdProtocol_Config*, bool)+0x48 [0x40f478]
[bt]: (9) eos-mgm : XrdConfig::Setup(char*, char*)+0x2ac [0x40b83c]
[bt]: (10) eos-mgm : XrdConfig::Configure(int, char**)+0xd62 [0x40d4f2]
[bt]: (11) eos-mgm : main+0x7c [0x40764c]
[bt]: (12) /lib64/libc.so.6 : __libc_start_main+0xf5 [0x7fb38207f555]
[bt]: (13) eos-mgm() [0x407832]
… later
240416 17:03:42 time=1713279822.923369 func=Config level=NOTE logid=83f77944-fc02-11ee-a6e2-b604754f6513 unit=mgm@lyoeostestmgm.in2p3.fr:1094 tid=00007fb38
3cdf8c0 source=EosMgmHttpHandler:151 tident=<service> sec= uid=0 gid=0 name= geo="" configuration: redirect-to-https:1
240416 17:03:42 time=1713279822.923375 func=GetHttpExtPlugin level=INFO logid=83f77944-fc02-11ee-a6e2-b604754f6513 unit=mgm@lyoeostestmgm.in2p3.fr:1094 tid=00007fb38
3cdf8c0 source=EosMgmHttpHandler:517 tident=<service> sec= uid=0 gid=0 name= geo="" msg="loading HttpExtHandler(XrdMacaroons) plugin" path="libXrdMacaroons-5.so
"
Plugin loaded unreleased XrdMacaroons 5.5.10 from httpexthandler libXrdMacaroons-5.so
240416 17:03:42 1803 sysInitialize: Creating new Macaroon handler object
=====> macaroons.secretkey /etc/eos.macaroon.secret
=====> macaroons.trace all
=====> all.sitename lyoeostestmgm.in2p3.fr
240416 17:03:42 time=1713279822.923925 func=GetHttpExtPlugin level=INFO logid=83f77944-fc02-11ee-a6e2-b604754f6513 unit=mgm@lyoeostestmgm.in2p3.fr:1094 tid=00007fb38
3cdf8c0 source=EosMgmHttpHandler:538 tident=<service> sec= uid=0 gid=0 name= geo="" msg="successfully loaded XrdHttpGetExtHandler" lib="libXrdMacaroons-5.so"
240416 17:03:42 time=1713279822.923943 func=Config level=INFO logid=83f77944-fc02-11ee-a6e2-b604754f6513 unit=mgm@lyoeostestmgm.in2p3.fr:1094 tid=00007fb38
3cdf8c0 source=EosMgmHttpHandler:169 tident=<service> sec= uid=0 gid=0 name= geo="" msg="chaining XrdAccAuthorize object" lib="libXrdAccSciTokens.so"
240416 17:03:42 time=1713279822.923949 func=GetAuthzPlugin level=INFO logid=83f77944-fc02-11ee-a6e2-b604754f6513 unit=mgm@lyoeostestmgm.in2p3.fr:1094 tid=00007fb38
3cdf8c0 source=EosMgmHttpHandler:567 tident=<service> sec= uid=0 gid=0 name= geo="" msg="loading XrdAccAuthorize plugin" lib="libXrdAccSciTokens-5.so"
Plugin /opt/eos/xrootd/lib64/libXrdAccSciTokens-5.so: undefined symbol: scitoken_get_claim_string_list authz libXrdAccSciTokens-5.so
240416 17:03:42 1803 sysEosMgmHttpHandler: Unable to Failed config of EosMgmHttpHandler; invalid argument
240416 17:03:42 time=1713279822.924930 func=GetAuthzPlugin level=ERROR logid=83f77944-fc02-11ee-a6e2-b604754f6513 unit=mgm@lyoeostestmgm.in2p3.fr:1094 tid=00007fb38
3cdf8c0 source=EosMgmHttpHandler:583 tident=<service> sec= uid=0 gid=0 name= geo="" msg="failed loading XrdAccAuthorizeObject" lib="libXrdAccSciTokens-5.so"
240416 17:03:42 time=1713279822.924950 func=Config level=ERROR logid=83f77944-fc02-11ee-a6e2-b604754f6513 unit=mgm@lyoeostestmgm.in2p3.fr:1094 tid=00007fb38
3cdf8c0 source=EosMgmHttpHandler:174 tident=<service> sec= uid=0 gid=0 name= geo="" msg="failed to chain XrdAccAuthorize plugin" lib="libXrdAccSciTokens.so"
240416 17:03:42 time=1713279822.924963 func=~EosMgmHttpHandler level=INFO logid=83f77944-fc02-11ee-a6e2-b604754f6513 unit=mgm@lyoeostestmgm.in2p3.fr:1094 tid=00007fb38
3cdf8c0 source=EosMgmHttpHandler:93 tident=<service> sec= uid=0 gid=0 name= geo="" msg="call ~EosMgmHttpHandler destructor"
------ HTTP protocol initialization failed.
240416 17:03:42 1803 XrdProtocol: Protocol XrdHttp could not be loaded
------ eos-mgm mgm@lyoeostestmgm.in2p3.fr:-1 initialization failed.
and so on, over and over…
I have spent several days on this issue and still haven’t found what is wrong in my configuration.
Thanks for your suggestions and help
Best regards,
Denis