[20230616]One Deadlock of 'row cache lock' and 'library cache lock'.txt

寻技术 Oracle 2023年07月11日 115
[20230616]One Deadlock of 'row cache lock' and 'library cache lock'.txt

--//链接http://ksun-oracle.blogspot.com/2023/06/one-deadlock-of-row-cache-lock-and.html演示一个有趣的测试.
--//他测试采用cluster表,我估计普通表这样操作不会出现这样的情况,先重复作者的测试看看.

1.环境:

SCOTT@test01p> @ ver1
PORT_STRING                    VERSION        BANNER                                                                               CON_ID
------------------------------ -------------- -------------------------------------------------------------------------------- ----------
IBMPC/WIN_NT64-9.1.0           12.2.0.1.0     Oracle Database 12c Enterprise Edition Release 12.2.0.1.0 - 64bit Production              0

2.建立测试脚本:
--//drop table test_tab purge;
--//drop cluster test_cluster;

SCOTT@test01p> create cluster test_cluster (x number);
Cluster created.

SCOTT@test01p> create index test_cluster_indx on cluster test_cluster;
Index created.

SCOTT@test01p> create table test_tab (x, y) cluster test_cluster(x) as select level, 'abc' from dual connect by level <= 3;
Table created.

create or replace procedure test_proc_sel_tab(p_cnt number) as
begin   
  for i in 1..p_cnt loop
    for c in (select * from test_tab) loop
      null;
    end loop;
    --dbms_session.sleep(0.01);
    dbms_lock.sleep(0.01);
  end loop;
end;
/

-- exec test_proc_sel_tab(5);

create or replace procedure test_proc_alt_cluster(p_cnt number) as
begin
  for i in 1..p_cnt loop
    execute immediate 'ALTER CLUSTER test_cluster PCTFREE 18 INITRANS 19';
  end loop;
end;
/

-- exec test_proc_alt_cluster(5);
--//简单说明:存储过程test_proc_sel_tab 不断执行select操作,每次修改停顿0.01秒.存储过程test_proc_alt_cluster不断的执行
--//alert修改cluster表的PCTFREE,INITRANS属性.

3.测试:

--//session 1,按照作者介绍需要10秒完成, 感觉不对0.01*10000 = 100秒. 我修改为1e3.
SCOTT@test01p> @ spid
       SID    SERIAL# PROCESS                  SERVER    SPID                     PID  P_SERIAL# C50
---------- ---------- ------------------------ --------- -------------------- ------- ---------- --------------------------------------------------
       252      29639 4784:8016                DEDICATED 260                       27          7 alter system kill session '252,29639' immediate;

SCOTT@test01p> @zzdate
C30                                    C30                                    C31
-------------------------------------- -------------------------------------- --------------------------------------
2023-06-19 21:15:59                    trunc(sysdate)+21/24+15/1440+59/86400  "timestamp'2023-06-19 21:15:59'"

SCOTT@test01p> exec test_proc_sel_tab(1e3);
PL/SQL procedure successfully completed.
Elapsed: 00:00:14.27

--//注:先执行test_proc_sel_tab ,切换会话执行test_proc_alt_cluster。

--//session 2:
SCOTT@test01p> @ spid
       SID    SERIAL# PROCESS                  SERVER    SPID                     PID  P_SERIAL# C50
---------- ---------- ------------------------ --------- -------------------- ------- ---------- --------------------------------------------------
       267       7490 3624:6852                DEDICATED 6964                      59         13 alter system kill session '267,7490' immediate;

SCOTT@test01p> exec test_proc_alt_cluster(1e4);
BEGIN test_proc_alt_cluster(1e4); END;

*
ERROR at line 1:
ORA-00060: deadlock detected while waiting for resource
ORA-06512: at "SCOTT.TEST_PROC_ALT_CLUSTER", line 4
ORA-06512: at line 1

SCOTT@test01p> @zzdate
C30                                    C30                                    C31
-------------------------------------- -------------------------------------- --------------------------------------
2023-06-19 21:18:38                    trunc(sysdate)+21/24+18/1440+38/86400  "timestamp'2023-06-19 21:18:38'"

--//作者给出的解析,实际上就是形成了相锁.当然这个给看运气,当然对于一个在cluster中的表如果频繁访问,如果修改cluster属性,很
--//有可能遇到这样的情况.

4.我的分析:
--//作者的分析使用gdb,我的测试环境是windows,仅仅简单的使用ashtop观察看看.

SCOTT@test01p> @ ashtop sql_id,SESSION_ID,SESSION_SERIAL#,event 1=1 trunc(sysdate)+21/24+15/1440+59/86400 trunc(sysdate)+21/24+18/1440+38/86400
    Total                                                                                                                       Distinct Distinct
  Seconds     AAS %This   SQL_ID        SESSION_ID SESSION_SERIAL# EVENT              FIRST_SEEN          LAST_SEEN           Execs Seen  Tstamps
--------- ------- ------- ------------- ---------- --------------- ------------------ ------------------- ------------------- ---------- --------
        5      .0   42% | 42y5q6y5vyxsw        267            7490 library cache lock 2023-06-19 21:17:09 2023-06-19 21:17:13          1        5
        5      .0   42% | g677askf3bry6        252           29639 row cache lock     2023-06-19 21:17:09 2023-06-19 21:17:13          1        5
        2      .0   17% | 42y5q6y5vyxsw        267            7490                    2023-06-19 21:17:07 2023-06-19 21:17:08          2        2


SCOTT@test01p> @ sql_id g677askf3bry6
--SQL_ID = g677askf3bry6
SELECT * FROM TEST_TAB;

SCOTT@test01p> @ sql_id 42y5q6y5vyxsw
--SQL_ID = 42y5q6y5vyxsw
--//alter 语句这样查询不到。

SYS@test> @ sharepool/shp4x 42y5q6y5vyxsw 0
TEXT                  KGLHDADR         KGLHDPAR         C40                                        KGLHDLMD   KGLHDPMD   KGLHDIVC KGLOBHD0         KGLOBHD6           KGLOBHS0   KGLOBHS6   KGLOBT16   N0_6_16        N20   KGLNAHSH KGLOBT03        KGLOBT09
--------------------- ---------------- ---------------- ---------------------------------------- ---------- ---------- ---------- ---------------- ---------------- ---------- ---------- ---------- --------- ---------- ---------- ------------- ----------
child handle address  000007FF1527C4D8 000007FF11755510 ALTER CLUSTER test_cluster PCTFREE 18 IN          0          0      10000 00               00                        0          0       3186      3186       3186 2344580892 42y5q6y5vyxsw          0
parent handle address 000007FF11755510 000007FF11755510 ALTER CLUSTER test_cluster PCTFREE 18 IN          0          0      10000 000007FF1153E188 00                     4072          0          0      4072       4072 2344580892 42y5q6y5vyxsw      65535


SCOTT@test01p> @ ash/ash_wait_chains2.sql username||':'||program2||event2 session_type='FOREGROUND'  trunc(sysdate)+21/24+15/1440+59/86400 trunc(sysdate)+21/24+18/1440+38/86400

-- Display ASH Wait Chain Signatures script v0.7 by Tanel Poder ( http://blog.tanelpoder.com )
%This     SECONDS     AAS WAIT_CHAIN                                                                                                                    FIRST_SEEN          LAST_SEEN
------ ---------- ------- ----------------------------------------------------------------------------------------------------------------------------- ------------------- -------------------
  42%           5      .0 -> SCOTT:(sqlplus.exe) row cache lock  -> SCOTT:(sqlplus.exe) library cache lock  -> [idle blocker 1,252,29639 (sqlplus.exe)] 2023-06-19 21:17:09 2023-06-19 21:17:13
  42%           5      .0 -> SCOTT:(sqlplus.exe) library cache lock  -> SCOTT:(sqlplus.exe) row cache lock  -> [idle blocker 1,267,7490 (sqlplus.exe)]  2023-06-19 21:17:09 2023-06-19 21:17:13
  17%           2      .0 -> SCOTT:(sqlplus.exe) ON CPU                                                                                                 2023-06-19 21:17:07 2023-06-19 21:17:08


SCOTT@test01p> @wcx trunc(sysdate)+21/24+15/1440+59/86400 trunc(sysdate)+21/24+18/1440+38/86400
-- Display ASH Wait Chain Signatures script v0.7 by Tanel Poder ( http://blog.tanelpoder.com )
%This     SECONDS     AAS WAIT_CHAIN                                                                                                                                 FIRST_SEEN          LAST_SEEN
------ ---------- ------- ------------------------------------------------------------------------------------------------------------------------------------------ ------------------- -------------------
  42%           5      .0 -> 267,7490,@1=>252,29639,@1=>row cache lock -> 252,29639,@1=>267,7490,@1=>library cache lock -> [idle blocker 1,252,29639 (sqlplus.exe)]  2023-06-19 21:17:09 2023-06-19 21:17:13
  42%           5      .0 -> 252,29639,@1=>267,7490,@1=>library cache lock -> 267,7490,@1=>252,29639,@1=>row cache lock -> [idle blocker 1,267,7490 (sqlplus.exe)]   2023-06-19 21:17:09 2023-06-19 21:17:13
  17%           2      .0 -> ,,@=>267,7490,@1=>                                                                                                                      2023-06-19 21:17:07 2023-06-19 21:17:08
--//  row cache lock 和 library cache lock 形成相互等待,两个session导致互锁。ORA-00060: deadlock detected while waiting for resource。

5. Root Cacuse Analysis
--//链接http://ksun-oracle.blogspot.com/2023/06/one-deadlock-of-row-cache-lock-and.html的解析:

Look gdb lock request order of both sessions:

(a). ALTER CLUSTER GDB shows:
     first 'row cache lock'     reqeust on TEST_CLUSTER by kqrLockPo,
     then  'library cache lock' reqeust on TEST_TAB by kgllkal.

(b). Query GDB shows
     first 'library cache lock' reqeust on TEST_TAB by kgllkal,
     then  'row cache lock'     reqeust on TEST_CLUSTER by kqrLockPo.

We can see that the deadlock is caused by the crossing order of 'row cache lock' and 'library cache lock' reqeusts.

There are also similar cases of deadlocks documented in:

Oracle MOS: Deadlock on Row Cache Lock and Library Cache Lock Dropping REF-PARTITION (Doc ID 2796372.1)
Deadlock Caused by Row Cache and Library Cache Locks (https://nenadnoveljic.com/blog/deadlock-row-cache-lock-library-cache-lock/)

6.附上测试脚本:

> cat wcx.sql
@ tpt/ash/ash_wait_chains2 BLOCKING_SESSION||','||BLOCKING_SESSION_SERIAL#||',@'||BLOCKING_INST_ID||'=>'||session_id||','||SESSION_SERIAL#||',@'||inst_id||'=>'||event 1=1  "&&1" "&&2"

> cat sharepool/shp4x.sql
column N0_6_16 format 99999999
SELECT /*+ USE_CONCAT(@"SEL$1" 8 OR_PREDICATES(1)) */ DECODE (kglhdadr,
               kglhdpar, 'parent handle address',
               'child handle address')
        text,
       kglhdadr,
       kglhdpar,
       substr(kglnaobj,1,40) c40,
           KGLHDLMD,
           KGLHDPMD,
           kglhdivc,
       kglobhd0,
       kglobhd6,
       kglobhs0,kglobhs6,kglobt16,
       kglobhs0+kglobhs6+kglobt16 N0_6_16,
           kglobhs0+kglobhs1+kglobhs2+kglobhs3+kglobhs4+kglobhs5+kglobhs6+kglobt16 N20,
           kglnahsh,
           kglobt03,
           kglobt09
  FROM x$kglob
 WHERE kglobt03 = lower('&1') or KGLNAHSH= &2;

7.如果换成表呢?

--//drop table test_tab purge;
--//drop cluster test_cluster;

SCOTT@test01p> create table test_tab (x, y) as select level, 'abc' from dual connect by level <= 3;
Table created.


create or replace procedure test_proc_alt_table(p_cnt number) as
begin
  for i in 1..p_cnt loop
    execute immediate 'ALTER TABLE test_tab PCTFREE 18 INITRANS 19';
  end loop;
end;
/

--//session 1:
SCOTT@test01p> exec test_proc_sel_tab(1e3);
PL/SQL procedure successfully completed.
Elapsed: 00:00:11.26

--//session 2:
SCOTT@test01p> exec test_proc_alt_table(1e4);
PL/SQL procedure successfully completed.
--//没有任何问题。
关闭

用微信“扫一扫”