云服务器内容精选

  • 现象描述 某局点测试中:ddw_f10_op_cust_asset_mon为分区表,分区键为year_mth,此字段是由年月两个值拼接而成的字符串。 测试SQL如下: 1 2 3 4 select count(1) from t_ddw_f10_op_cust_asset_mon b1 where b1.year_mth between to_char(add_months(to_date(''20170222'','yyyymmdd'), -11),'yyyymm') and substr(''20170222'',1 ,6 ); 测试结果显示此SQL的表Scan耗时长达135s。初步猜测可能是性能瓶颈点。 add_months为本地适配函数: 1 2 3 4 5 6 7 8 9 10 11 12 CREATE OR REPLACE FUNCTION ADD_MONTHS(date, integer) RETURNS date AS $$ SELECT CASE WHEN (EXTRACT(day FROM $1) = EXTRACT(day FROM (date_trunc('month', $1) + INTERVAL '1 month - 1 day'))) THEN date_trunc('month', $1) + CAST($2 + 1 || ' month - 1 day' as interval) ELSE $1 + CAST($2 || ' month' as interval) END $$ LANGUAGE SQL IMMUTABLE;
  • 案例环境准备 为了便于案例演示,需准备建表语句如下: --清理环境 DROP SCHEMA IF EXISTS dn_gather_test CASCADE; CREATE SCHEMA dn_gather_test; SET current_schema=dn_gather_test; --创建测试表 CREATE TABLE t1(a INT, b INT, c INT, d INT); CREATE TABLE t2(a INT, b INT, c INT, d INT); CREATE TABLE t3(a INT, b INT, c INT, d INT); CREATE TABLE t4(a INT, b INT, c INT, d INT);
  • 案例环境准备 为了便于规则的使用场景演示,需准备建表语句如下: SET client_min_messages = warning; SET CLIENT_ENCODING = 'UTF8'; --清理环境。 DROP SCHEMA IF EXISTS costbased_rule_test cascade; CREATE SCHEMA costbased_rule_test; SET current_schema = costbased_rule_test; SET enable_codegen = off; DROP TABLE IF EXISTS costbased_rule_test.ct1; DROP TABLE IF EXISTS costbased_rule_test.ct2; DROP TABLE IF EXISTS costbased_rule_test.ct3; DROP TABLE IF EXISTS costbased_rule_test.ct4; --创建测试表。 CREATE TABLE ct1 (a INT, b INT, c INT, d INT); CREATE TABLE ct2 (a INT, b INT, c INT, d INT); CREATE TABLE ct3 (a INT, b INT, c INT, d INT); CREATE TABLE ct4 (a INT, b INT, c INT, d INT); CREATE INDEX idx_ct1_b ON ct1(b); CREATE INDEX idx_ct2_c ON ct2(c); CREATE INDEX idx_ct3_c ON ct3(c); --插入数据。 INSERT INTO ct1 (a, b, c) VALUES (generate_series(1, 100), generate_series(200, 300), left(random()::int, 100)); INSERT INTO ct2 VALUES(1,2,3,4),(3,4,5,6); INSERT INTO ct3 (a, b, c, d) VALUES (generate_series(1, 10), generate_series(20, 30), left(random()::int, 10), left(random()::int, 10)); --更新统计信息。 ANALYZE ct1; ANALYZE ct2; ANALYZE ct3;
  • 案例环境准备 为了便于规则的使用场景演示,需准备建表语句如下: --清理环境 DROP SCHEMA IF EXISTS rewrite_rule_guc_test CASCADE; CREATE SCHEMA rewrite_rule_guc_test; SET current_schema=rewrite_rule_guc_test; --创建测试表 CREATE TABLE t(c1 INT, c2 INT, c3 INT, c4 INT); CREATE TABLE t1(c1 INT, c2 INT, c3 INT, c4 INT); CREATE TABLE t2(c1 INT, c2 INT, c3 INT, c4 INT);
  • 现象描述 查询与销售部所有员工的信息: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 --建表 CREATE TABLE staffs (staff_id NUMBER(6) NOT NULL, first_name VARCHAR2(20), last_name VARCHAR2(25), employment_id VARCHAR2(10), section_id NUMBER(4), state_name VARCHAR2(10), city VARCHAR2(10)); CREATE TABLE sections(section_id NUMBER(4), place_id NUMBER(4), section_name VARCHAR2(20)); CREATE TABLE states(state_id NUMBER(4)); CREATE TABLE places(place_id NUMBER(4), state_id NUMBER(4)); --优化前查询 EXPLAIN SELECT staff_id,first_name,last_name,employment_id,state_name,city FROM staffs,sections,states,places WHERE sections.section_name='Sales' AND staffs.section_id = sections.section_id AND sections.place_id = places.place_id AND places.state_id = states.state_id ORDER BY staff_id; --创建索引 CREATE INDEX loc_id_pk ON places(place_id); CREATE INDEX state_c_id_pk ON states(state_id); --优化后查询 EXPLAIN SELECT staff_id,first_name,last_name,employment_id,state_name,city FROM staffs,sections,states,places WHERE sections.section_name='Sales' AND staffs.section_id = sections.section_id AND sections.place_id = places.place_id AND places.state_id = states.state_id ORDER BY staff_id;
  • 案例环境准备 为了便于规则的使用场景演示,需准备建表语句如下: SET client_min_messages = warning; SET CLIENT_ENCODING = 'UTF8'; --清理环境。 DROP SCHEMA IF EXISTS costbased_rule_test cascade; CREATE SCHEMA costbased_rule_test; SET current_schema = costbased_rule_test; SET enable_codegen = off; DROP TABLE IF EXISTS costbased_rule_test.ct1; DROP TABLE IF EXISTS costbased_rule_test.ct2; DROP TABLE IF EXISTS costbased_rule_test.ct3; DROP TABLE IF EXISTS costbased_rule_test.ct4; --创建测试表。 CREATE TABLE ct1 (a INT, b INT, c INT, d INT); CREATE TABLE ct2 (a INT, b INT, c INT, d INT); CREATE TABLE ct3 (a INT, b INT, c INT, d INT); CREATE TABLE ct4 (a INT, b INT, c INT, d INT); CREATE INDEX idx_ct1_b ON ct1(b); CREATE INDEX idx_ct2_c ON ct2(c); CREATE INDEX idx_ct3_c ON ct3(c); --插入数据。 INSERT INTO ct1 (a, b, c) VALUES (generate_series(1, 100), generate_series(200, 300), left(random()::int, 100)); INSERT INTO ct2 VALUES(1,2,3,4),(3,4,5,6); INSERT INTO ct3 (a, b, c, d) VALUES (generate_series(1, 10), generate_series(20, 30), left(random()::int, 10), left(random()::int, 10)); --更新统计信息。 ANALYZE ct1; ANALYZE ct2; ANALYZE ct3;
  • 优化说明 此优化的核心就是消除子查询。分析业务场景发现a.ca_address_sk不为NULL,那么从SQL语义出发,可以等价改写SQL为: 1 2 3 4 5 select count(*) from customer_address_001 a4, customer_address_001 a where a4.ca_address_sk = a.ca_address_sk group by a.ca_address_sk; 为了保证改写的等效性,在customer_address_001. ca_address_sk加了not null约束。
  • 现象描述 查询与销售部所有员工的信息: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 --建表 CREATE TABLE staffs (staff_id NUMBER(6) NOT NULL, first_name VARCHAR2(20), last_name VARCHAR2(25), employment_id VARCHAR2(10), section_id NUMBER(4), state_name VARCHAR2(10), city VARCHAR2(10)); CREATE TABLE sections(section_id NUMBER(4), place_id NUMBER(4), section_name VARCHAR2(20)); CREATE TABLE states(state_id NUMBER(4)); CREATE TABLE places(place_id NUMBER(4), state_id NUMBER(4)); --优化前查询 EXPLAIN SELECT staff_id,first_name,last_name,employment_id,state_name,city FROM staffs,sections,states,places WHERE sections.section_name='Sales' AND staffs.section_id = sections.section_id AND sections.place_id = places.place_id AND places.state_id = states.state_id ORDER BY staff_id; --创建索引 CREATE INDEX loc_id_pk ON places(place_id); CREATE INDEX state_c_id_pk ON states(state_id); --优化后查询 EXPLAIN SELECT staff_id,first_name,last_name,employment_id,state_name,city FROM staffs,sections,states,places WHERE sections.section_name='Sales' AND staffs.section_id = sections.section_id AND sections.place_id = places.place_id AND places.state_id = states.state_id ORDER BY staff_id;
  • 现象描述 查询与销售部所有员工的信息: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 --建表 CREATE TABLE staffs (staff_id NUMBER(6) NOT NULL, first_name VARCHAR2(20), last_name VARCHAR2(25), employment_id VARCHAR2(10), section_id NUMBER(4), state_name VARCHAR2(10), city VARCHAR2(10)); CREATE TABLE sections(section_id NUMBER(4), place_id NUMBER(4), section_name VARCHAR2(20)); CREATE TABLE states(state_id NUMBER(4)); CREATE TABLE places(place_id NUMBER(4), state_id NUMBER(4)); --优化前查询 EXPLAIN SELECT staff_id,first_name,last_name,employment_id,state_name,city FROM staffs,sections,states,places WHERE sections.section_name='Sales' AND staffs.section_id = sections.section_id AND sections.place_id = places.place_id AND places.state_id = states.state_id ORDER BY staff_id; --创建索引 CREATE INDEX loc_id_pk ON places(place_id); CREATE INDEX state_c_id_pk ON states(state_id); --优化后查询 EXPLAIN SELECT staff_id,first_name,last_name,employment_id,state_name,city FROM staffs,sections,states,places WHERE sections.section_name='Sales' AND staffs.section_id = sections.section_id AND sections.place_id = places.place_id AND places.state_id = states.state_id ORDER BY staff_id;
  • 现象描述 查询与销售部所有员工的信息: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 --建表 CREATE TABLE staffs (staff_id NUMBER(6) NOT NULL, first_name VARCHAR2(20), last_name VARCHAR2(25), employment_id VARCHAR2(10), section_id NUMBER(4), state_name VARCHAR2(10), city VARCHAR2(10)); CREATE TABLE sections(section_id NUMBER(4), place_id NUMBER(4), section_name VARCHAR2(20)); CREATE TABLE states(state_id NUMBER(4)); CREATE TABLE places(place_id NUMBER(4), state_id NUMBER(4)); --优化前查询 EXPLAIN SELECT staff_id,first_name,last_name,employment_id,state_name,city FROM staffs,sections,states,places WHERE sections.section_name='Sales' AND staffs.section_id = sections.section_id AND sections.place_id = places.place_id AND places.state_id = states.state_id ORDER BY staff_id; --创建索引 CREATE INDEX loc_id_pk ON places(place_id); CREATE INDEX state_c_id_pk ON states(state_id); --优化后查询 EXPLAIN SELECT staff_id,first_name,last_name,employment_id,state_name,city FROM staffs,sections,states,places WHERE sections.section_name='Sales' AND staffs.section_id = sections.section_id AND sections.place_id = places.place_id AND places.state_id = states.state_id ORDER BY staff_id;
  • 优化分析2 在以上查询中,supplier、lineitem、partsupp三表做hashjoin的条件为(lineitem.l_suppkey = supplier.s_suppkey) AND (lineitem.l_partkey = partsupp.ps_partkey),此hashjoin条件中存在两个过滤条件,这前一个过滤条件中的lineitem.l_suppkey和后一个过滤条件中的lineitem.l_partkey同为lineitem表的两列,这两列存在强相关的关联关系。在这种情况,估算hashjoin条件的选择率时,如果使用cost_param的bit1为0时,实际是将AND的两个过滤条件分别计算的2个选择率的值相乘来得到hashjoin条件的选择率,导致行数估算不准确,查询性能较差。所以需要将cost_param的bit1为1时,选择最小的选择率作为总的选择率估算行数比较准确,查询性能较好,优化后的计划如下图所示:
  • 现象描述2 当cost_param的bit1(set cost_param=2)为1时,表示求多个过滤条件(Filter)的选择率时,选择最小的作为总的选择率,而非两者乘积,此方法在过滤条件的列之间关联性较强时估算更加准确。下面查询的例子是cost_param的bit1为1时的优化场景。 表结构如下所示: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 CREATE TABLE NATION ( N_NATIONKEY INT NOT NULL , N_NAME CHAR(25) NOT NULL , N_REGIONKEY INT NOT NULL , N_COMMENT VARCHAR(152) ) distribute by replication; CREATE TABLE SUPPLIER ( S_SUPPKEY BIGINT NOT NULL , S_NAME CHAR(25) NOT NULL , S_ADDRESS VARCHAR(40) NOT NULL , S_NATIONKEY INT NOT NULL , S_PHONE CHAR(15) NOT NULL , S_ACCTBAL DECIMAL(15,2) NOT NULL , S_COMMENT VARCHAR(101) NOT NULL ) distribute by hash(S_SUPPKEY); CREATE TABLE PARTSUPP ( PS_PARTKEY BIGINT NOT NULL , PS_SUPPKEY BIGINT NOT NULL , PS_AVAILQTY BIGINT NOT NULL , PS_SUPPLYCOST DECIMAL(15,2)NOT NULL , PS_COMMENT VARCHAR(199) NOT NULL )distribute by hash(PS_PARTKEY); 查询语句如下所示: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 set cost_param=2; explain verbose select nation, sum(amount) as sum_profit from ( select n_name as nation, l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount from supplier, lineitem, partsupp, nation where s_suppkey = l_suppkey and ps_suppkey = l_suppkey and ps_partkey = l_partkey and s_nationkey = n_nationkey ) as profit group by nation order by nation; 当cost_param的bit1为0时,执行计划如下图所示:
  • 现象描述 查询与销售部所有员工的信息: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 --建表 CREATE TABLE staffs (staff_id NUMBER(6) NOT NULL, first_name VARCHAR2(20), last_name VARCHAR2(25), employment_id VARCHAR2(10), section_id NUMBER(4), state_name VARCHAR2(10), city VARCHAR2(10)); CREATE TABLE sections(section_id NUMBER(4), place_id NUMBER(4), section_name VARCHAR2(20)); CREATE TABLE states(state_id NUMBER(4)); CREATE TABLE places(place_id NUMBER(4), state_id NUMBER(4)); --优化前查询 EXPLAIN SELECT staff_id,first_name,last_name,employment_id,state_name,city FROM staffs,sections,states,places WHERE sections.section_name='Sales' AND staffs.section_id = sections.section_id AND sections.place_id = places.place_id AND places.state_id = states.state_id ORDER BY staff_id; --创建索引 CREATE INDEX loc_id_pk ON places(place_id); CREATE INDEX state_c_id_pk ON states(state_id); --优化后查询 EXPLAIN SELECT staff_id,first_name,last_name,employment_id,state_name,city FROM staffs,sections,states,places WHERE sections.section_name='Sales' AND staffs.section_id = sections.section_id AND sections.place_id = places.place_id AND places.state_id = states.state_id ORDER BY staff_id;
  • 优化说明 此优化的核心就是消除子查询。分析业务场景发现a.ca_address_sk不为null,那么从SQL语义出发,可以等价改写SQL为: 1 2 3 4 5 select count(*) from customer_address_001 a4, customer_address_001 a where a4.ca_address_sk = a.ca_address_sk group by a.ca_address_sk; 为了保证改写的等效性,在customer_address_001. ca_address_sk加了not null约束。
  • 现象描述 某局点测试中:ddw_f10_op_cust_asset_mon为分区表,分区键为year_mth,此字段是由年月两个值拼接而成的字符串。 测试SQL如下: 1 2 3 4 select count(1) from t_ddw_f10_op_cust_asset_mon b1 where b1.year_mth between to_char(add_months(to_date(''20170222'','yyyymmdd'), -11),'yyyymm') and substr(''20170222'',1 ,6 ); 测试结果显示此SQL的表Scan耗时长达135s。初步猜测可能是性能瓶颈点。 add_months为本地适配函数: 1 2 3 4 5 6 7 8 9 10 11 12 CREATE OR REPLACE FUNCTION ADD_MONTHS(date, integer) RETURNS date AS $$ SELECT CASE WHEN (EXTRACT(day FROM $1) = EXTRACT(day FROM (date_trunc('month', $1) + INTERVAL '1 month - 1 day'))) THEN date_trunc('month', $1) + CAST($2 + 1 || ' month - 1 day' as interval) ELSE $1 + CAST($2 || ' month' as interval) END $$ LANGUAGE SQL IMMUTABLE;