首页 > 代码库 > Postgresql 报表性能测试
Postgresql 报表性能测试
1. 测试数据
表结构 一:
CREATE TABLE fact_data
(
day_id integer,
popdb_id integer,
region_id integer,
area_id integer,
city_id integer,
product_id integer,
f1 integer,
f2 integer,
f3 integer,
f4 integer,
f5 integer,
f6 integer,
f7 integer,
f8 integer,
user_id integer
)
表结构二
CREATE TABLE fact_data_hstore
(
day_id integer,
popdb_id integer,
region_id integer,
area_id integer,
city_id integer,
product_id integer,
user_id integer,
data hstore
)
2.测试报告under Windows
2 CPUs, 每个CPU 4 核,3GHZ
磁盘 EVA 4400 36 disks
Postgresql 9.3
2.1测试结果(一个字段 四百万条记录)
SQLServer:4060579 records
select region_id, avg(f1*1.0) from fact_data group by region_id
结果集合:返回9条记录,2个字段,耗时 500毫秒
Postgresql: 4060579 records
select region_id, avg(f1) from fact_data group by region_id
结果集合:返回9条记录,2个字段,耗时 1310毫秒
Postgresql: 4060579 records不加索引
select region_id, avg((data->‘f1‘)::integer ) from fact_data_hstore
group by region_id
结果集合:返回9条记录,2个字段,耗时 3051毫秒
Postgresql: 4060579 records 增加索引
select region_id, avg((data->‘f1‘)::integer ) from fact_data_hstore
group by region_id
结果集合:返回9条记录,2个字段,耗时 3000毫秒
索引对这个SQL语句性能没有作用。
2.2测试结果(八个字段 四百万条记录)
SQLServer:4060579 records
select region_id,avg(f1) as af1
from fact_data
group by region_id
结果集合:返回9条记录,2个字段,耗时 1316毫秒
Postgresql: 4060579 records
select region_id,avg(f1) as af1, avg(f2) as af2, avg(f3) as af3, avg(f4) as af4, avg(f5) as af5, avg(f6) as af6, avg(f7) as af7, avg(f8) as af8
from fact_data
group by region_id
结果集合:返回9条记录,9个字段,耗时 2850毫秒
Postgresql: 4060579 records不加索引
select region_id, avg((data->‘f1‘)::integer) as af1, avg((data->‘f2‘)::integer) as af2, avg((data->‘f3‘)::integer) as af3, avg((data->‘f4‘)::integer) as af4,
avg((data->‘f5‘)::integer) as af5, avg((data->‘f6‘)::integer) as af6, avg((data->‘f7‘)::integer) as af7, avg((data->‘f8‘)::integer) as af8
from fact_data_hstore
group by region_id
结果集合:返回9条记录,9个字段,耗时 12690毫秒
2.3测试结果(八个字段 一百万条记录)
Postgresql: 1000000 records
select region_id,avg(f1) as af1, avg(f2) as af2, avg(f3) as af3, avg(f4) as af4, avg(f5) as af5, avg(f6) as af6, avg(f7) as af7, avg(f8) as af8
from fact_data_1m
group by region_id
结果集合:返回9条记录,9个字段,耗时 720毫秒
Postgresql: 1000000 records不加索引
select region_id, avg((data->‘f1‘)::integer) as af1, avg((data->‘f2‘)::integer) as af2, avg((data->‘f3‘)::integer) as af3, avg((data->‘f4‘)::integer) as af4,
avg((data->‘f5‘)::integer) as af5, avg((data->‘f6‘)::integer) as af6, avg((data->‘f7‘)::integer) as af7, avg((data->‘f8‘)::integer) as af8
from fact_data_hstore_1m
group by region_id
结果集合:返回9条记录,9个字段,耗时 3109毫秒
3.测试报告under Linux virtual machine
虚拟机 2个虚核,普通硬盘
3.1测试结果(一个字段 四百万条记录)
Postgresql: 4060579 records
select region_id, avg(f1) from fact_data group by region_id
结果集合:返回9条记录,2个字段,耗时 860毫秒
Postgresql: 4060579 records不加索引
select region_id, avg((data->‘f1‘)::integer ) from fact_data_hstore
group by region_id
结果集合:返回9条记录,2个字段,耗时 1630毫秒
3.2测试结果(八个字段 四百万条记录)
Postgresql: 4060579 records
select region_id,avg(f1) as af1, avg(f2) as af2, avg(f3) as af3, avg(f4) as af4, avg(f5) as af5, avg(f6) as af6, avg(f7) as af7, avg(f8) as af8
from fact_data
group by region_id
结果集合:返回9条记录,9个字段,耗时 1708毫秒
Postgresql: 4060579 records不加索引
select region_id, avg((data->‘f1‘)::integer) as af1, avg((data->‘f2‘)::integer) as af2, avg((data->‘f3‘)::integer) as af3, avg((data->‘f4‘)::integer) as af4,
avg((data->‘f5‘)::integer) as af5, avg((data->‘f6‘)::integer) as af6, avg((data->‘f7‘)::integer) as af7, avg((data->‘f8‘)::integer) as af8
from fact_data_hstore
group by region_id
结果集合:返回9条记录,9个字段,耗时 6771毫秒
3.3测试结果(八个字段 一百万条记录)
Postgresql: 1000000 records
select region_id,avg(f1) as af1, avg(f2) as af2, avg(f3) as af3, avg(f4) as af4, avg(f5) as af5, avg(f6) as af6, avg(f7) as af7, avg(f8) as af8
from fact_data_1m
group by region_id
结果集合:返回9条记录,9个字段,耗时 470毫秒
Postgresql: 1000000 records不加索引
select region_id, avg((data->‘f1‘)::integer) as af1, avg((data->‘f2‘)::integer) as af2, avg((data->‘f3‘)::integer) as af3, avg((data->‘f4‘)::integer) as af4,
avg((data->‘f5‘)::integer) as af5, avg((data->‘f6‘)::integer) as af6, avg((data->‘f7‘)::integer) as af7, avg((data->‘f8‘)::integer) as af8
from fact_data_hstore_1m
group by region_id
结果集合:返回9条记录,9个字段,耗时 1700毫秒
4.LU表分拆测试
添加LU_POPDB
CREATE TABLE lu_pop
(
popdb_id integer,
region_id integer,
area_id integer,
city_id integer
)
WITH (
OIDS=FALSE
);
ALTER TABLE lu_pop
OWNER TO postgres;
insert into lu_pop
select distinct popdb_id,region_id,area_id,city_id from fact_data
Postgresql:
select p.region_id, avg(f1) as af1, avg(f2) as af2, avg(f3) as af3, avg(f4) as af4, avg(f5) as af5, avg(f6) as af6, avg(f7) as af7, avg(f8) as af8 from
fact_data f
inner join lu_pop p on f.popdb_id = p.popdb_id
group by p.region_id
结果集合:
四百万 返回9条记录,9个字段,耗时 3201毫秒
一百万 返回9条记录,9个字段,耗时 820毫秒
Postgresql:
select p.region_id, avg((data->‘f1‘)::integer) as af1, avg((data->‘f2‘)::integer) as af2, avg((data->‘f3‘)::integer) as af3, avg((data->‘f4‘)::integer) as af4,
avg((data->‘f5‘)::integer) as af5, avg((data->‘f6‘)::integer) as af6, avg((data->‘f7‘)::integer) as af7, avg((data->‘f8‘)::integer) as af8
from fact_data_hstore f
inner join lu_pop p on f.popdb_id = p.popdb_id
group by p.region_id
结果集合:
四百万返回9条记录,9个字段,耗时 9101毫秒
一百万返回9条记录,9个字段,耗时 2310毫秒
添加LU_POPDB_hstore
CREATE TABLE lu_pop_hstore
(
popdb_id integer,
hierarchies hstore
)
WITH (
OIDS=FALSE
);
ALTER TABLE lu_pop_hstore
OWNER TO postgres;
insert into lu_pop_hstore
select popdb_id,hstore(‘region_id=>‘||region_id||‘,‘||‘area_id=>‘||area_id||‘,‘||‘city_id=>‘||city_id) from lu_pop;
select p.hierarchies->‘region_id‘, avg(f1) as af1, avg(f2) as af2, avg(f3) as af3, avg(f4) as af4, avg(f5) as af5, avg(f6) as af6, avg(f7) as af7, avg(f8) as af8 from
fact_data f
inner join lu_pop_hstore p on f.popdb_id = p.popdb_id
group by p.hierarchies->‘region_id‘
结果集合:
四百万数据 返回9条记录,9个字段,耗时 3760毫秒
一百万数据 返回9条记录,9个字段,耗时 990毫秒
select p.hierarchies->‘region_id‘, avg((data->‘f1‘)::integer) as af1, avg((data->‘f2‘)::integer) as af2, avg((data->‘f3‘)::integer) as af3, avg((data->‘f4‘)::integer) as af4,
avg((data->‘f5‘)::integer) as af5, avg((data->‘f6‘)::integer) as af6, avg((data->‘f7‘)::integer) as af7, avg((data->‘f8‘)::integer) as af8
from fact_data_hstore f
inner join lu_pop_hstore p on f.popdb_id = p.popdb_id
group by p.hierarchies->‘region_id‘
结果集合:
四百万数据 返回9条记录,9个字段,耗时 9852毫秒
一百万数据 返回9条记录,9个字段,耗时 2500毫秒
5.测试结论
| 常规列 Fact (四百万) | Hstore Fact(四百万) | 常规列 Fact (一百万) | Hstore Fact(一百万) |
全打入事实表Windows | 2,850 | 12,690 | 720 | 3,109 |
全打入事实表 | 1,708 | 6,771 | 470 | 1,700 |
拆分LU | 3,200 | 9,101 | 820 | 2,310 |
拆分LU为Hstore格式 | 3,760 | 9,852 | 990 | 2,500 |
分列存放Fact数据 VS Hstore存放事实数据
分列存放的fact数据比Hstore存放的事实数据性能高150%到300%倍左右。因而用Hstore存放事实数据性能较差。
独立LU表VS把LU全部放入事实表
拆分LU表与把LU全部放入事实表相比,全部放入事实表比拆分到LU表中性能高30%到80%倍左右,把LU拆分出来的性能较差。
独立LU表VS 独立LU Hstore表
按照分列存放LU的数据比用Hstore格式存放LU数据,性能高8% 到20% 左右。因而用HStore存放LU数据带来的性能降低还是可以接受的。
数据量四百万 VS 一百万
响应时间和数据量几乎成正比,因而一定要进行表分区,让数据量控制在可以接受的范围之内。
Windows VS Linux 虚拟机
即使Windows的硬件比较强大,但是性能还是比Linux虚拟机差 50% 到80%。
Postgresql 报表性能测试