wiki:db/tests/MySql/AriaVsMyISAM
Last modified 5 years ago Last modified on 02/21/2014 09:25:14 AM

Test of Aria vs MyISAM using MariaDB distribution

Test run on lsst-dbdev6.ncsa.illinois.edu, mid Feb, 2014, using "MariaDB-10.0.8 (release candidate)"

building/configuring/installing

Building:

cmake -DCMAKE_INSTALL_PREFIX=/usr/local/home/becla/mariaDB/dist
make
make install

Configuring:

  • contents of ../my.cnf:
[mysqld]
datadir=/u2/jacek/mariaDB/data
tmpdir=/u2/jacek/mariaDB/tmp

socket=/u2/jacek/mariaDB/mysql.sock

default-storage-engine=MyISAM 
max_connections=1000
open-files-limit=16364
### general_log=1
### log-output=TABLE
max_allowed_packet = 16M
key_buffer_size = 4GB
myisam_max_sort_file_size = 3072GB
interactive_timeout=31449600

[mysqld_safe]
log-error=/u2/jacek/mariaDB/mysqld.err
pid-file=/u2/jacek/mariaDB/mysqld.pid

Install database

./scripts/mysql_install_db --defaults-file=/usr/local/home/becla/mariaDB/my.cnf

start server

./bin/mysqld_safe --defaults-file=/usr/local/home/becla/mariaDB/my.cnf&

connect

./bin/mysql -S /u2/jacek/mariaDB/mysql.sock

prepare test data

Data used for the test: 25 million 50-column rows based on DeepForcedSource?.

# (on lsst10, took 2m24s)
CREATE TABLE jacek.deepFSrc_25Mrows_50cols AS 
   SELECT deepForcedSourceId, scienceCcdExposureId, filterId, timeMid, expTime,  deepSourceId, ra, decl, raVar, declVar, radeclCov, htmId20, x, y, xVar, yVar, xyCov, psfFlux, psfFluxSigma, apFlux, apFluxSigma, modelFlux, modelFluxSigma, instFlux, instFluxSigma, apCorrection, apCorrectionSigma, shapeIx, shapeIy, shapeIxVar, shapeIyVar, shapeIxIyCov, shapeIxx, shapeIyy, shapeIxy, shapeIxxVar, shapeIyyVar, shapeIxyVar, shapeIxxIyyCov, shapeIxxIxyCov, shapeIyyIxyCov, extendedness, flagNegative, flagBadMeasCentroid, flagPixEdge, flagPixInterpAny, flagPixInterpCen, flagPixSaturAny, flagPixSaturCen, flagBadPsfFlux FROM DC_W13_Stripe82.DeepForcedSource LIMIT 25000000;

# on lsst-dbdev, (took 11 min. Output file is 14GB)
time mysqldump --host lsst10 -u adm_becla -p jacek deepFSrc_25Mrows_50cols > /u1/jacek/deepFSrc_25Mrows_50cols.dump

# create db
mysql -S /u2/jacek/mariaDB/mysql.sock -e "create database t"

# load table (took 33m 41s)
time mysql -S /u2/jacek/mariaDB/mysql.sock t < /u1/jacek/deepFSrc_25Mrows_50cols.dump

# create tables with different # columns

rename table deepFSrc_25Mrows_50cols to t_50cols_myisam;

# 2 min 48 sec
create table t_40cols_myisam as select deepForcedSourceId, scienceCcdExposureId, filterId, timeMid, expTime, deepSourceId, ra, decl, raVar, declVar, radeclCov, htmId20, x, y, xVar, yVar, xyCov, psfFlux, psfFluxSigma, apFlux, apFluxSigma, modelFlux, modelFluxSigma, instFlux, instFluxSigma, apCorrection, apCorrectionSigma, shapeIx, shapeIy, shapeIxVar, shapeIyVar, shapeIxIyCov, shapeIxx, shapeIyy, shapeIxy, shapeIxxVar, shapeIyyVar, shapeIxyVar, shapeIxxIyyCov, shapeIxxIxyCov FROM t_50cols_myisam;

# 1 min 30 sec
create table t_30cols_myisam as select deepForcedSourceId, scienceCcdExposureId, filterId, timeMid, expTime, deepSourceId, ra, decl, raVar, declVar, radeclCov, htmId20, x, y, xVar, yVar, xyCov, psfFlux, psfFluxSigma, apFlux, apFluxSigma, modelFlux, modelFluxSigma, instFlux, instFluxSigma, apCorrection, apCorrectionSigma, shapeIx, shapeIy, shapeIxVar FROM t_40cols_myisam;

# 1 min 30 sec
create table t_20cols_myisam as select deepForcedSourceId, scienceCcdExposureId, filterId, timeMid, expTime, deepSourceId, ra, decl, raVar, declVar, radeclCov, htmId20, x, y, xVar, yVar, xyCov, psfFlux, psfFluxSigma, apFlux FROM t_30cols_myisam;

# 43 sec
create table t_10cols_myisam as select deepForcedSourceId, scienceCcdExposureId, filterId, timeMid, expTime, deepSourceId, ra, decl, raVar, declVar FROM t_20cols_myisam;

create table t_05cols_myisam as select deepForcedSourceId, scienceCcdExposureId, filterId, timeMid, expTime FROM t_10cols_myisam;

create table t_04cols_myisam as select deepForcedSourceId, scienceCcdExposureId, filterId, timeMid FROM t_05cols_myisam;

create table t_03cols_myisam as select deepForcedSourceId, scienceCcdExposureId, filterId FROM t_04cols_myisam;

create table t_02cols_myisam as select deepForcedSourceId, scienceCcdExposureId FROM t_03cols_myisam;

create table t_01cols_myisam as select deepForcedSourceId FROM t_02cols_myisam;

# and create similar set for Aria

create table t_50cols_aria like t_50cols_myisam; alter table t_50cols_aria engine=Aria;
create table t_40cols_aria like t_40cols_myisam; alter table t_40cols_aria engine=Aria;
create table t_30cols_aria like t_30cols_myisam; alter table t_30cols_aria engine=Aria;
create table t_20cols_aria like t_20cols_myisam; alter table t_20cols_aria engine=Aria;
create table t_10cols_aria like t_10cols_myisam; alter table t_10cols_aria engine=Aria;
create table t_05cols_aria like t_05cols_myisam; alter table t_05cols_aria engine=Aria;
create table t_04cols_aria like t_04cols_myisam; alter table t_04cols_aria engine=Aria;
create table t_03cols_aria like t_03cols_myisam; alter table t_03cols_aria engine=Aria;
create table t_02cols_aria like t_02cols_myisam; alter table t_02cols_aria engine=Aria;
create table t_01cols_aria like t_01cols_myisam; alter table t_01cols_aria engine=Aria;

insert into t_50cols_aria select * from t_50cols_myisam; # took 12 min
insert into t_40cols_aria select * from t_40cols_myisam; # took 11 min
insert into t_30cols_aria select * from t_30cols_myisam; # took 9 min
insert into t_20cols_aria select * from t_20cols_myisam; # took 5 min
insert into t_10cols_aria select * from t_10cols_myisam;
insert into t_05cols_aria select * from t_05cols_myisam;
insert into t_04cols_aria select * from t_04cols_myisam;
insert into t_03cols_aria select * from t_03cols_myisam;
insert into t_02cols_aria select * from t_02cols_myisam;
insert into t_01cols_aria select * from t_01cols_myisam;

Storage overhead

columns	   aria		   MyISAM	aria:myisam
	  [bytes]	   [bytes]	
50	6,151,643,136	5,613,095,416	1.10
40	5,817,974,784	7,925,000,000	0.73
30	5,120,294,912	5,900,000,000	0.87
20	3,776,249,856	3,875,000,000	0.97
10	2,023,079,936	1,825,000,000	1.11
5	1,083,662,336	  825,000,000	1.31
4	1,083,662,336	  725,000,000	1.49
3	1,083,662,336	  525,000,000	2.06
2	1,083,662,336	  425,000,000	2.55
1	1,083,662,336	  225,000,000	4.82

The unusual file size for MyISAM 50-column table is due to the dynamic row format (all other tables have fixed row format). In the dynamic row format, all numeric columns got "no zeros" flag (myisamchk -dvv shows that). So, if the value in the column is exactly 0, it's not stored at all (but instead a special bit in the row header is set). That's why a dynamic row is notably smaller (we have lots of zeros in the table).

Thought: we should use a data set with less zeros to do a more realistic estimates (for both MyISAM and Aria).

Performance

Full table scan:

  • select count(*) from t_50cols_myisam where psfFlux > 0.1; # took 47.23 sec
  • select count(*) from t_50cols_aria where psfFlux > 0.1; # took 53.42 sec

Concurrency - run 8 queries concurrently

"select count(*) from tMyISAM where psfFlux > 0.01",
"select count(*) from tMyISAM where psfFlux > 0.1",
"select count(*) from tMyISAM where psfFlux > 0.1 and deepForcedSourceId > 12345",
"select count(*) from tMyISAM where deepForcedSourceId > 5446",
"select count(*) from tMyISAM where deepForcedSourceId <= 5134446",
"select count(*) from tMyISAM where psfFlux > 0.8 and deepForcedSourceId = 44",
"select count(*) from tMyISAM where psfFlux > -0.2",
"select count(*) from tMyISAM where psfFlux > 0.23"

~47 sec each for myisam, ~57 for aria