Recipe How DC2 Object Catalog Was Built
[DC2 Test Data] [LSST Database]
Used hardware: lsst-dev02.slac.stanford.edu
1) prepare input files (used files: D4.2007B.R.cat and D4.2007B.I.cat)
- remove leading whitespaces
- convert spaces to ','
- remove lines with #
2) create schema (schema.sql file)
CREATE DATABASE D4; USE D4; CREATE TABLE r ( NUMBER DOUBLE NOT NULL, X_IMAGE DOUBLE NOT NULL, Y_IMAGE DOUBLE NOT NULL, ALPHA_J2000 DOUBLE NOT NULL, DELTA_J2000 DOUBLE NOT NULL, MAG_AUTO DOUBLE NOT NULL, MAGERR_AUTO DOUBLE NOT NULL, MAG_BEST DOUBLE NOT NULL, MAGERR_BEST DOUBLE NOT NULL, MAG_APER DOUBLE NOT NULL, MAGERR_APER DOUBLE NOT NULL, A_WORLD DOUBLE NOT NULL, ERRA_WORLD DOUBLE NOT NULL, B_WORLD DOUBLE NOT NULL, ERRB_WORLD DOUBLE NOT NULL, THETA_J2000 DOUBLE NOT NULL, ERRTHETA_J2000 DOUBLE NOT NULL, ISOAREA_IMAGE DOUBLE NOT NULL, MU_MAX DOUBLE NOT NULL, FLUX_RADIUS DOUBLE NOT NULL, FLAGS DOUBLE NOT NULL, INDEX(NUMBER) ); CREATE TABLE i LIKE r; CREATE TABLE riAll ( rNUMBER DOUBLE, iNUMBER DOUBLE, radius DOUBLE, INDEX(rNUMBER), INDEX(iNUMBER) ); CREATE TABLE ri LIKE riAll; CREATE TABLE ir LIKE riAll; CREATE TABLE riAllNoDup LIKE riAll; CREATE TABLE NoMatchR ( NUMBER DOUBLE, INDEX(NUMBER) ); CREATE TABLE NoMatchI LIKE NoMatchR; CREATE TABLE Object ( -- ... all 300+ columns from the LSST DC2 Database Schema )
3) Load data into database
mysql < schema.sql mysql D4 -e "load data infile '/u2/D4/D4.2007B.R.cat' into TABLE r FIELDS TERMINATED BY ',' " mysql D4 -e "load data infile '/u2/D4/D4.2007B.I.cat' into TABLE i FIELDS TERMINATED BY ',' "
4) add zones
alter table r add column zone INT; alter table r add index(zone); alter table i add column zone INT; alter table i add index(zone); update r set zone = round(DELTA_J2000*1000); update i set zone = round(DELTA_J2000*1000);
5) find neighbors for r-i
INSERT INTO riAll
SELECT r.NUMBER AS rNUMBER,
i.NUMBER AS iNUMBER,
POWER(r.ALPHA_J2000-i.ALPHA_J2000, 2)+
POWER(r.DELTA_J2000-i.DELTA_J2000, 2) AS radius
FROM r, i
WHERE (r.zone = i.zone-1 OR
r.zone = i.zone OR
r.zone = i.zone+1
)
AND POWER(r.ALPHA_J2000-i.ALPHA_J2000, 2)+
POWER(r.DELTA_J2000-i.DELTA_J2000, 2) < POWER(3.0/3600,2);
-- this took 1 hour 4 min 41.02 sec
-- select i closest to every r
INSERT INTO ri
SELECT a.rNUMBER, a.iNUMBER, a.radius
FROM riAll a,
(SELECT rNUMBER, MIN(radius) AS radius FROM riAll GROUP BY rNUMBER) AS x
WHERE a.rNUMBER = x.rNUMBER
AND a.radius = x.radius;
-- 345508 rows affected (20.31 sec)
-- another simpler but somewhat slower way of expressing the same thing:
-- INSERT INTO ri
-- SELECT *
-- FROM riAll a
-- WHERE a.radius = (SELECT MIN(radius) FROM riAll b WHERE b.rNUMBER = a.rNUMBER);
-- select r closest to every i
INSERT INTO ir
SELECT a.rNUMBER, a.iNUMBER, a.radius
FROM riAll a,
(SELECT iNUMBER, MIN(radius) as radius FROM riAll GROUP BY iNUMBER) AS x
WHERE a.iNUMBER = x.iNUMBER
AND a.radius = x.radius;
-- 349281 rows affected (20.35 sec)
-- select pairs that are closest matches to each other
INSERT INTO riAllNoDup
SELECT a.rNUMBER, a.iNUMBER, a.radius
FROM ri a, ir b
WHERE a.rNUMBER = b.rNUMBER
AND a.iNUMBER = b.iNUMBER;
-- 328017 rows affected (14.64 sec)
-- check to make sure there are no duplicates
SELECT COUNT(*), COUNT(DISTINCT rNUMBER), COUNT(DISTINCT iNUMBER) FROM riAllNoDup;
-- Returns 328017, 328017, 328017
-- get a histogram of the match distances
SELECT ROUND(SQRT(radius)*36000)/10 AS arcsec, COUNT(*) FROM riAllNoDup GROUP BY arcsec;
+--------+----------+
| arcsec | COUNT(*) |
+--------+----------+
| 0.0000 | 132022 | 0-50 milliarcsec
| 0.1000 | 145138 | 50-150 milliarcsec
| 0.2000 | 32054 | etc.
| 0.3000 | 8831 |
| 0.4000 | 3511 |
| 0.5000 | 1714 |
| 0.6000 | 839 |
| 0.7000 | 476 |
| 0.8000 | 309 |
| 0.9000 | 241 |
| 1.0000 | 200 |
| 1.1000 | 165 |
| 1.2000 | 153 |
| 1.3000 | 141 |
| 1.4000 | 136 |
| 1.5000 | 122 |
... etc ...
| 2.9000 | 102 |
| 3.0000 | 57 | 2.95-3.00 arcsec
+--------+----------+
6) create final Object table
INSERT INTO Object (objectId,
ra,
decl,
rPetroMag,
rPetroMagErr,
rMag,
rMagErr,
rApMag,
rApMagErr,
rIxx,
rIxxErr,
rIyy,
rIyyErr,
rIxy,
rIxyErr,
rIsoAreaImage,
rMuMax,
rFluxRadius,
rFlags,
iPetroMag,
iPetroMagErr,
iMag,
iMagErr,
iApMag,
iApMagErr,
iIxx,
iIxxErr,
iIyy,
iIyyErr,
iIxy,
iIxyErr,
iIsoAreaImage,
iMuMax,
iFluxRadius,
iFlags
)
SELECT 1000000 + r.NUMBER,
r.ALPHA_J2000,
r.DELTA_J2000,
r.MAG_AUTO,
r.MAGERR_AUTO,
r.MAG_BEST,
r.MAGERR_BEST,
r.MAG_APER,
r.MAGERR_APER,
r.A_WORLD,
r.ERRA_WORLD,
r.B_WORLD,
r.ERRB_WORLD,
r.THETA_J2000,
r.ERRTHETA_J2000,
r.ISOAREA_IMAGE,
r.MU_MAX,
r.FLUX_RADIUS,
r.FLAGS,
i.MAG_AUTO,
i.MAGERR_AUTO,
i.MAG_BEST,
i.MAGERR_BEST,
i.MAG_APER,
i.MAGERR_APER,
i.A_WORLD,
i.ERRA_WORLD,
i.B_WORLD,
i.ERRB_WORLD,
i.THETA_J2000,
i.ERRTHETA_J2000,
i.ISOAREA_IMAGE,
i.MU_MAX,
i.FLUX_RADIUS,
i.FLAGS
FROM riAllNoDup a
JOIN r ON (a.rNUMBER = r.NUMBER)
JOIN i ON (a.iNUMBER = i.NUMBER)
-- 328017 rows affected (48.44 sec)
7) take care of unmatched detections
-- 328017 matches
-- 367175 in r (39158 unmatched)
-- 378169 in i (50152 unmatched)
INSERT INTO NoMatchR
SELECT r.NUMBER
FROM r
LEFT JOIN riAllNoDup a ON ( r.NUMBER= a.rNUMBER)
WHERE a.rNUMBER IS NULL;
-- 39158 rows affected (11.42 sec)
INSERT INTO NoMatchI
SELECT i.NUMBER
FROM i
LEFT JOIN riAllNoDup a ON ( i.NUMBER= a.iNUMBER)
WHERE a.iNUMBER IS NULL;
-- 50152 rows affected (11.53 sec)
INSERT INTO Object (objectId,
ra,
decl,
rPetroMag,
rPetroMagErr,
rMag,
rMagErr,
rApMag,
rApMagErr,
rIxx,
rIxxErr,
rIyy,
rIyyErr,
rIxy,
rIxyErr,
rIsoAreaImage,
rMuMax,
rFluxRadius,
rFlags
)
SELECT 1000000 + r.NUMBER,
r.ALPHA_J2000,
r.DELTA_J2000,
r.MAG_AUTO,
r.MAGERR_AUTO,
r.MAG_BEST,
r.MAGERR_BEST,
r.MAG_APER,
r.MAGERR_APER,
r.A_WORLD,
r.ERRA_WORLD,
r.B_WORLD,
r.ERRB_WORLD,
r.THETA_J2000,
r.ERRTHETA_J2000,
r.ISOAREA_IMAGE,
r.MU_MAX,
r.FLUX_RADIUS,
r.FLAGS
FROM r
JOIN NoMatchR n ON (r.NUMBER = n.NUMBER);
-- 39158 rows affected (17.87 sec)
INSERT INTO Object (objectId,
ra,
decl,
iPetroMag,
iPetroMagErr,
iMag,
iMagErr,
iApMag,
iApMagErr,
iIxx,
iIxxErr,
iIyy,
iIyyErr,
iIxy,
iIxyErr,
iIsoAreaImage,
iMuMax,
iFluxRadius,
iFlags
)
SELECT 2000000 + i.NUMBER,
i.ALPHA_J2000,
i.DELTA_J2000,
i.MAG_AUTO,
i.MAGERR_AUTO,
i.MAG_BEST,
i.MAGERR_BEST,
i.MAG_APER,
i.MAGERR_APER,
i.A_WORLD,
i.ERRA_WORLD,
i.B_WORLD,
i.ERRB_WORLD,
i.THETA_J2000,
i.ERRTHETA_J2000,
i.ISOAREA_IMAGE,
i.MU_MAX,
i.FLUX_RADIUS,
i.FLAGS
FROM i
JOIN NoMatchI n ON (i.NUMBER = n.NUMBER);
-- 50152 rows affected (31.31 sec)
SELECT COUNT(*) FROM Object
-- 417327
8) dump the catalog
time mysqldump D4 Object > d4_object_ri.sql -- this took 35 sec
