wiki:dbD4ObjectCatalog
Last modified 11 years ago Last modified on 01/09/2008 01:28:41 PM

Recipe How DC2 Object Catalog Was Built

DC2 Test Data LSST Database

Used hardware: lsst-dev02.slac.stanford.edu

1) prepare input files (used files: D4.2007B.R.cat and D4.2007B.I.cat)

  • remove leading whitespaces
  • convert spaces to ','
  • remove lines with #

2) create schema (schema.sql file)

CREATE DATABASE D4;
USE D4;

CREATE TABLE r (
  NUMBER          DOUBLE NOT NULL,
  X_IMAGE         DOUBLE NOT NULL,
  Y_IMAGE         DOUBLE NOT NULL,
  ALPHA_J2000     DOUBLE NOT NULL,
  DELTA_J2000     DOUBLE NOT NULL,
  MAG_AUTO        DOUBLE NOT NULL,
  MAGERR_AUTO     DOUBLE NOT NULL,
  MAG_BEST        DOUBLE NOT NULL,
  MAGERR_BEST     DOUBLE NOT NULL,
  MAG_APER        DOUBLE NOT NULL,
  MAGERR_APER     DOUBLE NOT NULL,
  A_WORLD         DOUBLE NOT NULL,
  ERRA_WORLD      DOUBLE NOT NULL,
  B_WORLD         DOUBLE NOT NULL,
  ERRB_WORLD      DOUBLE NOT NULL,
  THETA_J2000     DOUBLE NOT NULL,
  ERRTHETA_J2000  DOUBLE NOT NULL,
  ISOAREA_IMAGE   DOUBLE NOT NULL,
  MU_MAX          DOUBLE NOT NULL,
  FLUX_RADIUS     DOUBLE NOT NULL,
  FLAGS           DOUBLE NOT NULL,
  INDEX(NUMBER)
);

CREATE TABLE i LIKE r;

CREATE TABLE riAll (
  rNUMBER        DOUBLE,
  iNUMBER        DOUBLE,
  radius         DOUBLE,
  INDEX(rNUMBER),
  INDEX(iNUMBER)
);

CREATE TABLE ri LIKE riAll;
CREATE TABLE ir LIKE riAll;
CREATE TABLE riAllNoDup LIKE riAll;


CREATE TABLE NoMatchR (
  NUMBER DOUBLE,
  INDEX(NUMBER)
);

CREATE TABLE NoMatchI LIKE NoMatchR;

CREATE TABLE Object (
 -- ... all 300+ columns from the LSST DC2 Database Schema
)


3) Load data into database

   mysql < schema.sql
   mysql D4 -e "load data infile '/u2/D4/D4.2007B.R.cat' into TABLE r FIELDS TERMINATED BY ',' "
   mysql D4 -e "load data infile '/u2/D4/D4.2007B.I.cat' into TABLE i FIELDS TERMINATED BY ',' "

4) add zones

alter table r add column zone INT; alter table r add index(zone);
alter table i add column zone INT; alter table i add index(zone);

update r set zone = round(DELTA_J2000*1000);
update i set zone = round(DELTA_J2000*1000);

5) find neighbors for r-i

INSERT INTO riAll
  SELECT  r.NUMBER AS rNUMBER,
          i.NUMBER AS iNUMBER,
          POWER(r.ALPHA_J2000-i.ALPHA_J2000, 2)+
          POWER(r.DELTA_J2000-i.DELTA_J2000, 2) AS radius
  FROM    r, i
  WHERE   (r.zone = i.zone-1 OR
           r.zone = i.zone   OR
           r.zone = i.zone+1
          )
     AND  POWER(r.ALPHA_J2000-i.ALPHA_J2000, 2)+
          POWER(r.DELTA_J2000-i.DELTA_J2000, 2) < POWER(3.0/3600,2);
-- this took 1 hour 4 min 41.02 sec




-- select i closest to every r
INSERT INTO ri
SELECT a.rNUMBER, a.iNUMBER, a.radius
FROM   riAll a,
       (SELECT rNUMBER, MIN(radius) AS radius FROM riAll GROUP BY rNUMBER) AS x
WHERE  a.rNUMBER = x.rNUMBER
  AND  a.radius  = x.radius;
-- 345508 rows affected (20.31 sec)

-- another simpler but somewhat slower way of expressing the same thing:
-- INSERT INTO ri
-- SELECT *
-- FROM   riAll a
-- WHERE  a.radius = (SELECT MIN(radius) FROM riAll b WHERE b.rNUMBER = a.rNUMBER);

-- select r closest to every i
INSERT INTO ir
SELECT a.rNUMBER, a.iNUMBER, a.radius
FROM   riAll a,
       (SELECT iNUMBER, MIN(radius) as radius FROM riAll GROUP BY iNUMBER) AS x
WHERE  a.iNUMBER = x.iNUMBER
  AND  a.radius  = x.radius;
-- 349281 rows affected (20.35 sec)


-- select pairs that are closest matches to each other
INSERT INTO riAllNoDup
SELECT a.rNUMBER, a.iNUMBER, a.radius
FROM   ri a, ir b
WHERE  a.rNUMBER = b.rNUMBER
  AND  a.iNUMBER = b.iNUMBER;
-- 328017 rows affected (14.64 sec)

-- check to make sure there are no duplicates
SELECT COUNT(*), COUNT(DISTINCT rNUMBER), COUNT(DISTINCT iNUMBER) FROM riAllNoDup;
-- Returns 328017, 328017, 328017

-- get a histogram of the match distances
SELECT ROUND(SQRT(radius)*36000)/10 AS arcsec, COUNT(*) FROM riAllNoDup GROUP BY arcsec;
+--------+----------+                                                         
| arcsec | COUNT(*) |                                                         
+--------+----------+                                                         
| 0.0000 |   132022 | 0-50 milliarcsec                                        
| 0.1000 |   145138 | 50-150 milliarcsec                                      
| 0.2000 |    32054 | etc.                                                    
| 0.3000 |     8831 |
| 0.4000 |     3511 |
| 0.5000 |     1714 |
| 0.6000 |      839 |
| 0.7000 |      476 |
| 0.8000 |      309 |
| 0.9000 |      241 |
| 1.0000 |      200 |
| 1.1000 |      165 |
| 1.2000 |      153 |
| 1.3000 |      141 |
| 1.4000 |      136 |
| 1.5000 |      122 |
... etc ...
| 2.9000 |      102 |
| 3.0000 |       57 | 2.95-3.00 arcsec
+--------+----------+

6) create final Object table

INSERT INTO Object (objectId,
                    ra,
                    decl,

                    rPetroMag,
                    rPetroMagErr,
                    rMag,
                    rMagErr,
                    rApMag,
                    rApMagErr,
                    rIxx,
                    rIxxErr,
                    rIyy,
                    rIyyErr,
                    rIxy,
                    rIxyErr,
                    rIsoAreaImage,
                    rMuMax,
                    rFluxRadius,
                    rFlags,

                    iPetroMag,
                    iPetroMagErr,
                    iMag,
                    iMagErr,
                    iApMag,
                    iApMagErr,
                    iIxx,
                    iIxxErr,
                    iIyy,
                    iIyyErr,
                    iIxy,
                    iIxyErr,
                    iIsoAreaImage,
                    iMuMax,
                    iFluxRadius,
                    iFlags
                    )

SELECT 1000000 + r.NUMBER,
       r.ALPHA_J2000,
       r.DELTA_J2000,

       r.MAG_AUTO,
       r.MAGERR_AUTO,
       r.MAG_BEST,
       r.MAGERR_BEST,
       r.MAG_APER,
       r.MAGERR_APER,
       r.A_WORLD,
       r.ERRA_WORLD,
       r.B_WORLD,
       r.ERRB_WORLD,
       r.THETA_J2000,
       r.ERRTHETA_J2000,
       r.ISOAREA_IMAGE,
       r.MU_MAX,
       r.FLUX_RADIUS,
       r.FLAGS,

       i.MAG_AUTO,
       i.MAGERR_AUTO,
       i.MAG_BEST,
       i.MAGERR_BEST,
       i.MAG_APER,
       i.MAGERR_APER,
       i.A_WORLD,
       i.ERRA_WORLD,
       i.B_WORLD,
       i.ERRB_WORLD,
       i.THETA_J2000,
       i.ERRTHETA_J2000,
       i.ISOAREA_IMAGE,
       i.MU_MAX,
       i.FLUX_RADIUS,
       i.FLAGS
FROM riAllNoDup a
JOIN r ON (a.rNUMBER = r.NUMBER)
JOIN i ON (a.iNUMBER = i.NUMBER)
-- 328017 rows affected (48.44 sec)

7) take care of unmatched detections

 -- 328017 matches
 -- 367175 in r (39158 unmatched)
 -- 378169 in i (50152 unmatched)

INSERT INTO NoMatchR
   SELECT    r.NUMBER
   FROM      r
   LEFT JOIN riAllNoDup a ON ( r.NUMBER= a.rNUMBER)
   WHERE     a.rNUMBER IS NULL;
-- 39158 rows affected (11.42 sec)

INSERT INTO NoMatchI
   SELECT    i.NUMBER
   FROM      i
   LEFT JOIN riAllNoDup a ON ( i.NUMBER= a.iNUMBER)
   WHERE     a.iNUMBER IS NULL;
-- 50152 rows affected (11.53 sec)

INSERT INTO Object (objectId,
                    ra,
                    decl,

                    rPetroMag,
                    rPetroMagErr,
                    rMag,
                    rMagErr,
                    rApMag,
                    rApMagErr,
                    rIxx,
                    rIxxErr,
                    rIyy,
                    rIyyErr,
                    rIxy,
                    rIxyErr,
                    rIsoAreaImage,
                    rMuMax,
                    rFluxRadius,
                    rFlags
                    )
SELECT 1000000 + r.NUMBER,
       r.ALPHA_J2000,
       r.DELTA_J2000,

       r.MAG_AUTO,
       r.MAGERR_AUTO,
       r.MAG_BEST,
       r.MAGERR_BEST,
       r.MAG_APER,
       r.MAGERR_APER,
       r.A_WORLD,
       r.ERRA_WORLD,
       r.B_WORLD,
       r.ERRB_WORLD,
       r.THETA_J2000,
       r.ERRTHETA_J2000,
       r.ISOAREA_IMAGE,
       r.MU_MAX,
       r.FLUX_RADIUS,
       r.FLAGS
FROM   r
JOIN   NoMatchR n ON (r.NUMBER = n.NUMBER);
-- 39158 rows affected (17.87 sec)

INSERT INTO Object (objectId,
                    ra,
                    decl,

                    iPetroMag,
                    iPetroMagErr,
                    iMag,
                    iMagErr,
                    iApMag,
                    iApMagErr,
                    iIxx,
                    iIxxErr,
                    iIyy,
                    iIyyErr,
                    iIxy,
                    iIxyErr,
                    iIsoAreaImage,
                    iMuMax,
                    iFluxRadius,
                    iFlags
                    )
SELECT 2000000 + i.NUMBER,
       i.ALPHA_J2000,
       i.DELTA_J2000,

       i.MAG_AUTO,
       i.MAGERR_AUTO,
       i.MAG_BEST,
       i.MAGERR_BEST,
       i.MAG_APER,
       i.MAGERR_APER,
       i.A_WORLD,
       i.ERRA_WORLD,
       i.B_WORLD,
       i.ERRB_WORLD,
       i.THETA_J2000,
       i.ERRTHETA_J2000,
       i.ISOAREA_IMAGE,
       i.MU_MAX,
       i.FLUX_RADIUS,
       i.FLAGS
FROM   i
JOIN   NoMatchI n ON (i.NUMBER = n.NUMBER);
-- 50152 rows affected (31.31 sec)

SELECT COUNT(*) FROM Object
-- 417327

8) dump the catalog

  time mysqldump D4 Object > d4_object_ri.sql
  -- this took 35 sec