PostgreSQL
 sql >> Datenbank >  >> RDS >> PostgreSQL

Zählen Sie gruppierte Zeitlücken für den Zeitbereich

Nur um doppelte Arbeit zu vermeiden, hier sind die Daten (ich habe die inklusive obere Begrenzungsbucht durch eine exklusive ersetzt, was meiner Meinung nach häufiger vorkommt):

-- CREATE SCHEMA tmp;
DROP TABLE tmp.gaps CASCADE;
CREATE TABLE tmp.gaps
        ( id INTEGER NOT NULL PRIMARY KEY       -- surrogate key
        , ztype CHAR(1) NOT NULL
        , start_datetime TIMESTAMP NOT NULL     -- lower boundary := inclusive
        , end_datetime TIMESTAMP NOT NULL       -- upper boundary := exclusive
        );
CREATE UNIQUE INDEX gaps_forward ON tmp.gaps(ztype,start_datetime);
CREATE UNIQUE INDEX gaps_backward ON tmp.gaps(ztype,end_datetime);

INSERT INTO tmp.gaps(id,ztype,start_datetime,end_datetime) VALUES
 (1,'a', '2012-01-11 00:00:00', '2012-01-15 00:00:00' )
,(2,'a', '2012-01-18 00:00:00', '2012-01-21 00:00:00' )
,(3,'b', '2012-01-14 00:00:00', '2012-01-20 00:00:00' )
,(4,'c', '2012-01-10 00:00:00', '2012-01-16 00:00:00' )
,(5,'d', '2012-01-11 00:00:00', '2012-01-21 00:00:00' )
,(6,'e', '2012-01-11 00:00:00', '2012-01-15 00:00:00' ) -- added this
,(7,'e', '2012-01-15 00:00:00', '2012-01-21 00:00:00' ) -- and this
        ;
-- SELECT * FROM tmp.gaps;

UPDATE:hier kommt der CTE. In der ersten UNION füge ich zwei falsche Intervalle links und rechts vom gewünschten Intervall (12. Januar - 19. Januar) hinzu.

Pro ztype zähle ich die Gesamtzahl der Intervalle. Dies sollte eines sein, wenn es keine Löcher gibt, zwei, wenn es ein Loch gibt, und so weiter. Dadurch werden auch Lücken für Ztypes gefunden, die keine Datensätze im gewünschten Intervall haben.

-- EXPLAIN ANALYZE
WITH RECURSIVE meuk(ztype,start_datetime,end_datetime) AS (
        -- For every possible "ztype" add two dummie records
        -- just before and just after our wanted interval.
        WITH plus2 AS (
                SELECT g0.ztype,g0.start_datetime,g0.end_datetime FROM tmp.gaps g0
                WHERE (g0.start_datetime <= '2012-01-12 00:00:00' AND g0.end_datetime >= '2012-01-12 00:00:00')
                   OR (g0.start_datetime >= '2012-01-12 00:00:00' AND g0.end_datetime <= '2012-01-19 00:00:00')
                   OR (g0.start_datetime <= '2012-01-19 00:00:00' AND g0.end_datetime >= '2012-01-19 00:00:00')
                UNION ALL SELECT DISTINCT g1.ztype, '1900-01-01 00:00:00'::timestamp, '2012-01-12 00:00:00'::timestamp FROM tmp.gaps g1
                UNION ALL SELECT DISTINCT g2.ztype, '2012-01-19 00:00:00'::timestamp, '2100-01-01 00:00:00'::timestamp FROM tmp.gaps g2
                )
        SELECT p0.ztype,p0.start_datetime,p0.end_datetime
        FROM plus2 p0
                -- the start of a stretch: there is no older overlapping 
                -- (or touching) interval
        WHERE NOT EXISTS (SELECT *
                FROM plus2 nx
                WHERE nx.ztype = p0.ztype
                AND nx.start_datetime < p0.start_datetime -- older
                AND nx.end_datetime >= p0.start_datetime  -- touching or overlapping
                )
        UNION
        SELECT mk.ztype
                , LEAST(mk.start_datetime,p1.start_datetime)
                , GREATEST(mk.end_datetime,p1.end_datetime)
        FROM plus2 p1
        , meuk mk
        WHERE p1.ztype = mk.ztype
        AND (p1.start_datetime >= mk.start_datetime AND p1.start_datetime <= mk.end_datetime AND p1.end_datetime > mk.end_datetime)
        )
SELECT ztype, COUNT(*)-1 AS ngap
FROM meuk mk
WHERE NOT EXISTS (SELECT *
        FROM meuk  nx
        WHERE nx.ztype = mk.ztype
        AND (nx.start_datetime,nx.end_datetime) OVERLAPS( mk.start_datetime,mk.end_datetime)
        AND (nx.end_datetime - nx.start_datetime) > (mk.end_datetime - mk.start_datetime)
        )
GROUP BY ztype
ORDER BY ztype
        ;

Das Erstellen der Endsumme bleibt dem Leser als Übung überlassen;-)

ERGEBNISSE:

 ztype | ngap 
-------+------
 a     |    1
 b     |    1
 c     |    1
 d     |    0
 e     |    0
(5 rows)