Merge date ranges in SQL Server

I have the following data:

StartDate   |  EndDate
-------------------------
1982.03.02  |  1982.09.30 
1982.10.01  |  1985.01.17 
1985.06.26  |  1985.07.26 
1985.07.30  |  1991.12.31 
1992.01.01  |  1995.12.31 
1996.01.01  |  2004.05.31 
2004.06.05  |  2006.01.31 
2006.02.01  |  2011.05.20              

I need to combine any adjacent intervals (both the beginning and the end date are included in the intervals, so the interval ending on 2003.05.06 is adjacent to the interval starting on 2003.05.07), so in this case, the result set should be:

StartDate   |  EndDate
-------------------------
1982.03.02  |  1985.01.17 
1985.06.26  |  1985.07.26 
1985.07.30  |  2004.05.31 
2004.06.05  |  2011.05.20              

For me, the obvious way to do this is to iterate over the set with a cursor and build the result set row by row. However, this functionality will be in code that could potentially be called thousands of times a day on a server under heavy load, so I would prefer to avoid performance issues. Each data set is small (about 20 rows) but the date range is large, so any solution that generates every date in the range is not feasible.

, ?


( ):

-- Sample table for the question: one row per period, both dates inclusive.
-- StartDate is the clustered primary key, so two periods cannot share a start.
CREATE TABLE Periods (
    StartDate datetime NOT NULL,
    EndDate   datetime NOT NULL,
    CONSTRAINT PK_Periods PRIMARY KEY CLUSTERED (StartDate)
)

-- The eight sample periods from the question, written as unseparated yyyymmdd literals.
INSERT INTO Periods (StartDate, EndDate)
VALUES
    ('19820302', '19820930'),
    ('19821001', '19850117'),
    ('19850626', '19850726'),
    ('19850730', '19911231'),
    ('19920101', '19951231'),
    ('19960101', '20040531'),
    ('20040605', '20060131'),
    ('20060201', '20110520')
+2
7

, . , , CREATE TABLE INSERT/SELECT. , , :

-- Test fixture: inclusive periods stored with the date type (no time-of-day part).
create table Periods (
    StartDate date not null,
    EndDate   date not null
)
go
-- Load the eight sample periods from the question.
insert into Periods (StartDate, EndDate)
values
    ('19820302', '19820930'),
    ('19821001', '19850117'),
    ('19850626', '19850726'),
    ('19850730', '19911231'),
    ('19920101', '19951231'),
    ('19960101', '20040531'),
    ('20040605', '20060131'),
    ('20060201', '20110520')
go
-- Merge adjacent periods by walking each chain of day-adjacent rows with a recursive CTE.
;with MergedPeriods as (
    -- Anchor: periods with no other period ending the day before they start,
    -- i.e. the first period of every merged range.
    select head.StartDate, head.EndDate
    from Periods head
    where not exists (
        select *
        from Periods prev
        where head.StartDate = DATEADD(day, 1, prev.EndDate)
    )

    union all

    -- Recursive step: keep the chain's original StartDate and extend its EndDate
    -- to the period that starts the day after the chain currently ends.
    select chain.StartDate, nxt.EndDate
    from MergedPeriods chain
    inner join Periods nxt
        on chain.EndDate = DATEADD(day, -1, nxt.StartDate)
)
-- Every chain start emits one row per extension; the furthest EndDate is the merged end.
select
    StartDate,
    MAX(EndDate) as EndDate
from MergedPeriods
group by StartDate

:

StartDate   EndDate
1982-03-02  1985-01-17
1985-06-26  1985-07-26
1985-07-30  2004-05-31
2004-06-05  2011-05-20
+5

, , ( ). , . , , . .

-- Non-recursive merge: keep only the rows that begin or end a merged range,
-- then pair each beginning with the end that follows it.
WITH Grps AS (
   SELECT
      -- Surviving rows, ordered by StartDate, are expected to alternate
      -- range-start / range-end (assumes periods do not overlap), so halving the
      -- zero-based row number assigns one Grp value per merged range.
      (Row_Number() OVER (ORDER BY Cur.StartDate) - 1) / 2 AS Grp,
      Cur.StartDate,
      Cur.EndDate
   FROM
      Periods AS Cur
      -- Two copies of every period: Dir = -1 looks one day back, Dir = 1 one day forward.
      CROSS JOIN (SELECT -1 UNION ALL SELECT 1) AS D (Dir)
      LEFT JOIN Periods AS Adj
         ON Adj.EndDate = DateAdd(Day, D.Dir, Cur.StartDate)
         OR Adj.StartDate = DateAdd(Day, D.Dir, Cur.EndDate)
   WHERE
      -- backward copy with no period ending the day before: Cur begins a range
      (D.Dir = -1 AND Adj.EndDate IS NULL)
      -- forward copy with no period starting the day after: Cur ends a range
      OR (D.Dir = 1 AND Adj.StartDate IS NULL)
)
SELECT
   Min(StartDate) AS StartDate,
   Max(EndDate) AS EndDate
FROM Grps
GROUP BY Grp;

, , , , , ( "" ) :

StartDate   | EndDate     | EndDate
(Inclusive) | (Inclusive) | (Exclusive)
---------------------------------------
1982.03.02  | 1982.09.30  | 1982.10.01
1982.10.01  | 1985.01.17  | 1985.01.18

( ) - , , , . , 12 24 , , , , !

, :

-- Same start/end pairing technique, for periods that count as adjacent when one
-- period's EndDate equals the next period's StartDate (so no DateAdd is needed).
WITH Grps AS (
   SELECT
      -- Halving the zero-based row number pairs each range-start row with the
      -- range-end row that follows it in StartDate order.
      (Row_Number() OVER (ORDER BY Cur.StartDate) - 1) / 2 AS Grp,
      Cur.StartDate,
      Cur.EndDate
   FROM
      Periods AS Cur
      -- Which = 1 probes for a predecessor, Which = 2 probes for a successor.
      CROSS JOIN (SELECT 1 UNION ALL SELECT 2) AS X (Which)
      LEFT JOIN Periods AS Adj
         ON (X.Which = 1 AND Adj.EndDate = Cur.StartDate)
         OR (X.Which = 2 AND Adj.StartDate = Cur.EndDate)
   WHERE
      -- Keep only the probes that found no neighbour.
      Adj.EndDate IS NULL
      OR Adj.StartDate IS NULL
)
SELECT
   Min(StartDate) AS StartDate,
   Max(EndDate) AS EndDate
FROM Grps
GROUP BY Grp;

, DateAdd DateDiff, "1 ", , , , 12- .

Update

, , 5 . , 3 , , , , , , .

-- Merges adjacent periods in #Periods without joining the table to itself:
-- rows are duplicated and paired with their neighbours via Row_Number() / 2.
-- Assumes the periods do not overlap.
WITH Groups AS (
   -- Two copies of every period, numbered 1..2n in StartDate order. Integer-dividing
   -- by 2 puts the second copy of one period and the first copy of the next period
   -- into the same Grp (the very first and very last copies end up alone).
   SELECT Grp = Row_Number() OVER (ORDER BY StartDate) / 2, *
   FROM
      #Periods
      CROSS JOIN (VALUES (0), (0)) X (Dup)
), Ranges AS (
   -- Per neighbouring pair: the later StartDate and the earlier EndDate.
   -- The HAVING clause drops pairs that are adjacent (next start = previous end + 1 day),
   -- so what remains are the gap boundaries plus the first and last period.
   SELECT StartDate = Max(StartDate), EndDate = Min(EndDate)
   FROM Groups
   GROUP BY Grp
   HAVING Max(StartDate) <> DateAdd(day, 1, Min(EndDate))
), ReGroups AS (
   -- Same duplicate-and-pair trick on the boundary rows. EndDate is a tie-breaker:
   -- when the last merged range is a single period, its row and the preceding gap row
   -- share a StartDate, and the gap row (earlier EndDate) must sort first.
   SELECT
      Grp = Row_Number() OVER (ORDER BY StartDate, EndDate) / 2,
      StartDate,
      EndDate
   FROM
      Ranges
      CROSS JOIN (VALUES (0), (0)) X (Dup)
)
-- Each complete pair spans one merged range: start taken from the earlier boundary row,
-- end taken from the later one. The unpaired first and last copies are discarded.
SELECT
   StartDate = Min(StartDate),
   EndDate = Max(EndDate)
FROM ReGroups
GROUP BY Grp
HAVING Count(*) = 2
;

, windowing ( , ):

-- Merges adjacent periods in #Periods using LAG/LEAD window functions.
-- Assumes the periods do not overlap.
WITH LeadLag AS (
   -- Attach the previous row's EndDate and the next row's StartDate to every period.
   -- The sentinels stand in for "no neighbour" on the first and last row.
   -- NOTE(review): '00010101' presumably requires date/datetime2 columns (it looks
   -- out of range for datetime) - confirm against the #Periods definition.
   SELECT
      PrevEndDate = Coalesce(Lag(EndDate) OVER (ORDER BY StartDate), '00010101'),
      NextStartDate = Coalesce(Lead(StartDate) OVER (ORDER BY StartDate), '99991231'),
      *
   FROM #Periods
), Dates AS (
   -- Keep StartDate only where the period begins a merged range and EndDate only where
   -- it ends one; periods that are interior to a range (both NULL) are dropped.
   SELECT
      X.*
   FROM
      LeadLag
      CROSS APPLY (
         SELECT
            StartDate = CASE WHEN DateAdd(day, 1, PrevEndDate) <> StartDate THEN StartDate ELSE NULL END,
            EndDate = CASE WHEN DateAdd(day, 1, EndDate) <> NextStartDate THEN EndDate ELSE NULL END
      ) X
   WHERE
      X.StartDate IS NOT NULL
      OR X.EndDate IS NOT NULL
), Final AS (
   -- Give every range-start row the nearest following range end.
   -- The window is ordered chronologically by whichever boundary the row carries.
   -- Ordering by EndDate alone would sort the NULL-EndDate start rows first and hand
   -- each of them the overall minimum EndDate.
   SELECT
      StartDate,
      EndDate = Min(EndDate) OVER (
         ORDER BY Coalesce(StartDate, EndDate)
         ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
      )
   FROM Dates
)
SELECT *
FROM Final
WHERE StartDate IS NOT NULL
;
+6

: , . ​​:

-- For every period that starts a merged range ("head"), report the latest EndDate
-- that falls before the next head begins.
;with heads as (
    -- Heads: periods with no other period ending the day before they start,
    -- numbered in StartDate order so each head can find the one after it.
    select
        cur.StartDate,
        cur.EndDate,
        row_number() over (order by cur.StartDate) as rn
    from @YourTable cur
    where not exists (
        select *
        from @YourTable prev
        where prev.EndDate = dateadd(day, -1, cur.StartDate)
    )
)
select
    heads.StartDate,
    (
        -- Latest EndDate before the following head's StartDate; the last head has no
        -- follower, so a far-future literal is used as the upper bound instead.
        select top 1 t.EndDate
        from @YourTable t
        where t.EndDate < coalesce(
            (
                select following.StartDate
                from heads following
                where following.rn = heads.rn + 1
            ),
            '9999-01-01'
        )
        order by t.EndDate desc
    ) as EndDate
from heads

ODATA.

+1

... ,

, , .

- , . .

, .

-- Helper numbers (tally) table: consecutive integers starting at 0.
CREATE TABLE Numbers (
   Num int NOT NULL CONSTRAINT PK_Numbers PRIMARY KEY CLUSTERED
)
INSERT Numbers VALUES (0)
-- Double the table on every pass by appending a copy of all existing values shifted
-- just past the current maximum. @@RowCount holds the row count of the preceding
-- INSERT, so the loop ends after the first pass that adds at least 65536 rows.
WHILE @@RowCount < 65536
   INSERT Numbers SELECT Num + (SELECT Max(Num) FROM Numbers) + 1 FROM Numbers

!

-- Expands every period into one row per calendar day, then collapses runs of
-- consecutive days back into ranges.
-- Presumably Numbers must contain 0..N with N at least the longest period length in
-- days - verify against how Numbers is populated.
WITH Dts AS (
   SELECT
      DateAdd(Day, Tally.Num, Span.StartDate) AS Dt,
      -- Subtracting a rank that rises by one per generated day leaves a constant
      -- value for every unbroken run of dates; that constant is the group key.
      DateAdd(
         Day,
         -DENSE_RANK() OVER (ORDER BY Span.StartDate, Tally.Num),
         DateAdd(Day, Tally.Num, Span.StartDate)
      ) AS Grp
   FROM
      Numbers AS Tally
      INNER JOIN Periods AS Span
         ON Tally.Num <= DateDiff(Day, Span.StartDate, Span.EndDate)
)
SELECT
   Min(Dt) AS StartDate,
   Max(Dt) AS EndDate
FROM Dts
GROUP BY Grp
ORDER BY StartDate

SQL 2000, , , , , .

+1

PostgreSQL:

PostgreSQL

T-SQL, , , , GIST ( R-tree) . .

( peufeu ):

-- Benchmark fixture (PostgreSQL): one million integer segments [n, n+1], each also
-- stored as a box spanning x = n..n+1 and y = -1..1 so it can be indexed with GiST.
CREATE TABLE segments (
    start     INTEGER NOT NULL,
    stop      INTEGER NOT NULL,
    range_box BOX     NOT NULL
);

INSERT INTO segments
SELECT
    n,
    n + 1,
    BOX(POINT(n, -1), POINT(n + 1, 1))
FROM generate_series(1, 1000000) AS n;

-- One GiST index on the box, plus plain indexes on each endpoint for comparison.
CREATE INDEX segments_box   ON segments USING gist (range_box);
CREATE INDEX segments_start ON segments (start);
CREATE INDEX segments_stop  ON segments (stop);

-- Point lookup written as plain range predicates; the plan output that follows shows an
-- index scan on segments_start with the stop condition applied as a filter.
EXPLAIN ANALYZE SELECT * FROM segments WHERE 300000 BETWEEN start AND stop;
 Index Scan using segments_start on segments  (cost=0.00..12959.24 rows=209597 width=72) (actual time=91.990..91.990 rows=2 loops=1)
   Index Cond: (300000 >= start)
   Filter: (300000 <= stop)
 Total runtime: 92.023 ms

-- Same lookup written as a box-overlap (&&) test against range_box; the plan output that
-- follows shows a bitmap index scan on the GiST index segments_box.
EXPLAIN ANALYZE SELECT * FROM segments WHERE range_box && '(300000,0,300000,0)'::BOX;
 Bitmap Heap Scan on segments  (cost=283.49..9740.27 rows=5000 width=72) (actual time=0.036..0.037 rows=2 loops=1)
   Recheck Cond: (range_box && '(300000,0),(300000,0)'::box)
   ->  Bitmap Index Scan on segments_box  (cost=0.00..282.24 rows=5000 width=0) (actual time=0.032..0.032 rows=2 loops=1)
         Index Cond: (range_box && '(300000,0),(300000,0)'::box)
 Total runtime: 0.064 ms

, PostgreSQL, , , , // T-SQL.

0

, - PostGIS, :

-- Create the data:
-- (a temporary table holding the eight sample periods, each literal cast to date)
drop table if exists periods;
create temporary table periods as
select '19820302'::date as StartDate,'19820930'::date as EndDate union all
select '19821001'::date,'19850117'::date union all
select '19850626'::date,'19850726'::date union all
select '19850730'::date,'19911231'::date union all
select '19920101'::date,'19951231'::date union all
select '19960101'::date,'20040531'::date union all
select '20040605'::date,'20060131'::date union all
select '20060201'::date,'20110520'::date;

-- Run with PostGIS
-- Convert all intervals to lines, and then do point intersection.
-- Each period becomes a line on y = 0 whose x coordinates are day offsets from
-- 1970-01-01. The end point is pushed out by one day so that day-adjacent periods
-- share an endpoint and can be merged into a single line.
select 
  -- x of the first point of a merged line, converted back to a date
  '1970-01-01'::date+st_x(st_astext(st_pointn(line,1)))::int4 as start, 
  -- x of the last point, minus the one day added when the line was built
  '1970-01-01'::date+st_x(st_astext(st_pointn(line,st_numpoints(line))))::int4-1 as end 
from 
-- union every period line, merge the result, and emit one row per resulting line
(select (st_dump(st_linemerge(st_union(the_geom)))).geom as line from 
(select st_makeline(st_makepoint(startdate-'1970-01-01',0),
        st_makepoint(enddate-'1970-01-01'+1,0)) as the_geom from periods)t 
)x;  

-- Result
start       |  end
-------------------------
1982-03-02  |  1985-01-17 
1985-06-26  |  1985-07-26 
1985-07-30  |  2004-05-31 
2004-06-05  |  2011-05-20  
0
-- Sample table and data for this example: seven inclusive date periods in 2013.
-- The table is created here (ALTER TABLE does not accept a column-definition list).
-- NOTE(review): the query that follows this batch reads from [test].[dbo].[Periods],
-- not from this table - confirm which table name is intended.
create table MergedPeriods (
   StartDate date not null,
   EndDate date not null
)
go
insert into MergedPeriods(StartDate,EndDate)
select '20130210','20130215' union all
select '20130216','20130228' union all
select '20130302','20130312' union all
select '20130317','20130325' union all
select '20130326','20130405' union all
select '20130406','20130411' union all
select '20130502','20130610' 
go
-- Merge day-adjacent periods from [test].[dbo].[Periods] with a recursive CTE.
-- Inside this statement the name MergedPeriods refers to the CTE.
; with MergedPeriods as (
    -- Anchor: periods with no matching predecessor, i.e. no row whose EndDate is the
    -- day before this StartDate.
    Select p1.StartDate, p1.EndDate
    from
        [test].[dbo].[Periods] p1
            left join
        [test].[dbo].[Periods] p2
            on
                p1.StartDate = DATEADD(day,1,p2.EndDate)
    where
        p2.StartDate is null
    union all
    -- Recursive step: extend the chain's EndDate to the period starting the next day,
    -- keeping the chain's original StartDate.
    select p1.StartDate,p2.EndDate
    from
        MergedPeriods p1
            inner join
        [test].[dbo].[Periods] p2
            on
                p1.EndDate = DATEADD(day,-1,p2.StartDate)
)
-- One row per chain start; the first column is now named so that every output column
-- has a name.
select MIN(StartDate) as StartDate,MAX(EndDate) as EndDate
from MergedPeriods group by StartDate
0

All Articles