SQL data sampling

We had a request to provide some data to an external company. They only need a sample of the data. Simple? Wrong.

Here are their selection criteria:

  • Divide the total number of records by 720 (the required sample size) - this gives the sampling interval (if the result is fractional, it is rounded down to a whole number).

  • Halve the sampling interval to get the starting point.

  • Return each record found by repeatedly adding the sampling interval to the starting point.

Example:

  • 10,000 records - sampling interval = 13 (10,000/720)
  • Starting point = 6 (13/2, rounded down)
  • Return records 6, 19 (6 + 13), 32 (19 + 13), 45 (32 + 13), etc ...

Please someone tell me how (if) something similar is possible in SQL.

+5
source share
4

Number the records with ROW_NUMBER(), then filter on that number.

-- Number every row of yourTable in (a, b, c, d) order, then keep the rows
-- whose number, shifted by 360, is a multiple of 720 (rows 360, 1080, ...).
WITH numbered AS (
  SELECT
    ROW_NUMBER() OVER (ORDER BY a, b, c, d) AS record_id,
    *
  FROM
    yourTable
)
SELECT
  *
FROM
  numbered AS data
WHERE
  (record_id + 360) % 720 = 0

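ROW_NUMBER() assigns every record a sequential number (used here as its id). The numbers are assigned in the order you specify (ORDER BY a, b, c, d).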

With that id, the modulo operator (%) picks out the 720th record, the 1440th record, and so on (because 720 % 720 = 0).

Adding 360 to the id before taking the modulo shifts the selection to the middle of each block of 720 records.

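That returns every 720th record; to return 720 records in total instead, the constants have to be derived from the size of the table.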

To do that, replace 720 with (SELECT COUNT(*) / 720 FROM yourTable)

and replace 360 with (SELECT (COUNT(*) / 720) / 2 FROM yourTable)

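Because of integer rounding this may not return exactly 720 records. To get closer, use non-integer values and keep the records whose modulo result is less than 1.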

-- COUNT(*) / 720.0 is the fractional sampling interval and COUNT(*) / 1440.0
-- is half of it (the offset). A row is kept when its offset id lands within
-- 1.0 after a multiple of the interval.
WHERE
  (record_id + (SELECT COUNT(*) FROM yourTable) / 1440.0)
    % ((SELECT COUNT(*) FROM yourTable) / 720.0) < 1.0
+2
-- Systematic sample of your_table: number the rows in (col1, col2) order and
-- return every @interval-th row, starting at row @starting_point, capped at
-- @sample_size rows.
declare @sample_size int, @starting_point int, @interval int

select @sample_size = 200

-- Integer division rounds the interval down. Computing it once avoids
-- repeating the count(*) subquery in the filter.
select @interval = count(*) / @sample_size
from your_table

-- Fewer rows than the sample size would give interval 0 and a modulo-by-zero
-- error; fall back to returning every row.
if @interval < 1
    set @interval = 1

select @starting_point = @interval / 2

select top (@sample_size) col1, col2, col3, col4
from (
    select *, row_number() over (order by col1, col2) as row
    from your_table
) t
where row % @interval = @starting_point
-- Without an order by, top would keep an arbitrary subset of the matches.
order by row

This works on SQL Server 2005 and later.

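It uses TOP (@variable) to cap the result (the WHERE clause can match slightly more rows than the sample size) and ROW_NUMBER() to number the rows.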

A runnable demo: http://data.stackexchange.com/stackoverflow/query/62315/sql-data-sampling - the script is:

-- Self-contained demo: fill a table variable with test rows, show the
-- sampling interval, then return a systematic sample of @sample_size rows.
declare @tab table (id int identity(1,1), col1 varchar(3), col2 varchar(3))

declare @i int

set @i = 0

-- The loop runs for @i = 0..1000 inclusive, so it inserts 1001 rows.
while @i <= 1000
begin
  insert into @tab (col1, col2)
  select 'aaa', 'bbb'
  set @i = @i+1
end

declare @sample_size int, @interval int

select @sample_size = 123

-- Integer division rounds the interval down; computed once and reused below.
select @interval = count(*) / @sample_size
from @tab

-- Guard against modulo by zero if @sample_size is raised above the row count.
if @interval < 1
    set @interval = 1

select @interval as sample_interval

select top (@sample_size) *
from (
    select *, row_number() over (order by col1, col2, id desc) as row
    from @tab
) t
-- Keep the row in the middle of each interval (row numbers 4, 12, 20, ... here).
where row % @interval = @interval / 2
-- The filter can match more than @sample_size rows; ordering makes top keep
-- the first ones instead of an arbitrary subset.
order by row
+1

SQL- .
SELECT FirstName, LastName . (10 );

0

Here is a working solution. For a table of 10000 records it returns records 6, 19, 32, etc. - 769 records in total.

-- Demo fixture: a single-column table to sample from.
CREATE TABLE Tbl (
    Data varchar (255)
)
GO

-- Fill Tbl with 10000 rows, each holding a freshly generated GUID as text.
DECLARE @remaining int
SET @remaining = 10000
WHILE (@remaining > 0)
BEGIN
    INSERT INTO Tbl (Data) VALUES (CAST(NEWID() AS varchar(255)))
    SET @remaining = @remaining - 1
END
GO

-- Systematic sample of Tbl:
--   interval = total rows / sample size (integer division, rounded down)
--   start    = interval / 2             (rounded down)
-- and return rows start, start + interval, start + 2 * interval, ...
-- with rows numbered in Data order.
DECLARE @sample_size int
DECLARE @interval int
DECLARE @start int
DECLARE @total int

SET @sample_size = 720

SELECT @total = COUNT(*)
FROM Tbl

SET @interval = @total / @sample_size

-- Fewer rows than the sample size would give interval 0 and a modulo-by-zero
-- error; fall back to returning every row.
IF @interval < 1
    SET @interval = 1

SET @start = @interval / 2

PRINT 'Start record: ' + CAST(@start as varchar(10))
PRINT 'Interval: ' + CAST(@interval as varchar(10))

SELECT q.[rank], q.Data
FROM (
    -- ROW_NUMBER() rather than RANK(): tied Data values must still get
    -- distinct, gap-free positions. The output column keeps the name "rank".
    SELECT ROW_NUMBER() OVER (ORDER BY t.Data) AS [rank],
           t.Data AS Data
    FROM Tbl t
) q
-- Position modulo interval equals the starting point, which holds for both
-- odd and even intervals (13 -> 6, 19, 32, ...; 14 -> 7, 21, 35, ...).
WHERE q.[rank] % @interval = @start
ORDER BY q.[rank]
-1
source

All Articles