I have a temp table that is being populated and below is a representation of the data.Basically, for each mile_code and rep combination there will be ‘X’ number of records.(In the example below there are 30 records.)What I need to do is eliminate the top and bottom 10% (This could be any percentage and I want to be able to change that easily if I have to.) of DeltaT for each mile_code and rep combination and then average the remaining values in DeltaT for each mile_code and rep combination.
I’ve tried a bunch of different approaches and none give me what I’m looking for.Any ideas or solutions?
mile_code DeltaT rep close_date
Approach 1 Coleman 6/5/2007 16:52
Approach 1 COLEMAN 6/5/2007 14:32
Approach 1 COLEMAN 6/5/2007 9:26
Approach 21 COLEMAN 5/31/2007 14:07
Approach 14 COLEMAN 5/31/2007 9:53
Approach 7 Coleman 5/30/2007 16:12
Approach 36 COLEMAN 5/29/2007 15:53
Approach 8 COLEMAN 5/29/2007 13:54
Approach 8 COLEMAN 5/29/2007 13:23
Approach 8 COLEMAN 5/29/2007 9:00
… (20 more records)
Interview 1 Coleman 6/5/2007 16:52
Interview 1 COLEMAN 6/5/2007 14:32
Interview 1 COLEMAN 6/5/2007 9:26
Interview 8 COLEMAN 5/29/2007 13:54
Interview 8 COLEMAN 5/29/2007 9:00
Interview 14 COLEMAN 5/25/2007 8:37
Interview 61 COLEMAN 5/24/2007 10:57
Interview 14 COLEMAN 5/23/2007 9:32
Interview 26 COLEMAN 5/18/2007 9:39
Interview 23 Coleman 5/14/2007 13:25
… (20 more records)
Followed by 30 each of the other 3 milecodes for Coleman
Approach 0 DOLAN 6/6/2007 9:27
Approach 1 DOLAN 6/5/2007 13:39
Approach 1 DOLAN 6/5/2007 12:20
Approach 1 DOLAN 6/5/2007 11:37
Approach 92 DOLAN 6/5/2007 11:12
Approach 91 DOLAN 6/5/2007 9:44
Approach 2 Dolan 6/4/2007 11:53
Approach 2 DOLAN 6/4/2007 11:11
Approach 5 Dolan 6/1/2007 16:07
Approach 177 DOLAN 6/1/2007 10:58
… (20 more records)
Interview 96 DOLAN 6/5/2007 16:38
Interview 167 DOLAN 6/5/2007 14:24
Interview 2 Dolan 6/4/2007 14:54
Interview 2 DOLAN 6/4/2007 11:11
Interview 5 Dolan 6/1/2007 16:07
Interview 27 DOLAN 6/1/2007 14:48
Interview 323 DOLAN 6/1/2007 13:49
Interview 15 Dolan 6/1/2007 11:48
Interview 224 DOLAN 5/31/2007 16:40
Interview 126 Dolan 5/30/2007 15:02
… (20 more records)
…Here is one way.
Code Snippet
select *
from tb t1
where t1.deltat in (select top 90 percent t2.deltat from tb t2 where t2.mile_code=t1.mile_code order by t2.deltat asc)
and t1.deltat in (select top 90 percent t3.deltat from tb t3 where t3.mile_code=t1.mile_code order by t3.deltat desc)
|||What should be the criteria to eliminate the top and bottom 10 % of [DeltaT], other than [mile_code] and [rep]?
Should we sort by [mile_code], [rep] and [DeltaT] and then eliminate the top and bottom 10%?
AMB
|||I thought of this on the way to work this morning and realized the way I described what I wanted was incorrect. If the data is sorted by DeltaT for each mile_code and rep combination then in this example (30 records for each group and 10% gone from top to bottom) the first 3 records and last 3 records should be removed. So it doesn't matter what the values of DeltaT are. If there were fractions caused by the number of records chosen and/or the percentage then I'd want it to just round to the nearest whole number.|||Try:
Code Snippet
createtable #t (
mile_code varchar(25)notnull,
rep varchar(25)notnull,
deltat intnotnull
)
go
declare @.i int, @.j int
declare @.mile_code varchar(25)
declare @.rep varchar(25)
set @.j = 1
while @.j <= 2
begin
set @.rep =
case
when @.j = 1 then'COLEMAN'
when @.j = 2 then'DOLAN '
end
set @.i = 1
while @.i <= 60
begin
set @.mile_code =
case
when @.i between 1 and 30 then'Approach'
when @.i between 31 and 60 then'Interview'
end
insertinto #t values(@.mile_code, @.rep,(abs(checksum(newid()))% 100)+ 1)
set @.i = @.i + 1
end
set @.j = @.j + 1
end
go
declare @.percent int
set @.percent = 10
;with cte
as
(
select
mile_code,
rep,
deltat,
((row_number()over(partitionby mile_code, rep orderby deltat))* 100.00)/(count(*)over(partitionby mile_code, rep))as pct
from
#t
)
select
mile_code,
rep,
avg(deltat)as avg_deltat
from
cte
where
pct between @.percent and (100.00 - @.percent)
groupby
mile_code,
rep
orderby
mile_code,
rep
go
droptable #t
go
AMB
|||I am not perfectly clear about what you are trying to do, but here is a solution that removes the top 10 percent using a CTE and a couple of aggregates. Note this will only work in 2005...
;with rowCounts as (
select row_number() over (partition by mile_code,rep order by closeDate) as rowNum, --gives a row number per group, ordered by close date
count(*) over (partition by mile_code,rep) as totalRows, --gets total rows by mile_code and rep. This is how I do the work later
mile_code,rep, deltaT, closeDate
from #t) --this is the same as his #t, plus the close date. I will repeat it after this explanation
select mile_code,rep, deltaT, closeDate, rowNum, totalRows
from rowCounts
where rowNum > totalRows * (@.percent / 100.0) --removes the bottom 10 percent
and rowNum < totalRows - (totalRows * (@.percent / 100.0)) -- removes the top
The whole code (thanks to hunchback):
set nocount on
create table #t (
mile_code varchar(25) not null,
rep varchar(25) not null,
deltat int not null,
closeDate datetime
)
go
declare @.i int, @.j int
declare @.mile_code varchar(25)
declare @.rep varchar(25)
declare @.closeDate datetime
set @.j = 1
while @.j <= 2
begin
set @.rep =
case
when @.j = 1 then 'COLEMAN'
when @.j = 2 then 'DOLAN '
end
set @.i = 1
while @.i <= 60
begin
set @.mile_code =
case
when @.i between 1 and 22 then 'Approach'
when @.i between 23 and 60 then 'Interview'
end
insert into #t values(@.mile_code, @.rep, (abs(checksum(newid())) % 100) + 1,
dateadd(day,(abs(checksum(newid())) % 100) + 1,'20070101'))
set @.i = @.i + 1
end
set @.j = @.j + 1
end
go
declare @.percent int
set @.percent = 10
;with rowCounts as (
select row_number() over (partition by mile_code,rep order by closeDate) as rowNum,
count(*) over (partition by mile_code,rep) as totalRows,
mile_code,rep, deltaT, closeDate
from #t)
select mile_code,rep, deltaT, closeDate, rowNum, totalRows
from rowCounts
where rowNum > totalRows * (@.percent / 100.0)
and rowNum < totalRows - (totalRows * (@.percent / 100.0))
drop table #t
go
|||here's another alternative,again, thanks to hunchback for the test data
create table #t (
mile_code varchar(25) not null,
rep varchar(25) not null,
deltat int not null
)
go
declare @.i int, @.j int
declare @.mile_code varchar(25)
declare @.rep varchar(25)
set @.j = 1
while @.j <= 2
begin
set @.rep =
case
when @.j = 1 then 'COLEMAN'
when @.j = 2 then 'DOLAN '
end
set @.i = 1
while @.i <= 60
begin
set @.mile_code =
case
when @.i between 1 and 30 then 'Approach'
when @.i between 31 and 60 then 'Interview'
end
insert into #t values(@.mile_code, @.rep, (abs(checksum(newid())) % 100) + 1)
set @.i = @.i + 1
end
set @.j = @.j + 1
end
go
SELECT IDENTITY(int,1,1) AS row_number
, *
INTO #a
FROM #t
ORDER BY
mile_code
, rep
, deltat
DECLARE @.percnt int
SET @.percnt = 10
SELECT a.mile_code
, a.rep
, a.deltat
, row_number - minrow AS grp_row_number
, maxrow * (@.percnt/100.0) as countpergrp
FROM #a a INNER JOIN
(
SELECT mile_code
, rep
, MIN(row_number) - 1 AS minrow
, COUNT(*) AS maxrow
FROM #a
GROUP BY
mile_code
, rep
) b on a.mile_code = b.mile_code
and a.rep = b.rep
WHERE row_number - minrow between (maxrow * (@.percnt/100.0)) and (maxrow - (maxrow * (@.percnt/100.0)))
DROP TABLE #t
DROP TABLE #a
GO|||
Thanks everyone. Still have a little more work to do to get the final result before putting it into producton (Have to add some other calculations but will do that by joining in another temp table or something.) but all of your input helped a lot. Here is what I ended up with thanks to your help:
DECLARE @.percent int,
@.recentindex int
SET @.percent = 10
SET @.recentindex = 45
Then crate temp table #miledata to get the last 'x' number of records for each rep in each mile_code that they have completed. Do this based off of the @.recentindex variable.
Then use either of the following to get results. Haven't decided which I'll put into production yet.
OPTION A
/*This creates a common table expression to run our aggregate query against.*/
;WITH cte_AveRecords AS
(
SELECT row_number() OVER(partition BY mile_code, rep ORDER BY deltaT) AS rowNum, --gives a row number per group, ordered by deltaT
COUNT(*) OVER (partition BY mile_code, rep) AS totalRows, --gets total rows by mile_code and rep.
mile_code,
rep,
deltaT,
recentindex
FROM#miledata
)
/*This deterimines the average per mile_code and rep by selecting the rows that fall above the low percent and less than or equal to the upper percent.*/
SELECT c.mile_code,
c.rep,
CAST(AVG(CAST(a.deltaT AS Decimal(5,1))) AS Decimal(5,1)) AS AvgTime
FROM
(
SELECT mile_code, rep, deltaT, recentindex, rowNum, totalRows
FROM cte_AveRecords
WHERE rowNum > totalRows * (@.percent / 100.0) --removes the bottom 'x' percent
AND rowNum < totalRows - (totalRows * (@.percent / 100.0)) -- removes the top
)a
JOIN cte_AveRecords c ON a.rep = c.rep AND a.mile_code = c.mile_code
GROUP BY c.mile_code,
c.rep
ORDER BY c.rep,
c.mile_code
OPTION B
/*This creates a common table expression to run our aggregate query against.*/
;WITH cte_AveRecords AS
(
SELECT
mile_code,
rep,
deltat,
/*This assigns a percentage to each row number in the mile_code and rep group.See explaination A below*/
((row_number() over(partition by mile_code, rep order by deltat)) * 100.00) / (count(*) over(partition by mile_code, rep)) as pct
FROM #miledata
)
/*This deterimines the average per mile_code and rep based on what we populated the CTE with..*/
SELECT mile_code,
rep,
CAST(AVG(CAST(deltat AS Decimal(5,1))) AS Decimal(5,1)) AS avg_deltat
FROM cte_AveRecords
WHERE pct > @.percent and pct < (100.00 - @.percent)
GROUP BY mile_code,
rep
ORDER BY rep,
mile_code
GO
No comments:
Post a Comment