-- Rebuild kmeans_price_paid_cluster_final by joining kmeans_price_paid_cluster
-- to kmeans_price_paid_cluster_summary (cluster_id = column_id), so each
-- postcode_out row also carries its cluster's rank and description.
-- "if exists" lets the script run on a database where the table has not been
-- created yet; a plain "drop table" raises an error in that case.
drop table if exists kmeans_price_paid_cluster_final;

create table kmeans_price_paid_cluster_final
(postcode_out   char(04) not null /* PK (documented only; no constraint is declared) */
,avg_sale_price integer  not null
,sale_count     integer  not null
,cluster_rank   smallint not null
,cluster_desc   char(07) not null)
distributed by (postcode_out);

-- NOTE(review): column_id is matched against cluster_id, so it presumably holds
-- the cluster assigned to each postcode_out -- confirm against the clustering step.
insert into kmeans_price_paid_cluster_final
(postcode_out
,avg_sale_price
,sale_count
,cluster_rank
,cluster_desc)
select clus.postcode_out
,clus.avg_sale_price
,clus.sale_count
,summ.cluster_rank
,summ.cluster_desc
from kmeans_price_paid_cluster_summary summ
inner join kmeans_price_paid_cluster clus
on summ.cluster_id = clus.column_id;

-- Client output recorded after the insert (kept as a comment so the script parses):
-- 2,242 rows inserted
This Data Science (DS) demonstration makes use of MADlib, an open-source technology for Machine Learning (ML) in SQL.
MADlib allows ML algorithms to be executed within the database using SQL – no Python needed!
Our example database is hosted using Greenplum, the Massively Parallel Processing (MPP) data platform based on PostgreSQL.

-- Rebuild kmeans_price_paid_cluster_final by joining kmeans_price_paid_cluster
-- to kmeans_price_paid_cluster_summary (cluster_id = column_id), so each
-- postcode_out row also carries its cluster's rank and description.
-- "if exists" lets the script run on a database where the table has not been
-- created yet; a plain "drop table" raises an error in that case.
drop table if exists kmeans_price_paid_cluster_final;

create table kmeans_price_paid_cluster_final
(postcode_out   char(04) not null /* PK (documented only; no constraint is declared) */
,avg_sale_price integer  not null
,sale_count     integer  not null
,cluster_rank   smallint not null
,cluster_desc   char(07) not null)
distributed by (postcode_out);

-- NOTE(review): column_id is matched against cluster_id, so it presumably holds
-- the cluster assigned to each postcode_out -- confirm against the clustering step.
insert into kmeans_price_paid_cluster_final
(postcode_out
,avg_sale_price
,sale_count
,cluster_rank
,cluster_desc)
select clus.postcode_out
,clus.avg_sale_price
,clus.sale_count
,summ.cluster_rank
,summ.cluster_desc
from kmeans_price_paid_cluster_summary summ
inner join kmeans_price_paid_cluster clus
on summ.cluster_id = clus.column_id;

-- Client output recorded after the insert (kept as a comment so the script parses):
-- 2,242 rows inserted

-- Rebuild kmeans_price_paid_cluster_final by joining kmeans_price_paid_cluster
-- to kmeans_price_paid_cluster_summary (cluster_id = column_id), so each
-- postcode_out row also carries its cluster's rank and description.
-- "if exists" lets the script run on a database where the table has not been
-- created yet; a plain "drop table" raises an error in that case.
drop table if exists kmeans_price_paid_cluster_final;

create table kmeans_price_paid_cluster_final
(postcode_out   char(04) not null /* PK (documented only; no constraint is declared) */
,avg_sale_price integer  not null
,sale_count     integer  not null
,cluster_rank   smallint not null
,cluster_desc   char(07) not null)
distributed by (postcode_out);

-- NOTE(review): column_id is matched against cluster_id, so it presumably holds
-- the cluster assigned to each postcode_out -- confirm against the clustering step.
insert into kmeans_price_paid_cluster_final
(postcode_out
,avg_sale_price
,sale_count
,cluster_rank
,cluster_desc)
select clus.postcode_out
,clus.avg_sale_price
,clus.sale_count
,summ.cluster_rank
,summ.cluster_desc
from kmeans_price_paid_cluster_summary summ
inner join kmeans_price_paid_cluster clus
on summ.cluster_id = clus.column_id;

-- Client output recorded after the insert (kept as a comment so the script parses):
-- 2,242 rows inserted

-- Rebuild kmeans_price_paid_cluster_final by joining kmeans_price_paid_cluster
-- to kmeans_price_paid_cluster_summary (cluster_id = column_id), so each
-- postcode_out row also carries its cluster's rank and description.
-- "if exists" lets the script run on a database where the table has not been
-- created yet; a plain "drop table" raises an error in that case.
drop table if exists kmeans_price_paid_cluster_final;

create table kmeans_price_paid_cluster_final
(postcode_out   char(04) not null /* PK (documented only; no constraint is declared) */
,avg_sale_price integer  not null
,sale_count     integer  not null
,cluster_rank   smallint not null
,cluster_desc   char(07) not null)
distributed by (postcode_out);

-- NOTE(review): column_id is matched against cluster_id, so it presumably holds
-- the cluster assigned to each postcode_out -- confirm against the clustering step.
insert into kmeans_price_paid_cluster_final
(postcode_out
,avg_sale_price
,sale_count
,cluster_rank
,cluster_desc)
select clus.postcode_out
,clus.avg_sale_price
,clus.sale_count
,summ.cluster_rank
,summ.cluster_desc
from kmeans_price_paid_cluster_summary summ
inner join kmeans_price_paid_cluster clus
on summ.cluster_id = clus.column_id;

-- Client output recorded after the insert (kept as a comment so the script parses):
-- 2,242 rows inserted

-- Rebuild kmeans_price_paid_cluster_final by joining kmeans_price_paid_cluster
-- to kmeans_price_paid_cluster_summary (cluster_id = column_id), so each
-- postcode_out row also carries its cluster's rank and description.
-- "if exists" lets the script run on a database where the table has not been
-- created yet; a plain "drop table" raises an error in that case.
drop table if exists kmeans_price_paid_cluster_final;

create table kmeans_price_paid_cluster_final
(postcode_out   char(04) not null /* PK (documented only; no constraint is declared) */
,avg_sale_price integer  not null
,sale_count     integer  not null
,cluster_rank   smallint not null
,cluster_desc   char(07) not null)
distributed by (postcode_out);

-- NOTE(review): column_id is matched against cluster_id, so it presumably holds
-- the cluster assigned to each postcode_out -- confirm against the clustering step.
insert into kmeans_price_paid_cluster_final
(postcode_out
,avg_sale_price
,sale_count
,cluster_rank
,cluster_desc)
select clus.postcode_out
,clus.avg_sale_price
,clus.sale_count
,summ.cluster_rank
,summ.cluster_desc
from kmeans_price_paid_cluster_summary summ
inner join kmeans_price_paid_cluster clus
on summ.cluster_id = clus.column_id;

-- Client output recorded after the insert (kept as a comment so the script parses):
-- 2,242 rows inserted

-- Rebuild kmeans_price_paid_cluster_final by joining kmeans_price_paid_cluster
-- to kmeans_price_paid_cluster_summary (cluster_id = column_id), so each
-- postcode_out row also carries its cluster's rank and description.
-- "if exists" lets the script run on a database where the table has not been
-- created yet; a plain "drop table" raises an error in that case.
drop table if exists kmeans_price_paid_cluster_final;

create table kmeans_price_paid_cluster_final
(postcode_out   char(04) not null /* PK (documented only; no constraint is declared) */
,avg_sale_price integer  not null
,sale_count     integer  not null
,cluster_rank   smallint not null
,cluster_desc   char(07) not null)
distributed by (postcode_out);

-- NOTE(review): column_id is matched against cluster_id, so it presumably holds
-- the cluster assigned to each postcode_out -- confirm against the clustering step.
insert into kmeans_price_paid_cluster_final
(postcode_out
,avg_sale_price
,sale_count
,cluster_rank
,cluster_desc)
select clus.postcode_out
,clus.avg_sale_price
,clus.sale_count
,summ.cluster_rank
,summ.cluster_desc
from kmeans_price_paid_cluster_summary summ
inner join kmeans_price_paid_cluster clus
on summ.cluster_id = clus.column_id;

-- Client output recorded after the insert (kept as a comment so the script parses):
-- 2,242 rows inserted

The HM Land Registry ‘Price Paid Data’ (PPD) contains information on all property sold in England and Wales, from the start of 1995 up to the end of the previous month (June 2022 at the time of writing).
Fields relevant to our worked example include ‘sale price’ and ‘postcode’, which are self-explanatory. These are the chosen data points of interest for the clustering model.
-- Rebuild kmeans_price_paid_cluster_final by joining kmeans_price_paid_cluster
-- to kmeans_price_paid_cluster_summary (cluster_id = column_id), so each
-- postcode_out row also carries its cluster's rank and description.
-- "if exists" lets the script run on a database where the table has not been
-- created yet; a plain "drop table" raises an error in that case.
drop table if exists kmeans_price_paid_cluster_final;

create table kmeans_price_paid_cluster_final
(postcode_out   char(04) not null /* PK (documented only; no constraint is declared) */
,avg_sale_price integer  not null
,sale_count     integer  not null
,cluster_rank   smallint not null
,cluster_desc   char(07) not null)
distributed by (postcode_out);

-- NOTE(review): column_id is matched against cluster_id, so it presumably holds
-- the cluster assigned to each postcode_out -- confirm against the clustering step.
insert into kmeans_price_paid_cluster_final
(postcode_out
,avg_sale_price
,sale_count
,cluster_rank
,cluster_desc)
select clus.postcode_out
,clus.avg_sale_price
,clus.sale_count
,summ.cluster_rank
,summ.cluster_desc
from kmeans_price_paid_cluster_summary summ
inner join kmeans_price_paid_cluster clus
on summ.cluster_id = clus.column_id;

-- Client output recorded after the insert (kept as a comment so the script parses):
-- 2,242 rows inserted

-- Rebuild kmeans_price_paid_cluster_final by joining kmeans_price_paid_cluster
-- to kmeans_price_paid_cluster_summary (cluster_id = column_id), so each
-- postcode_out row also carries its cluster's rank and description.
-- "if exists" lets the script run on a database where the table has not been
-- created yet; a plain "drop table" raises an error in that case.
drop table if exists kmeans_price_paid_cluster_final;

create table kmeans_price_paid_cluster_final
(postcode_out   char(04) not null /* PK (documented only; no constraint is declared) */
,avg_sale_price integer  not null
,sale_count     integer  not null
,cluster_rank   smallint not null
,cluster_desc   char(07) not null)
distributed by (postcode_out);

-- NOTE(review): column_id is matched against cluster_id, so it presumably holds
-- the cluster assigned to each postcode_out -- confirm against the clustering step.
insert into kmeans_price_paid_cluster_final
(postcode_out
,avg_sale_price
,sale_count
,cluster_rank
,cluster_desc)
select clus.postcode_out
,clus.avg_sale_price
,clus.sale_count
,summ.cluster_rank
,summ.cluster_desc
from kmeans_price_paid_cluster_summary summ
inner join kmeans_price_paid_cluster clus
on summ.cluster_id = clus.column_id;

-- Client output recorded after the insert (kept as a comment so the script parses):
-- 2,242 rows inserted

-- Rebuild kmeans_price_paid_cluster_final by joining kmeans_price_paid_cluster
-- to kmeans_price_paid_cluster_summary (cluster_id = column_id), so each
-- postcode_out row also carries its cluster's rank and description.
-- "if exists" lets the script run on a database where the table has not been
-- created yet; a plain "drop table" raises an error in that case.
drop table if exists kmeans_price_paid_cluster_final;

create table kmeans_price_paid_cluster_final
(postcode_out   char(04) not null /* PK (documented only; no constraint is declared) */
,avg_sale_price integer  not null
,sale_count     integer  not null
,cluster_rank   smallint not null
,cluster_desc   char(07) not null)
distributed by (postcode_out);

-- NOTE(review): column_id is matched against cluster_id, so it presumably holds
-- the cluster assigned to each postcode_out -- confirm against the clustering step.
insert into kmeans_price_paid_cluster_final
(postcode_out
,avg_sale_price
,sale_count
,cluster_rank
,cluster_desc)
select clus.postcode_out
,clus.avg_sale_price
,clus.sale_count
,summ.cluster_rank
,summ.cluster_desc
from kmeans_price_paid_cluster_summary summ
inner join kmeans_price_paid_cluster clus
on summ.cluster_id = clus.column_id;

-- Client output recorded after the insert (kept as a comment so the script parses):
-- 2,242 rows inserted


-- Rebuild kmeans_price_paid_cluster_final from scratch: the table is dropped,
-- recreated and then repopulated by the insert below.
-- "if exists" keeps this step re-runnable on a database where the table has
-- not been created yet (a plain drop would raise an error there).
drop table if exists kmeans_price_paid_cluster_final;
-- postcode_out is annotated as the key and is the distribution column;
-- note that no primary key constraint is actually declared.
create table kmeans_price_paid_cluster_final
(postcode_out char(04) not null /* PK */
,avg_sale_price integer not null
,sale_count integer not null
,cluster_rank smallint not null
,cluster_desc char(07) not null)
distributed by (postcode_out);
-- Attach the rank and description held in kmeans_price_paid_cluster_summary
-- to the rows of kmeans_price_paid_cluster.
-- NOTE(review): the join matches summary.cluster_id to cluster.column_id;
-- confirm column_id is the cluster-assignment column on that table.
insert into kmeans_price_paid_cluster_final
(postcode_out
,avg_sale_price
,sale_count
,cluster_rank
,cluster_desc)
select cluster.postcode_out
,cluster.avg_sale_price
,cluster.sale_count
,summary.cluster_rank
,summary.cluster_desc
from kmeans_price_paid_cluster_summary as summary
inner join kmeans_price_paid_cluster as cluster
on summary.cluster_id = cluster.column_id;
2,242 rows inserted
-- Rebuild kmeans_price_paid_cluster_final from scratch: the table is dropped,
-- recreated and then repopulated by the insert below.
-- "if exists" keeps this step re-runnable on a database where the table has
-- not been created yet (a plain drop would raise an error there).
drop table if exists kmeans_price_paid_cluster_final;
-- postcode_out is annotated as the key and is the distribution column;
-- note that no primary key constraint is actually declared.
create table kmeans_price_paid_cluster_final
(postcode_out char(04) not null /* PK */
,avg_sale_price integer not null
,sale_count integer not null
,cluster_rank smallint not null
,cluster_desc char(07) not null)
distributed by (postcode_out);
-- Attach the rank and description held in kmeans_price_paid_cluster_summary
-- to the rows of kmeans_price_paid_cluster.
-- NOTE(review): the join matches summary.cluster_id to cluster.column_id;
-- confirm column_id is the cluster-assignment column on that table.
insert into kmeans_price_paid_cluster_final
(postcode_out
,avg_sale_price
,sale_count
,cluster_rank
,cluster_desc)
select cluster.postcode_out
,cluster.avg_sale_price
,cluster.sale_count
,summary.cluster_rank
,summary.cluster_desc
from kmeans_price_paid_cluster_summary as summary
inner join kmeans_price_paid_cluster as cluster
on summary.cluster_id = cluster.column_id;
2,242 rows inserted
-- Rebuild kmeans_price_paid_cluster_final from scratch: the table is dropped,
-- recreated and then repopulated by the insert below.
-- "if exists" keeps this step re-runnable on a database where the table has
-- not been created yet (a plain drop would raise an error there).
drop table if exists kmeans_price_paid_cluster_final;
-- postcode_out is annotated as the key and is the distribution column;
-- note that no primary key constraint is actually declared.
create table kmeans_price_paid_cluster_final
(postcode_out char(04) not null /* PK */
,avg_sale_price integer not null
,sale_count integer not null
,cluster_rank smallint not null
,cluster_desc char(07) not null)
distributed by (postcode_out);
-- Attach the rank and description held in kmeans_price_paid_cluster_summary
-- to the rows of kmeans_price_paid_cluster.
-- NOTE(review): the join matches summary.cluster_id to cluster.column_id;
-- confirm column_id is the cluster-assignment column on that table.
insert into kmeans_price_paid_cluster_final
(postcode_out
,avg_sale_price
,sale_count
,cluster_rank
,cluster_desc)
select cluster.postcode_out
,cluster.avg_sale_price
,cluster.sale_count
,summary.cluster_rank
,summary.cluster_desc
from kmeans_price_paid_cluster_summary as summary
inner join kmeans_price_paid_cluster as cluster
on summary.cluster_id = cluster.column_id;
2,242 rows inserted
-- Rebuild kmeans_price_paid_cluster_final from scratch: the table is dropped,
-- recreated and then repopulated by the insert below.
-- "if exists" keeps this step re-runnable on a database where the table has
-- not been created yet (a plain drop would raise an error there).
drop table if exists kmeans_price_paid_cluster_final;
-- postcode_out is annotated as the key and is the distribution column;
-- note that no primary key constraint is actually declared.
create table kmeans_price_paid_cluster_final
(postcode_out char(04) not null /* PK */
,avg_sale_price integer not null
,sale_count integer not null
,cluster_rank smallint not null
,cluster_desc char(07) not null)
distributed by (postcode_out);
-- Attach the rank and description held in kmeans_price_paid_cluster_summary
-- to the rows of kmeans_price_paid_cluster.
-- NOTE(review): the join matches summary.cluster_id to cluster.column_id;
-- confirm column_id is the cluster-assignment column on that table.
insert into kmeans_price_paid_cluster_final
(postcode_out
,avg_sale_price
,sale_count
,cluster_rank
,cluster_desc)
select cluster.postcode_out
,cluster.avg_sale_price
,cluster.sale_count
,summary.cluster_rank
,summary.cluster_desc
from kmeans_price_paid_cluster_summary as summary
inner join kmeans_price_paid_cluster as cluster
on summary.cluster_id = cluster.column_id;
2,242 rows inserted
-- Rebuild kmeans_price_paid_cluster_final from scratch: the table is dropped,
-- recreated and then repopulated by the insert below.
-- "if exists" keeps this step re-runnable on a database where the table has
-- not been created yet (a plain drop would raise an error there).
drop table if exists kmeans_price_paid_cluster_final;
-- postcode_out is annotated as the key and is the distribution column;
-- note that no primary key constraint is actually declared.
create table kmeans_price_paid_cluster_final
(postcode_out char(04) not null /* PK */
,avg_sale_price integer not null
,sale_count integer not null
,cluster_rank smallint not null
,cluster_desc char(07) not null)
distributed by (postcode_out);
-- Attach the rank and description held in kmeans_price_paid_cluster_summary
-- to the rows of kmeans_price_paid_cluster.
-- NOTE(review): the join matches summary.cluster_id to cluster.column_id;
-- confirm column_id is the cluster-assignment column on that table.
insert into kmeans_price_paid_cluster_final
(postcode_out
,avg_sale_price
,sale_count
,cluster_rank
,cluster_desc)
select cluster.postcode_out
,cluster.avg_sale_price
,cluster.sale_count
,summary.cluster_rank
,summary.cluster_desc
from kmeans_price_paid_cluster_summary as summary
inner join kmeans_price_paid_cluster as cluster
on summary.cluster_id = cluster.column_id;
2,242 rows inserted
- no need to export data from the database
- no need to learn new language(s)
- use your existing favourite SQL tool
- no extra software required for Greenplum/Postgres users
- harness the power of the database engine.
-- Rebuild kmeans_price_paid_cluster_final from scratch: the table is dropped,
-- recreated and then repopulated by the insert below.
-- "if exists" keeps this step re-runnable on a database where the table has
-- not been created yet (a plain drop would raise an error there).
drop table if exists kmeans_price_paid_cluster_final;
-- postcode_out is annotated as the key and is the distribution column;
-- note that no primary key constraint is actually declared.
create table kmeans_price_paid_cluster_final
(postcode_out char(04) not null /* PK */
,avg_sale_price integer not null
,sale_count integer not null
,cluster_rank smallint not null
,cluster_desc char(07) not null)
distributed by (postcode_out);
-- Attach the rank and description held in kmeans_price_paid_cluster_summary
-- to the rows of kmeans_price_paid_cluster.
-- NOTE(review): the join matches summary.cluster_id to cluster.column_id;
-- confirm column_id is the cluster-assignment column on that table.
insert into kmeans_price_paid_cluster_final
(postcode_out
,avg_sale_price
,sale_count
,cluster_rank
,cluster_desc)
select cluster.postcode_out
,cluster.avg_sale_price
,cluster.sale_count
,summary.cluster_rank
,summary.cluster_desc
from kmeans_price_paid_cluster_summary as summary
inner join kmeans_price_paid_cluster as cluster
on summary.cluster_id = cluster.column_id;
2,242 rows inserted