Download data

Download data#

You can compose queries to download raw data.

import doplaydo.dodata as dd
import matplotlib.pyplot as plt
import pandas as pd
import getpass

# Login name of the user running this notebook; used to namespace the project.
username = getpass.getuser()

# Project queried throughout this page: "resistance-" followed by the user name.
PROJECT_ID = f"resistance-{username}"

You have access to:

dd.Project
dd.Die
dd.Wafer
dd.ParentCell
dd.Cell
dd.Device
dd.DeviceData

Each model exposes its table columns as attributes.

You can use get_data_by_query to query a subset of data filtered by a list of clauses.

It returns a list of 2-tuples: the first element of each tuple is a DeviceData object and the second is the corresponding pandas DataFrame.

# Query the project's data and keep the first (DeviceData, DataFrame) pair.
data_tuples = dd.get_data_by_query([dd.Project.project_id == PROJECT_ID], limit=1)
device_data, df = data_tuples[0]  # each tuple has DeviceData and pd.DataFrame
device_data

DeviceData(pkey=6258, data_type=<DeviceDataType.measurement: 'measurement'>, thumbnail_path='device_data/6258/thumbnail.png', plotting_kwargs={'x_col': 'i', 'y_col': ['v'], 'x_name': 'i', 'y_name': 'v', 'scatter': False, 'sort_by': {}, 'x_units': None, 'y_units': None, 'grouping': {}, 'x_limits': None, 'y_limits': None, 'x_log_axis': False, 'y_log_axis': False}, valid=True, device_pkey=595, path='device_data/6258/data.json.gz', attributes={}, die_pkey=711, timestamp=datetime.datetime(2025, 2, 28, 20, 58, 43, 167061), timestamp_acquired=None)

device_id = device_data.device.device_id

df

	v	i	polyfit
0	0.000000e+00	0.000000	2.209364e-08
1	2.929507e-08	1.282051	2.209364e-08
2	5.786033e-08	2.564103	2.209364e-08
3	8.852705e-08	3.846154	2.209364e-08
4	1.133945e-07	5.128205	2.209364e-08
...	...	...	...
74	2.100133e-06	94.871795	2.209364e-08
75	2.211533e-06	96.153846	2.209364e-08
76	2.238454e-06	97.435897	2.209364e-08
77	2.194616e-06	98.717949	2.209364e-08
78	2.217731e-06	100.000000	2.209364e-08

79 rows × 3 columns

# Plot the "v" column against the "i" column of the downloaded dataframe.
plt.plot(df["i"], df["v"])
plt.xlabel("I (A)")
plt.ylabel("V (V)")
plt.title(device_id)  # title the figure with the id of the measured device

Text(0.5, 1.0, 'resistance_resistance_sheet_W10_0_52500')

../../../_images/c681494c0607643e501b982529c9b142795513fb32894fca2389ed6d5d84eba1.png

You can aggregate the dataframes and the device data objects into separate lists:

# Split the (DeviceData, DataFrame) pairs into two parallel lists.
dfs = [frame for _, frame in data_tuples]  # dataframes
dds = [record for record, _ in data_tuples]  # device data objects

You can use the DeviceData object to traverse the data model and access additional fields.

You can go from DeviceData to any other table by following the relationships between the models (the dashed arrows in the data-model diagram).

Each column is an attribute on the object representing the table.

# Starting from the first DeviceData object, follow its relationships to read
# columns from the related tables.
print("device id: ", dds[0].device.device_id)  # DeviceData -> Device
print("die x: ", dds[0].die.x)  # DeviceData -> Die
print("die y: ", dds[0].die.y)
print("wafer id: ", dds[0].die.wafer.wafer_id)  # DeviceData -> Die -> Wafer
print("cell id: ", dds[0].device.cell.cell_id)  # DeviceData -> Device -> Cell
print("parent cell id: ", dds[0].device.parent_cell.cell_id)  # Device -> ParentCell
print("project id: ", dds[0].device.cell.project.project_id)  # Cell -> Project

device id:  resistance_resistance_sheet_W10_0_52500
die x:  1
die y:  2

wafer id:  6d4c615ff105
cell id:  resistance_sheet_W10

parent cell id:  resistance
project id:  resistance-runner

For example, you can reach the Cell table and all its columns:

dds[0].device.cell

Cell(attributes={'x': 0, 'y': 52500, 'width_um': 10.0, 'length_um': 20, 'kfactory:info': "{'resistance': 0}", 'kfactory:ports:0': "{'cross_section': 'da0d77df_50000', 'info': {}, 'name': 'pad1', 'port_type': 'electrical', 'trans': r270 -50000,-25000}", 'kfactory:ports:1': "{'cross_section': 'da0d77df_50000', 'info': {}, 'name': 'pad2', 'port_type': 'electrical', 'trans': r270 50000,-25000}", 'kfactory:settings': "{'width': 10}", 'kfactory:function_name': 'resistance_sheet'}, timestamp=datetime.datetime(2025, 2, 28, 20, 58, 38, 170629), cell_id='resistance_sheet_W10', pkey=595, project_pkey=19)

Build table for JMP#

We recommend doing all analysis in Python, but we also support exporting the data to a flat table for JMP or Excel.

# Select the data of one device, restricted to dies at coordinate (0, 0).
data_tuples = dd.get_data_by_query(
    [
        dd.Project.project_id == PROJECT_ID,
        dd.Device.device_id == device_id,
        dd.Die.x == 0,
        dd.Die.y == 0,
    ]
)
len(data_tuples)  # number of (DeviceData, DataFrame) tuples returned
dds = [dt[0] for dt in data_tuples]  # device data objects
dfs = [dt[1] for dt in data_tuples]  # dataframes
dfs[0]  # show the first dataframe

	v	i	polyfit
0	0.000000e+00	0.000000	2.130771e-08
1	2.866153e-08	1.282051	2.130771e-08
2	5.570783e-08	2.564103	2.130771e-08
3	8.472200e-08	3.846154	2.130771e-08
4	1.123175e-07	5.128205	2.130771e-08
...	...	...	...
74	2.063772e-06	94.871795	2.130771e-08
75	2.071896e-06	96.153846	2.130771e-08
76	2.173670e-06	97.435897	2.130771e-08
77	2.193896e-06	98.717949	2.130771e-08
78	2.223151e-06	100.000000	2.130771e-08

79 rows × 3 columns

# Annotate every dataframe with the identifiers of the device, die, wafer and
# cells its measurement belongs to, then stack them into one flat table.
dfs_all = []

for device_data, df in zip(dds, dfs):
    device = device_data.device
    die = device_data.die
    # Insertion order of this mapping fixes the order of the added columns.
    metadata = {
        "device_id": device.device_id,
        "die_x": die.x,
        "die_y": die.y,
        "wafer_id": die.wafer.wafer_id,
        "cell_id": device.cell.cell_id,
        "parent_cell_id": device.parent_cell.cell_id,
    }
    for column, value in metadata.items():
        df[column] = value  # scalar is broadcast to every row of the frame
    dfs_all.append(df)

dfs_all = pd.concat(dfs_all)  # stack all annotated dataframes row-wise
dfs_all

	v	i	polyfit	device_id	die_x	die_y	wafer_id	cell_id	parent_cell_id
0	0.000000e+00	0.000000	2.130771e-08	resistance_resistance_sheet_W10_0_52500	0	0	2eq221eqewq2	resistance_sheet_W10	resistance
1	2.866153e-08	1.282051	2.130771e-08	resistance_resistance_sheet_W10_0_52500	0	0	2eq221eqewq2	resistance_sheet_W10	resistance
2	5.570783e-08	2.564103	2.130771e-08	resistance_resistance_sheet_W10_0_52500	0	0	2eq221eqewq2	resistance_sheet_W10	resistance
3	8.472200e-08	3.846154	2.130771e-08	resistance_resistance_sheet_W10_0_52500	0	0	2eq221eqewq2	resistance_sheet_W10	resistance
4	1.123175e-07	5.128205	2.130771e-08	resistance_resistance_sheet_W10_0_52500	0	0	2eq221eqewq2	resistance_sheet_W10	resistance
...	...	...	...	...	...	...	...	...	...
74	2.112384e-06	94.871795	2.211458e-08	resistance_resistance_sheet_W10_0_52500	0	0	334abd342zuq	resistance_sheet_W10	resistance
75	2.221826e-06	96.153846	2.211458e-08	resistance_resistance_sheet_W10_0_52500	0	0	334abd342zuq	resistance_sheet_W10	resistance
76	2.218682e-06	97.435897	2.211458e-08	resistance_resistance_sheet_W10_0_52500	0	0	334abd342zuq	resistance_sheet_W10	resistance
77	2.267766e-06	98.717949	2.211458e-08	resistance_resistance_sheet_W10_0_52500	0	0	334abd342zuq	resistance_sheet_W10	resistance
78	2.256414e-06	100.000000	2.211458e-08	resistance_resistance_sheet_W10_0_52500	0	0	334abd342zuq	resistance_sheet_W10	resistance

237 rows × 9 columns

Advanced queries#

To build advanced queries that filter on metadata, you can use the attribute_filter method. You can also use or_ and and_ to combine conditional clauses.

Conditional filter#

# Two clauses passed as separate list items: project id and device id.
data_tuples = dd.get_data_by_query(
    [dd.Project.project_id == PROJECT_ID, dd.Device.device_id == device_id]
)
len(data_tuples)

By default, all clauses passed in the list are combined with and_, so the same query can be written explicitly as:

# Same two clauses, wrapped in a single explicit and_ clause.
data_tuples = dd.get_data_by_query(
    [dd.and_(dd.Project.project_id == PROJECT_ID, dd.Device.device_id == device_id)]
)
len(data_tuples)

You can also use an OR condition.

The query below filters by project id PROJECT_ID AND a device id equal to either device_id1 OR device_id2.

Because there are 21 measurements per device, one for each die, if you look for two specific devices, you will get 42 measurements.

# The two device ids to match; data for either one is returned.
device_id1 = "resistance_resistance_sheet_W10_0_53000"
device_id2 = "resistance_resistance_sheet_W20_0_158000"

# project AND (device_id1 OR device_id2)
data_tuples = dd.get_data_by_query(
    [
        dd.Project.project_id == PROJECT_ID,
        dd.or_(dd.Device.device_id == device_id1, dd.Device.device_id == device_id2),
    ]
)
len(data_tuples)

You can also combine conditionals.

In the example below, you get all the data for the specified project id and device id, restricted to either of two die coordinates: (1, 1) or (0, 0).

# project AND device AND (die (1, 1) OR die (0, 0))
data_tuples = dd.get_data_by_query(
    [
        dd.Project.project_id == PROJECT_ID,
        dd.Device.device_id == device_id,
        dd.or_(
            dd.and_(dd.Die.x == 1, dd.Die.y == 1),  # either die 1,1
            dd.and_(dd.Die.x == 0, dd.Die.y == 0),  # or die 0,0
        ),
    ]
)
len(data_tuples)

Download data

Contents

Download data#

Build table for JMP#

Advanced queries#

Conditional filter#

Attribute filter#