Skip to content

Commit 7023fc4

Browse files
authored
fix: correct image to image DDIM and TCD (#1410)
1 parent e77e4c4 commit 7023fc4

2 files changed

Lines changed: 36 additions & 46 deletions

File tree

src/denoiser.hpp

Lines changed: 33 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1525,32 +1525,12 @@ static sd::Tensor<float> sample_ddim_trailing(denoise_cb_t model,
15251525
const std::vector<float>& sigmas,
15261526
std::shared_ptr<RNG> rng,
15271527
float eta) {
1528-
float beta_start = 0.00085f;
1529-
float beta_end = 0.0120f;
1530-
std::vector<double> alphas_cumprod(TIMESTEPS);
1531-
std::vector<double> compvis_sigmas(TIMESTEPS);
1532-
for (int i = 0; i < TIMESTEPS; i++) {
1533-
alphas_cumprod[i] =
1534-
(i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
1535-
(1.0f -
1536-
std::pow(sqrtf(beta_start) +
1537-
(sqrtf(beta_end) - sqrtf(beta_start)) *
1538-
((float)i / (TIMESTEPS - 1)),
1539-
2));
1540-
compvis_sigmas[i] =
1541-
std::sqrt((1 - alphas_cumprod[i]) / alphas_cumprod[i]);
1542-
}
15431528

15441529
int steps = static_cast<int>(sigmas.size()) - 1;
15451530
for (int i = 0; i < steps; i++) {
1546-
int timestep = static_cast<int>(roundf(TIMESTEPS - i * ((float)TIMESTEPS / steps))) - 1;
1547-
int prev_timestep = timestep - TIMESTEPS / steps;
1548-
float sigma = static_cast<float>(compvis_sigmas[timestep]);
1549-
if (i == 0) {
1550-
x *= std::sqrt(sigma * sigma + 1) / sigma;
1551-
} else {
1552-
x *= std::sqrt(sigma * sigma + 1);
1553-
}
1531+
1532+
float sigma = sigmas[i];
1533+
float sigma_to = sigmas[i + 1];
15541534

15551535
auto model_output_opt = model(x, sigma, i + 1);
15561536
if (model_output_opt.empty()) {
@@ -1559,8 +1539,8 @@ static sd::Tensor<float> sample_ddim_trailing(denoise_cb_t model,
15591539
sd::Tensor<float> model_output = std::move(model_output_opt);
15601540
model_output = (x - model_output) * (1.0f / sigma);
15611541

1562-
float alpha_prod_t = static_cast<float>(alphas_cumprod[timestep]);
1563-
float alpha_prod_t_prev = static_cast<float>(prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0]);
1542+
float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f);
1543+
float alpha_prod_t_prev = 1.0f / (sigma_to * sigma_to + 1.0f);
15641544
float beta_prod_t = 1.0f - alpha_prod_t;
15651545

15661546
sd::Tensor<float> pred_original_sample = ((x / std::sqrt(sigma * sigma + 1)) -
@@ -1572,12 +1552,13 @@ static sd::Tensor<float> sample_ddim_trailing(denoise_cb_t model,
15721552
(1.0f - alpha_prod_t / alpha_prod_t_prev);
15731553
float std_dev_t = eta * std::sqrt(variance);
15741554

1575-
x = std::sqrt(alpha_prod_t_prev) * pred_original_sample +
1576-
std::sqrt(1.0f - alpha_prod_t_prev - std::pow(std_dev_t, 2)) * model_output;
1555+
x = pred_original_sample +
1556+
std::sqrt((1.0f - alpha_prod_t_prev - std::pow(std_dev_t, 2))/ alpha_prod_t_prev) * model_output;
15771557

15781558
if (eta > 0) {
1579-
x += std_dev_t * sd::Tensor<float>::randn_like(x, rng);
1559+
x+= std_dev_t / std::sqrt(alpha_prod_t_prev) * sd::Tensor<float>::randn_like(x, rng);
15801560
}
1561+
15811562
}
15821563
return x;
15831564
}
@@ -1603,19 +1584,25 @@ static sd::Tensor<float> sample_tcd(denoise_cb_t model,
16031584
std::sqrt((1 - alphas_cumprod[i]) / alphas_cumprod[i]);
16041585
}
16051586

1606-
int original_steps = 50;
1607-
int steps = static_cast<int>(sigmas.size()) - 1;
1587+
auto get_timestep_from_sigma = [&](float s) -> int {
1588+
auto it = std::lower_bound(compvis_sigmas.begin(), compvis_sigmas.end(), s);
1589+
if (it == compvis_sigmas.begin()) return 0;
1590+
if (it == compvis_sigmas.end()) return TIMESTEPS - 1;
1591+
int idx_high = static_cast<int>(std::distance(compvis_sigmas.begin(), it));
1592+
int idx_low = idx_high - 1;
1593+
if (std::abs(compvis_sigmas[idx_high] - s) < std::abs(compvis_sigmas[idx_low] - s)) {
1594+
return idx_high;
1595+
}
1596+
return idx_low;
1597+
};
1598+
1599+
int steps = static_cast<int>(sigmas.size()) - 1;
16081600
for (int i = 0; i < steps; i++) {
1609-
int timestep = TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor(i * ((float)original_steps / steps));
1610-
int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor((i + 1) * ((float)original_steps / steps));
1611-
int timestep_s = (int)floor((1 - eta) * prev_timestep);
1612-
float sigma = static_cast<float>(compvis_sigmas[timestep]);
16131601

1614-
if (i == 0) {
1615-
x *= std::sqrt(sigma * sigma + 1) / sigma;
1616-
} else {
1617-
x *= std::sqrt(sigma * sigma + 1);
1618-
}
1602+
float sigma_to = sigmas[i + 1];
1603+
int prev_timestep = get_timestep_from_sigma(sigma_to);
1604+
int timestep_s = (int)floor((1 - eta) * prev_timestep);
1605+
float sigma = sigmas[i];
16191606

16201607
auto model_output_opt = model(x, sigma, i + 1);
16211608
if (model_output_opt.empty()) {
@@ -1624,23 +1611,24 @@ static sd::Tensor<float> sample_tcd(denoise_cb_t model,
16241611
sd::Tensor<float> model_output = std::move(model_output_opt);
16251612
model_output = (x - model_output) * (1.0f / sigma);
16261613

1627-
float alpha_prod_t = static_cast<float>(alphas_cumprod[timestep]);
1614+
float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f);
16281615
float beta_prod_t = 1.0f - alpha_prod_t;
1629-
float alpha_prod_t_prev = static_cast<float>(prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0]);
1616+
float alpha_prod_t_prev = 1.0f / (sigma_to * sigma_to + 1.0f);
16301617
float alpha_prod_s = static_cast<float>(alphas_cumprod[timestep_s]);
16311618
float beta_prod_s = 1.0f - alpha_prod_s;
16321619

16331620
sd::Tensor<float> pred_original_sample = ((x / std::sqrt(sigma * sigma + 1)) -
16341621
std::sqrt(beta_prod_t) * model_output) *
16351622
(1.0f / std::sqrt(alpha_prod_t));
16361623

1637-
x = std::sqrt(alpha_prod_s) * pred_original_sample +
1638-
std::sqrt(beta_prod_s) * model_output;
1624+
x = std::sqrt(alpha_prod_s / alpha_prod_t_prev) * pred_original_sample +
1625+
std::sqrt(beta_prod_s / alpha_prod_t_prev) * model_output;
16391626

1640-
if (eta > 0 && i != steps - 1) {
1627+
if (eta > 0 && sigma_to > 0.0f) {
16411628
x = std::sqrt(alpha_prod_t_prev / alpha_prod_s) * x +
1642-
std::sqrt(1.0f - alpha_prod_t_prev / alpha_prod_s) * sd::Tensor<float>::randn_like(x, rng);
1629+
std::sqrt(1.0f / alpha_prod_t_prev - 1.0f / alpha_prod_s) * sd::Tensor<float>::randn_like(x, rng);
16431630
}
1631+
16441632
}
16451633
return x;
16461634
}

src/stable-diffusion.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2457,8 +2457,10 @@ enum scheduler_t sd_get_default_scheduler(const sd_ctx_t* sd_ctx, enum sample_me
24572457
return EXPONENTIAL_SCHEDULER;
24582458
}
24592459
}
2460-
if (sample_method == LCM_SAMPLE_METHOD) {
2460+
if (sample_method == LCM_SAMPLE_METHOD || sample_method == TCD_SAMPLE_METHOD) {
24612461
return LCM_SCHEDULER;
2462+
} else if (sample_method == DDIM_TRAILING_SAMPLE_METHOD) {
2463+
return SIMPLE_SCHEDULER;
24622464
}
24632465
return DISCRETE_SCHEDULER;
24642466
}

0 commit comments

Comments
 (0)