robinzixuan committed on
Commit e5ee791
1 Parent(s): 42857ca

Update modeling_opt.py

Files changed (1)
  1. modeling_opt.py +18 -18
modeling_opt.py CHANGED
@@ -157,8 +157,8 @@ class OPTAttention(nn.Module):
 
         if (self.head_dim * self.num_heads) != self.embed_dim:
             raise ValueError(
-                f"embed_dim must be divisible by num_heads (got `embed_dim`: {
-                    self.embed_dim}"
+                f'''embed_dim must be divisible by num_heads (got `embed_dim`: {
+                    self.embed_dim}'''
                 f" and `num_heads`: {self.num_heads})."
             )
         self.scaling = self.head_dim**-0.5
@@ -236,16 +236,16 @@ class OPTAttention(nn.Module):
 
         if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
             raise ValueError(
-                f"Attention weights should be of size {
-                    (bsz * self.num_heads, tgt_len, src_len)}, but is"
+                f'''Attention weights should be of size {
+                    (bsz * self.num_heads, tgt_len, src_len)}, but is'''
                 f" {attn_weights.size()}"
             )
 
         if attention_mask is not None:
             if attention_mask.size() != (bsz, 1, tgt_len, src_len):
                 raise ValueError(
-                    f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {
-                        attention_mask.size()}"
+                    f'''Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {
+                        attention_mask.size()}'''
                 )
             attn_weights = attn_weights.view(
                 bsz, self.num_heads, tgt_len, src_len) + attention_mask
@@ -266,8 +266,8 @@ class OPTAttention(nn.Module):
         if layer_head_mask is not None:
             if layer_head_mask.size() != (self.num_heads,):
                 raise ValueError(
-                    f"Head mask for a single layer should be of size {
-                        (self.num_heads,)}, but is"
+                    f'''Head mask for a single layer should be of size {
+                        (self.num_heads,)}, but is'''
                     f" {layer_head_mask.size()}"
                 )
             attn_weights = layer_head_mask.view(
@@ -333,8 +333,8 @@ class OPTOutEffHop(OPTAttention):
 
         if (self.head_dim * self.num_heads) != self.embed_dim:
             raise ValueError(
-                f"embed_dim must be divisible by num_heads (got `embed_dim`: {
-                    self.embed_dim}"
+                f'''embed_dim must be divisible by num_heads (got `embed_dim`: {
+                    self.embed_dim}'''
                 f" and `num_heads`: {self.num_heads})."
             )
         self.scaling = self.head_dim**-0.5
@@ -412,16 +412,16 @@ class OPTOutEffHop(OPTAttention):
 
         if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
             raise ValueError(
-                f"Attention weights should be of size {
+                f'''Attention weights should be of size {
                     (bsz * self.num_heads, tgt_len, src_len)}, but is"
-                f" {attn_weights.size()}"
+                f" {attn_weights.size()}'''
             )
 
         if attention_mask is not None:
             if attention_mask.size() != (bsz, 1, tgt_len, src_len):
                 raise ValueError(
-                    f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {
-                        attention_mask.size()}"
+                    f'''Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {
+                        attention_mask.size()}'''
                 )
             attn_weights = attn_weights.view(
                 bsz, self.num_heads, tgt_len, src_len) + attention_mask
@@ -442,8 +442,8 @@ class OPTOutEffHop(OPTAttention):
         if layer_head_mask is not None:
             if layer_head_mask.size() != (self.num_heads,):
                 raise ValueError(
-                    f"Head mask for a single layer should be of size {
-                        (self.num_heads,)}, but is"
+                    f'''Head mask for a single layer should be of size {
+                        (self.num_heads,)}, but is'''
                     f" {layer_head_mask.size()}"
                 )
             attn_weights = layer_head_mask.view(
@@ -470,8 +470,8 @@ class OPTOutEffHop(OPTAttention):
 
         if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):
             raise ValueError(
-                f"`attn_output` should be of size {
-                    (bsz, self.num_heads, tgt_len, self.head_dim)}, but is"
+                f'''`attn_output` should be of size {
+                    (bsz, self.num_heads, tgt_len, self.head_dim)}, but is'''
                 f" {attn_output.size()}"
             )
 
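
Note on the change (a hedged reading of the diff; the commit message does not state a rationale): each edited spot replaces an f"..." literal whose {...} replacement field had been wrapped onto a second line with an f'''...''' literal. A replacement field that spans a line break is a SyntaxError in a single- or double-quoted f-string before Python 3.12 (PEP 701), but it is legal inside a triple-quoted f-string, so the switch keeps these reformatted error messages parseable on older interpreters. A minimal sketch follows; embed_dim, num_heads, and msg are illustrative names and values, not taken from modeling_opt.py.

    # Minimal sketch of the f-string behaviour the diff appears to target.
    # SyntaxError on Python < 3.12: the {...} field crosses a line break
    # inside a double-quoted f-string.
    # msg = f"embed_dim must be divisible by num_heads (got `embed_dim`: {
    #     embed_dim}"

    # Valid on Python 3.8+: a triple-quoted f-string may span lines, and the
    # newline sits inside the replacement field, so it does not appear in the
    # rendered message.
    embed_dim, num_heads = 768, 12  # illustrative values only
    msg = (
        f'''embed_dim must be divisible by num_heads (got `embed_dim`: {
            embed_dim}'''
        f" and `num_heads`: {num_heads})."
    )
    # Prints: embed_dim must be divisible by num_heads (got `embed_dim`: 768 and `num_heads`: 12).
    print(msg)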